├── .circleci └── config.yml ├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── feature_request.md │ ├── question-discussion.md │ └── security-vulnerability-report.md ├── PULL_REQUEST_TEMPLATE.md ├── dependabot.yml ├── release.yml └── workflows │ ├── add-to-project-v2.yml │ ├── apply-labels.yml │ ├── stale.yml │ └── validate-pr-title.yml ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── NOTICE ├── OSSMETADATA ├── README.md ├── RELEASING.md ├── SECURITY.md ├── SUPPORT.md ├── avgsamplerate.go ├── avgsamplerate_test.go ├── avgsamplewithmin.go ├── avgsamplewithmin_test.go ├── blocklist.go ├── blocklist_test.go ├── doc.go ├── dynsampler.go ├── emasamplerate.go ├── emasamplerate_test.go ├── emathroughput.go ├── emathroughput_test.go ├── genericsampler_test.go ├── go.mod ├── go.sum ├── keyCalculation.go ├── onlyonce.go ├── onlyonce_test.go ├── perkeythroughput.go ├── perkeythroughput_test.go ├── static.go ├── static_test.go ├── totalthroughput.go ├── totalthroughput_test.go ├── windowedthroughput.go └── windowedthroughput_test.go /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.1 2 | 3 | # enable a job when tag created (tag create is ignored by default) 4 | filters_always: &filters_always 5 | filters: 6 | tags: 7 | only: /.*/ 8 | 9 | # restrict a job to only run when a version tag (vNNNN) is created 10 | filters_publish: &filters_publish 11 | filters: 12 | tags: 13 | only: /^v[0-9].*/ 14 | branches: 15 | ignore: /.*/ 16 | 17 | matrix_goversions: &matrix_goversions 18 | matrix: 19 | parameters: 20 | goversion: ["17", "18", "19", "20"] 21 | 22 | # Default version of Go to use for Go steps 23 | default_goversion: &default_goversion "20" 24 | 25 | executors: 26 | go: 27 | parameters: 28 | goversion: 29 | type: string 30 | default: *default_goversion 31 | docker: 32 | - image: cimg/go:1.<< parameters.goversion >> 33 | environment: 34 | GO111MODULE: 
"on" 35 | 36 | jobs: 37 | test: 38 | parameters: 39 | goversion: 40 | type: string 41 | default: *default_goversion 42 | executor: 43 | name: go 44 | goversion: "<< parameters.goversion >>" 45 | steps: 46 | - checkout 47 | - run: make test 48 | - store_test_results: 49 | path: ./unit-tests.xml 50 | 51 | publish_github: 52 | executor: go 53 | steps: 54 | - checkout 55 | - run: 56 | name: Install ghr for drafting GitHub Releases 57 | command: go install github.com/tcnksm/ghr@latest 58 | - run: 59 | name: "create draft release at GitHub" 60 | command: make publish_github 61 | 62 | workflows: 63 | build: 64 | jobs: 65 | - test: 66 | <<: *matrix_goversions 67 | <<: *filters_always 68 | - publish_github: 69 | <<: *filters_publish 70 | context: Honeycomb Secrets for Public Repos 71 | requires: 72 | - test 73 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # Code owners file. 2 | # This file controls who is tagged for review for any given pull request. 3 | 4 | # For anything not explicitly taken by someone else: 5 | * @honeycombio/pipeline-team 6 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Let us know if something is not working as expected 4 | title: '' 5 | labels: 'type: bug' 6 | assignees: '' 7 | 8 | --- 9 | 10 | 17 | 18 | **Versions** 19 | 20 | - Go: 21 | - Dynsampler: 22 | 23 | **Steps to reproduce** 24 | 25 | 1. 
26 | 27 | **Additional context** 28 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: 'type: enhancement' 6 | assignees: '' 7 | 8 | --- 9 | 10 | 15 | 16 | **Is your feature request related to a problem? Please describe.** 17 | 18 | 19 | **Describe the solution you'd like** 20 | 21 | 22 | **Describe alternatives you've considered** 23 | 24 | 25 | **Additional context** 26 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question-discussion.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Question/Discussion 3 | about: General question about how things work or a discussion 4 | title: '' 5 | labels: 'type: discussion' 6 | assignees: '' 7 | 8 | --- 9 | 10 | 15 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/security-vulnerability-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Security vulnerability report 3 | about: Let us know if you discover a security vulnerability 4 | title: '' 5 | labels: 'type: security' 6 | assignees: '' 7 | 8 | --- 9 | 10 | 15 | **Versions** 16 | 17 | - Go: 18 | - Dynsampler: 19 | 20 | **Description** 21 | 22 | (Please include any relevant CVE advisory links) 23 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 12 | 13 | ## Which problem is this PR solving? 
14 | 15 | - 16 | 17 | ## Short description of the changes 18 | 19 | - 20 | 21 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "gomod" 4 | directory: "/" 5 | schedule: 6 | interval: "monthly" 7 | labels: 8 | - "type: dependencies" 9 | reviewers: 10 | - "honeycombio/collection-team" 11 | commit-message: 12 | prefix: "maint" 13 | include: "scope" 14 | -------------------------------------------------------------------------------- /.github/release.yml: -------------------------------------------------------------------------------- 1 | # .github/release.yml 2 | 3 | changelog: 4 | exclude: 5 | labels: 6 | - no-changelog 7 | categories: 8 | - title: 💥 Breaking Changes 💥 9 | labels: 10 | - "version: bump major" 11 | - breaking-change 12 | - title: 💡 Enhancements 13 | labels: 14 | - "type: enhancement" 15 | - title: 🐛 Fixes 16 | labels: 17 | - "type: bug" 18 | - title: 🛠 Maintenance 19 | labels: 20 | - "type: maintenance" 21 | - "type: dependencies" 22 | - "type: documentation" 23 | - title: 🤷 Other Changes 24 | labels: 25 | - "*" 26 | -------------------------------------------------------------------------------- /.github/workflows/add-to-project-v2.yml: -------------------------------------------------------------------------------- 1 | name: Add to project 2 | on: 3 | issues: 4 | types: [opened] 5 | pull_request_target: 6 | types: [opened] 7 | jobs: 8 | add-to-project: 9 | runs-on: ubuntu-latest 10 | name: Add issues and PRs to project 11 | steps: 12 | - uses: actions/add-to-project@main 13 | with: 14 | project-url: https://github.com/orgs/honeycombio/projects/27 15 | github-token: ${{ secrets.GHPROJECTS_TOKEN }} 16 | -------------------------------------------------------------------------------- /.github/workflows/apply-labels.yml: 
-------------------------------------------------------------------------------- 1 | name: Apply project labels 2 | on: [issues, pull_request_target, label] 3 | jobs: 4 | apply-labels: 5 | runs-on: ubuntu-latest 6 | name: Apply common project labels 7 | steps: 8 | - uses: honeycombio/oss-management-actions/labels@v1 9 | with: 10 | github-token: ${{ secrets.GITHUB_TOKEN }} 11 | -------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | name: 'Close stale issues and PRs' 2 | on: 3 | schedule: 4 | - cron: '30 1 * * *' 5 | 6 | jobs: 7 | stale: 8 | name: 'Close stale issues and PRs' 9 | runs-on: ubuntu-latest 10 | permissions: 11 | issues: write 12 | pull-requests: write 13 | 14 | steps: 15 | - uses: actions/stale@v4 16 | with: 17 | start-date: '2021-09-01T00:00:00Z' 18 | stale-issue-message: 'Marking this issue as stale because it has been open 14 days with no activity. Please add a comment if this is still an ongoing issue; otherwise this issue will be automatically closed in 7 days.' 19 | stale-pr-message: 'Marking this PR as stale because it has been open 30 days with no activity. Please add a comment if this PR is still relevant; otherwise this PR will be automatically closed in 7 days.' 20 | close-issue-message: 'Closing this issue due to inactivity. Please see our [Honeycomb OSS Lifecyle and Practices](https://github.com/honeycombio/home/blob/main/honeycomb-oss-lifecycle-and-practices.md).' 21 | close-pr-message: 'Closing this PR due to inactivity. Please see our [Honeycomb OSS Lifecyle and Practices](https://github.com/honeycombio/home/blob/main/honeycomb-oss-lifecycle-and-practices.md).' 
22 | days-before-issue-stale: 14 23 | days-before-pr-stale: 30 24 | days-before-issue-close: 7 25 | days-before-pr-close: 7 26 | any-of-labels: 'status: info needed,status: revision needed' 27 | -------------------------------------------------------------------------------- /.github/workflows/validate-pr-title.yml: -------------------------------------------------------------------------------- 1 | name: "Validate PR Title" 2 | 3 | on: 4 | pull_request: 5 | types: 6 | - opened 7 | - edited 8 | - synchronize 9 | 10 | jobs: 11 | main: 12 | name: Validate PR title 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: amannn/action-semantic-pull-request@v5 16 | id: lint_pr_title 17 | name: "🤖 Check PR title follows conventional commit spec" 18 | env: 19 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 20 | with: 21 | # Have to specify all types because `maint` and `rel` aren't defaults 22 | types: | 23 | maint 24 | rel 25 | fix 26 | feat 27 | chore 28 | ci 29 | docs 30 | style 31 | refactor 32 | perf 33 | test 34 | ignoreLabels: | 35 | "type: dependencies" 36 | # When the previous steps fails, the workflow would stop. By adding this 37 | # condition you can continue the execution with the populated error message. 38 | - if: always() && (steps.lint_pr_title.outputs.error_message != null) 39 | name: "📝 Add PR comment about using conventional commit spec" 40 | uses: marocchino/sticky-pull-request-comment@v2 41 | with: 42 | header: pr-title-lint-error 43 | message: | 44 | Thank you for contributing to the project! 🎉 45 | 46 | We require pull request titles to follow the [Conventional Commits specification](https://www.conventionalcommits.org/en/v1.0.0/) and it looks like your proposed title needs to be adjusted. 47 | 48 | Make sure to prepend with `feat:`, `fix:`, or another option in the list below. 49 | 50 | Once you update the title, this workflow will re-run automatically and validate the updated title. 
51 | 52 | Details: 53 | 54 | ``` 55 | ${{ steps.lint_pr_title.outputs.error_message }} 56 | ``` 57 | 58 | # Delete a previous comment when the issue has been resolved 59 | - if: ${{ steps.lint_pr_title.outputs.error_message == null }} 60 | name: "❌ Delete PR comment after title has been updated" 61 | uses: marocchino/sticky-pull-request-comment@v2 62 | with: 63 | header: pr-title-lint-error 64 | delete: true 65 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # dynsampler-go changelog 2 | 3 | ## 0.6.0 2024-01-12 4 | 5 | This version tweaks Throughput samplers to permit calculating non-integer sample rates, which makes them choose better sample rates in many scenarios. It also fixes a race condition that was recently detected by an improved Go runtime. 6 | 7 | ### Fixes 8 | 9 | - fix: allow throughput samplers to have non-integer rates (#74) | [Yi Zhao](https://github.com/yizzlez) 10 | - fix: race condition in WindowedThroughput sampler (#73) | [Kent Quirk](https://github.com/KentQuirk) 11 | 12 | ### Maintenance 13 | 14 | - maint: update codeowners to pipeline-team (#72) | [Jamie Danielson](https://github.com/JamieDanielson) 15 | - maint: update project workflow for pipeline (#71) | [Jamie Danielson](https://github.com/JamieDanielson) 16 | - maint: update codeowners to pipeline (#70) | [Jamie Danielson](https://github.com/JamieDanielson) 17 | 18 | 19 | ## 0.5.1 2023-06-26 20 | 21 | This version corrects a math error in the EMAThroughput sampler. 22 | 23 | ### Fixes 24 | 25 | - fix: Correct EMAThroughput math error (#67) | [Kent Quirk](https://github.com/kentquirk) 26 | 27 | ## 0.5.0 2023-06-08 28 | 29 | This version extends the Sampler interface to include a new GetMetrics function, 30 | which returns a collection of metrics relevant to that specific sampler. 
This 31 | improves visibility into the sampler and will be used in an upcoming release of 32 | Honeycomb's Refinery. This is a breaking change for code implemented so as to 33 | conform to the `dynsampler.Sampler` interface, such as hand-coded mocks used for 34 | testing. Code using the interface is unaffected. 35 | 36 | ### Features 37 | - feat: Add metrics counter retrieval (#65) | [Kent Quirk](https://github.com/kentquirk) 38 | 39 | ### Maintenance 40 | - maint(deps): bump github.com/stretchr/testify from 1.8.2 to 1.8.4 (#64) | [dependabot[bot]](https://github.com/dependabot[bot]) 41 | - maint: update dependabot.yml (#63) | [Vera Reynolds](https://github.com/vreynolds) 42 | 43 | ## 0.4.0 2023-03-22 44 | 45 | This version contains two new samplers and some (backwards-compatible) changes to the API: 46 | - Many thanks to [Yi Zhao](https://github.com/yizzlez) for the contribution of the `WindowedThroughput` sampler. This sampler is like the Throughput sampler, but uses a moving average to accumulate sample rates across multiple sampling periods. 47 | - The new `EMAThroughput` sampler adjusts overall throughput to achieve a goal while also ensuring that all values in the key space are represented. 48 | - The `GetSampleRateMulti()` function allows a single request to represent multiple events. This is particularly useful when tail-sampling at the trace level (because each trace represents a number of spans). 49 | - All samplers now support specifying a `time.Duration` instead of a time in seconds. Fields like `ClearFrequencySec` are now deprecated and will be dropped in a future release. 50 | 51 | ⚠️ As of this version, dynsampler-go requires and is tested on versions of Go 1.17 and greater. 
52 | 53 | ### Features 54 | 55 | - feat: EMAThroughput sampler (#58) | [Kent Quirk](https://github.com/kentquirk) 56 | - feat: Deprecate integer seconds and replace with time.Duration (#59) | [Kent Quirk](https://github.com/kentquirk) 57 | - feat: add GetSampleRateMulti (#53) | [Kent Quirk](https://github.com/kentquirk) 58 | - feat: Windowed Throughput Sampling (#45) | [Yi Zhao](https://github.com/yizzlez) 59 | - fix: Fix flaky blocklist test (#52) | [Yi Zhao](https://github.com/yizzlez) 60 | 61 | ### Maintenance 62 | 63 | - maint: Pull out common calculation into a function (#57) | [Kent Quirk](https://github.com/kentquirk) 64 | - maint: bump the go versions we support (#55) | [Kent Quirk](https://github.com/kentquirk) 65 | - maint(deps): bump github.com/stretchr/testify from 1.6.1 to 1.8.2 (#49) | [dependabot[bot]](https://github.com/dependabot[bot]) 66 | - maint: remove buildevents from circle (#48) | [Jamie Danielson](https://github.com/JamieDanielson) 67 | - chore: Update workflow (#47) | [Tyler Helmuth](https://github.com/TylerHelmuth) 68 | - chore: Update CODEOWNERS (#46) | [Tyler Helmuth](https://github.com/TylerHelmuth) 69 | - chore: update dependabot.yml (#44) | [Kent Quirk](https://github.com/kentquirk) 70 | 71 | ## 0.3.0 2022-12-07 72 | 73 | ⚠️ As of this version, dynsampler-go is only tested on Go 1.16 or greater. 74 | 75 | ### Maintenance 76 | 77 | - maint: drop versions of go below 1.16 (#39) | @vreynolds 78 | - maint: add go 1.18, 1.19 to CI (#30, #31) | @vreynolds 79 | - maint: add go 1.16, 1.17 to CI (#28) | @MikeGoldsmith 80 | - ... and a lot of project management stuff. 81 | [Details in the commits](https://github.com/honeycombio/dynsampler-go/compare/v0.2.1...0356ba0). 82 | 83 | ## 0.2.1 2019-08-07 84 | 85 | Fixes 86 | 87 | - Corrects some sample rate calculations in the Exponential Moving Average for very small counts. 
88 | 89 | ## 0.2.0 2019-07-31 90 | 91 | Features 92 | 93 | - Adds Exponential Moving Average (`EMASampleRate`) implementation with Burst Detection, based on the `AvgSampleRate` implementation. See docs for description. 94 | - Adds `SaveState` and `LoadState` to interface to enable serialization of internal state for persistence between process restarts. 95 | 96 | ## 0.1.0 2019-05-22 97 | 98 | Versioning introduced. 99 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | This project has adopted the Honeycomb User Community Code of Conduct to clarify expected behavior in our community. 4 | 5 | https://www.honeycomb.io/honeycomb-user-community-code-of-conduct/ -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guide 2 | 3 | Please see our [general guide for OSS lifecycle and practices.](https://github.com/honeycombio/home/blob/main/honeycomb-oss-lifecycle-and-practices.md) 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: test 2 | #: run the tests! 3 | test: 4 | ifeq (, $(shell which gotestsum)) 5 | @echo " ***" 6 | @echo "Running with standard go test because gotestsum was not found on PATH. Consider installing gotestsum for friendlier test output!" 7 | @echo " ***" 8 | go test -race -v ./... 9 | else 10 | gotestsum --junitfile unit-tests.xml --format testname -- -race ./... 
11 | endif 12 | 13 | ######################### 14 | ### RELEASES ### 15 | ######################### 16 | 17 | CIRCLE_TAG ?= 18 | RELEASE_VERSION ?= $(or $(CIRCLE_TAG), $(shell git describe --tags)) 19 | 20 | .PHONY: publish_github 21 | #: draft a GitHub release for current commit/tag and upload builds as its assets 22 | publish_github: github_prereqs 23 | @echo "+++ drafting GitHub release, tag $(RELEASE_VERSION)" 24 | @ghr -draft \ 25 | -name ${RELEASE_VERSION} \ 26 | -token ${GITHUB_TOKEN} \ 27 | -username ${CIRCLE_PROJECT_USERNAME} \ 28 | -repository ${CIRCLE_PROJECT_REPONAME} \ 29 | -commitish ${CIRCLE_SHA1} \ 30 | ${RELEASE_VERSION} 31 | 32 | .PHONY: github_prereqs 33 | github_prereqs: ghr_present 34 | @:$(call check_defined, RELEASE_VERSION, the tag from which to create this release) 35 | @:$(call check_defined, GITHUB_TOKEN, auth to create this release) 36 | @:$(call check_defined, CIRCLE_PROJECT_USERNAME, user who will create this release) 37 | @:$(call check_defined, CIRCLE_PROJECT_REPONAME, the repository getting a new release) 38 | @:$(call check_defined, CIRCLE_SHA1, the git ref to associate with this release) 39 | 40 | 41 | ################# 42 | ### Utilities ### 43 | ################# 44 | 45 | .PHONY: ghr_present 46 | ghr_present: 47 | @which ghr || (echo "ghr missing; required to create release at GitHub"; exit 1) 48 | 49 | check_defined = \ 50 | $(strip $(foreach 1,$1, \ 51 | $(call __check_defined,$1,$(strip $(value 2))))) 52 | __check_defined = \ 53 | $(if $(value $1),, \ 54 | $(error Undefined $1$(if $2, ($2))$(if $(value @), \ 55 | required by target `$@'))) 56 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016-Present Honeycomb, Hound Technology, Inc. All Rights Reserved. 
2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | -------------------------------------------------------------------------------- /OSSMETADATA: -------------------------------------------------------------------------------- 1 | osslifecycle=maintained 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # dynsampler-go 2 | 3 | [![OSS Lifecycle](https://img.shields.io/osslifecycle/honeycombio/dynsampler-go?color=success)](https://github.com/honeycombio/home/blob/main/honeycomb-oss-lifecycle-and-practices.md) 4 | 5 | Dynsampler is a golang library for doing dynamic sampling of traffic before sending it on to [Honeycomb](https://honeycomb.io) (or another analytics system). 6 | It contains several sampling algorithms to help you select a representative set of events instead of a full stream. 7 | 8 | A "sample rate" of 100 means that for every 100 requests, we capture a single event and indicate that it represents 100 similar requests. 9 | 10 | For full documentation, look at the [official documentation](https://pkg.go.dev/github.com/honeycombio/dynsampler-go). 11 | 12 | For more information about using Honeycomb, see our [docs](https://honeycomb.io/docs). 
13 | 14 | ## Sampling Techniques 15 | 16 | This package is intended to help sample a stream of tracking events, where events are typically created in response to a stream of traffic (for the purposes of logging or debugging). In general, sampling is used to reduce the total volume of events necessary to represent the stream of traffic in a meaningful way. 17 | 18 | There are a variety of available techniques for reducing a high-volume stream of incoming events to a lower-volume, more manageable stream of events. 19 | Depending on the shape of your traffic, one may serve better than another, or you may need to write a new one! Please consider contributing it back to this package if you do. 20 | 21 | * If your system has a completely homogeneous stream of requests: use `Static` sampling to use a constant sample rate. 22 | * If your system has a steady stream of requests and a well-known low cardinality partition key (e.g. http status): use `Static` sampling and override sample rates on a per-key basis (e.g. if you know you want to sample `HTTP 200/OK` events at a different rate from `HTTP 503/Server Error`). 23 | * If your logging system has a strict cap on the rate it can receive events, use `TotalThroughput`, which will calculate sample rates based on keeping *the entire system's* representative event throughput right around (or under) a particular cap. 24 | * If you need a throughput sampler that is responsive to spikes, but also averages sample rates over a longer period of time, use `WindowedThroughput`. 25 | * If your system has a rough cap on the rate it can receive events and your partitioned keyspace is fairly steady, use `PerKeyThroughput`, which will calculate sample rates based on keeping the event throughput roughly constant *per key/partition* (e.g. 
per user id) 26 | * The best choice for a system with a large key space and a large disparity between the highest volume and lowest volume keys is `AvgSampleRateWithMin` - it will increase the sample rate of higher volume traffic proportionally to the logarithm of the specific key's volume. If total traffic falls below a configured minimum, it stops sampling to avoid any sampling when the traffic is too low to warrant it. 27 | * `EMASampleRate` works like `AvgSampleRate`, but calculates sample rates based on a moving average (Exponential Moving Average) of many measurement intervals rather than a single isolated interval. In addition, it can detect large bursts in traffic and will trigger a recalculation of sample rates before the regular interval. 28 | * If you want the benefit of a key-based sampler that also has limits on throughput, use `EMAThroughput`. It will adjust sample rates across a key space to achieve a given throughput while still ensuring that all keys are represented. 29 | -------------------------------------------------------------------------------- /RELEASING.md: -------------------------------------------------------------------------------- 1 | # Release Process 2 | 3 | 1. Add release entry to [changelog](./CHANGELOG.md) 4 | 2. Open a PR with above changes. 5 | 3. Once the above PR is merged, pull the updated `main` branch down and tag the merged release commit on `main` with the new version, e.g. `git tag -a v2.3.1 -m "v2.3.1"`. 6 | 4. Push the tag, e.g. `git push origin v2.3.1`. This will kick off a CI workflow, which will publish a draft GitHub release. 7 | 5. Update Release Notes on the new draft GitHub release by generating notes with the button and review for any PR titles that could use some wordsmithing or recategorization. 
8 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | This security policy applies to public projects under the [honeycombio organization][gh-organization] on GitHub. 4 | For security reports involving the services provided at `(ui|ui-eu|api|api-eu).honeycomb.io`, refer to the [Honeycomb Bug Bounty Program][bugbounty] for scope, expectations, and reporting procedures. 5 | 6 | ## Security/Bugfix Versions 7 | 8 | Security and bug fixes are generally provided only for the last minor version. 9 | Fixes are released either as part of the next minor version or as an on-demand patch version. 10 | 11 | Security fixes are given priority and might be enough to cause a new version to be released. 12 | 13 | ## Reporting a Vulnerability 14 | 15 | We encourage responsible disclosure of security vulnerabilities. 16 | If you find something suspicious, we encourage and appreciate your report! 17 | 18 | ### Ways to report 19 | 20 | In order for the vulnerability reports to reach maintainers as soon as possible, the preferred way is to use the "Report a vulnerability" button under the "Security" tab of the associated GitHub project. 21 | This creates a private communication channel between the reporter and the maintainers. 22 | 23 | If you are absolutely unable to or have strong reasons not to use GitHub's vulnerability reporting workflow, please reach out to the Honeycomb security team at [security@honeycomb.io](mailto:security@honeycomb.io). 
24 | 25 | [gh-organization]: https://github.com/honeycombio 26 | [bugbounty]: https://www.honeycomb.io/bugbountyprogram 27 | -------------------------------------------------------------------------------- /SUPPORT.md: -------------------------------------------------------------------------------- 1 | # How to Get Help 2 | 3 | This project uses GitHub issues to track bugs, feature requests, and questions about using the project. Please search for existing issues before filing a new one. 4 | -------------------------------------------------------------------------------- /avgsamplerate.go: -------------------------------------------------------------------------------- 1 | package dynsampler 2 | 3 | import ( 4 | "encoding/json" 5 | "errors" 6 | "fmt" 7 | "math" 8 | "sync" 9 | "time" 10 | ) 11 | 12 | // AvgSampleRate implements Sampler and attempts to average a given sample rate, 13 | // weighting rare traffic and frequent traffic differently so as to end up with 14 | // the correct average. This method breaks down when total traffic is low 15 | // because it will be excessively sampled. 16 | // 17 | // Keys that occur only once within ClearFrequencyDuration will always have a 18 | // sample rate of 1. Keys that occur more frequently will be sampled on a 19 | // logarithmic curve. In other words, every key will be represented at least 20 | // once per ClearFrequencyDuration and more frequent keys will have their sample 21 | // rate increased proportionally to wind up with the goal sample rate. 22 | type AvgSampleRate struct { 23 | // DEPRECATED -- use ClearFrequencyDuration. 24 | // ClearFrequencySec is how often the counters reset in seconds. 25 | ClearFrequencySec int 26 | 27 | // ClearFrequencyDuration is how often the counters reset as a Duration. 28 | // Note that either this or ClearFrequencySec can be specified, but not both. 29 | // If neither one is set, the default is 30s. 
30 | ClearFrequencyDuration time.Duration 31 | 32 | // GoalSampleRate is the average sample rate we're aiming for, across all 33 | // events. Default 10 34 | GoalSampleRate int 35 | 36 | // MaxKeys, if greater than 0, limits the number of distinct keys used to build 37 | // the sample rate map within the interval defined by `ClearFrequencyDuration`. Once 38 | // MaxKeys is reached, new keys will not be included in the sample rate map, but 39 | // existing keys will continue to be counted. 40 | MaxKeys int 41 | 42 | savedSampleRates map[string]int 43 | currentCounts map[string]float64 44 | 45 | // haveData indicates that we have gotten a sample of traffic. Before we've 46 | // gotten any samples of traffic, we should use the default goal 47 | // sample rate for all events instead of sampling everything at 1 48 | haveData bool 49 | done chan struct{} 50 | 51 | lock sync.Mutex 52 | 53 | // metrics 54 | requestCount int64 55 | eventCount int64 56 | } 57 | 58 | // Ensure we implement the sampler interface 59 | var _ Sampler = (*AvgSampleRate)(nil) 60 | 61 | func (a *AvgSampleRate) Start() error { 62 | // apply defaults 63 | if a.ClearFrequencyDuration != 0 && a.ClearFrequencySec != 0 { 64 | return fmt.Errorf("the ClearFrequencySec configuration value is deprecated; use only ClearFrequencyDuration") 65 | } 66 | 67 | if a.ClearFrequencyDuration == 0 && a.ClearFrequencySec == 0 { 68 | a.ClearFrequencyDuration = 30 * time.Second 69 | } else if a.ClearFrequencySec != 0 { 70 | a.ClearFrequencyDuration = time.Duration(a.ClearFrequencySec) * time.Second 71 | } 72 | 73 | if a.GoalSampleRate == 0 { 74 | a.GoalSampleRate = 10 75 | } 76 | 77 | // initialize internal variables 78 | // Create saved sample rate map if we're not loading from a previous state 79 | if a.savedSampleRates == nil { 80 | a.savedSampleRates = make(map[string]int) 81 | } 82 | a.currentCounts = make(map[string]float64) 83 | a.done = make(chan struct{}) 84 | 85 | // spin up calculator 86 | go func() 
{ 87 | ticker := time.NewTicker(a.ClearFrequencyDuration) 88 | defer ticker.Stop() 89 | for { 90 | select { 91 | case <-ticker.C: 92 | a.updateMaps() 93 | case <-a.done: 94 | return 95 | } 96 | } 97 | }() 98 | return nil 99 | } 100 | 101 | func (a *AvgSampleRate) Stop() error { 102 | close(a.done) 103 | return nil 104 | } 105 | 106 | // updateMaps calculates a new saved rate map based on the contents of the 107 | // counter map 108 | func (a *AvgSampleRate) updateMaps() { 109 | // make a local copy of the sample counters for calculation 110 | a.lock.Lock() 111 | tmpCounts := a.currentCounts 112 | a.currentCounts = make(map[string]float64) 113 | a.lock.Unlock() 114 | // short circuit if no traffic 115 | numKeys := len(tmpCounts) 116 | if numKeys == 0 { 117 | // no traffic the last 30s. clear the result map 118 | a.lock.Lock() 119 | defer a.lock.Unlock() 120 | a.savedSampleRates = make(map[string]int) 121 | return 122 | } 123 | 124 | // Goal events to send this interval is the total count of received events 125 | // divided by the desired average sample rate 126 | var sumEvents float64 127 | for _, count := range tmpCounts { 128 | sumEvents += count 129 | } 130 | goalCount := sumEvents / float64(a.GoalSampleRate) 131 | // goalRatio is the goalCount divided by the sum of all the log values - it 132 | // determines what percentage of the total event space belongs to each key 133 | var logSum float64 134 | for _, count := range tmpCounts { 135 | logSum += math.Log10(count) 136 | } 137 | goalRatio := goalCount / logSum 138 | 139 | newSavedSampleRates := calculateSampleRates(goalRatio, tmpCounts) 140 | a.lock.Lock() 141 | defer a.lock.Unlock() 142 | a.savedSampleRates = newSavedSampleRates 143 | a.haveData = true 144 | } 145 | 146 | // GetSampleRate takes a key and returns the appropriate sample rate for that 147 | // key. 
148 | func (a *AvgSampleRate) GetSampleRate(key string) int { 149 | return a.GetSampleRateMulti(key, 1) 150 | } 151 | 152 | // GetSampleRateMulti takes a key representing count spans and returns the 153 | // appropriate sample rate for that key. 154 | func (a *AvgSampleRate) GetSampleRateMulti(key string, count int) int { 155 | a.lock.Lock() 156 | defer a.lock.Unlock() 157 | 158 | a.requestCount++ 159 | a.eventCount += int64(count) 160 | 161 | // Enforce MaxKeys limit on the size of the map 162 | if a.MaxKeys > 0 { 163 | // If a key already exists, increment it. If not, but we're under the limit, store a new key 164 | if _, found := a.currentCounts[key]; found || len(a.currentCounts) < a.MaxKeys { 165 | a.currentCounts[key] += float64(count) 166 | } 167 | } else { 168 | a.currentCounts[key] += float64(count) 169 | } 170 | if !a.haveData { 171 | return a.GoalSampleRate 172 | } 173 | if rate, found := a.savedSampleRates[key]; found { 174 | return rate 175 | } 176 | return 1 177 | } 178 | 179 | type avgSampleRateState struct { 180 | // This field is exported for use by `JSON.Marshal` and `JSON.Unmarshal` 181 | SavedSampleRates map[string]int `json:"saved_sample_rates"` 182 | } 183 | 184 | // SaveState returns a byte array with a JSON representation of the sampler state 185 | func (a *AvgSampleRate) SaveState() ([]byte, error) { 186 | a.lock.Lock() 187 | defer a.lock.Unlock() 188 | 189 | if a.savedSampleRates == nil { 190 | return nil, errors.New("saved sample rate map is nil") 191 | } 192 | s := &avgSampleRateState{SavedSampleRates: a.savedSampleRates} 193 | return json.Marshal(s) 194 | } 195 | 196 | // LoadState accepts a byte array with a JSON representation of a previous instance's 197 | // state 198 | func (a *AvgSampleRate) LoadState(state []byte) error { 199 | a.lock.Lock() 200 | defer a.lock.Unlock() 201 | 202 | s := avgSampleRateState{} 203 | err := json.Unmarshal(state, &s) 204 | if err != nil { 205 | return err 206 | } 207 | 208 | // Load the previously 
calculated sample rates 209 | a.savedSampleRates = s.SavedSampleRates 210 | // Allow GetSampleRate to return calculated sample rates from the loaded map 211 | a.haveData = true 212 | 213 | return nil 214 | } 215 | 216 | func (a *AvgSampleRate) GetMetrics(prefix string) map[string]int64 { 217 | a.lock.Lock() 218 | defer a.lock.Unlock() 219 | mets := map[string]int64{ 220 | prefix + "request_count": a.requestCount, 221 | prefix + "event_count": a.eventCount, 222 | prefix + "keyspace_size": int64(len(a.currentCounts)), 223 | } 224 | return mets 225 | } 226 | -------------------------------------------------------------------------------- /avgsamplerate_test.go: -------------------------------------------------------------------------------- 1 | package dynsampler 2 | 3 | import ( 4 | "crypto/rand" 5 | "fmt" 6 | "math" 7 | mrand "math/rand" 8 | "strconv" 9 | "sync" 10 | "testing" 11 | "time" 12 | 13 | "github.com/stretchr/testify/assert" 14 | ) 15 | 16 | func TestAvgSampleUpdateMaps(t *testing.T) { 17 | a := &AvgSampleRate{ 18 | GoalSampleRate: 20, 19 | } 20 | tsts := []struct { 21 | inputSampleCount map[string]float64 22 | expectedSavedSampleRates map[string]int 23 | }{ 24 | { 25 | map[string]float64{ 26 | "one": 1, 27 | "two": 1, 28 | "three": 2, 29 | "four": 5, 30 | "five": 8, 31 | "six": 15, 32 | "seven": 45, 33 | "eight": 612, 34 | "nine": 2000, 35 | "ten": 10000, 36 | }, 37 | map[string]int{ 38 | "one": 1, 39 | "two": 1, 40 | "three": 1, 41 | "four": 1, 42 | "five": 1, 43 | "six": 1, 44 | "seven": 1, 45 | "eight": 6, 46 | "nine": 14, 47 | "ten": 47, 48 | }, 49 | }, 50 | { 51 | map[string]float64{ 52 | "one": 1, 53 | "two": 1, 54 | "three": 2, 55 | "four": 5, 56 | "five": 8, 57 | "six": 15, 58 | "seven": 45, 59 | "eight": 50, 60 | "nine": 60, 61 | }, 62 | map[string]int{ 63 | "one": 1, 64 | "two": 1, 65 | "three": 2, 66 | "four": 5, 67 | "five": 8, 68 | "six": 11, 69 | "seven": 24, 70 | "eight": 26, 71 | "nine": 30, 72 | }, 73 | }, 74 | { 75 | map[string]float64{ 
76 | "one": 1, 77 | "two": 1, 78 | "three": 2, 79 | "four": 5, 80 | "five": 7, 81 | }, 82 | map[string]int{ 83 | "one": 1, 84 | "two": 1, 85 | "three": 2, 86 | "four": 5, 87 | "five": 7, 88 | }, 89 | }, 90 | { 91 | map[string]float64{ 92 | "one": 1000, 93 | "two": 1000, 94 | "three": 2000, 95 | "four": 5000, 96 | "five": 7000, 97 | }, 98 | map[string]int{ 99 | "one": 7, 100 | "two": 7, 101 | "three": 13, 102 | "four": 29, 103 | "five": 39, 104 | }, 105 | }, 106 | { 107 | map[string]float64{ 108 | "one": 6000, 109 | "two": 6000, 110 | "three": 6000, 111 | "four": 6000, 112 | "five": 6000, 113 | }, 114 | map[string]int{ 115 | "one": 20, 116 | "two": 20, 117 | "three": 20, 118 | "four": 20, 119 | "five": 20, 120 | }, 121 | }, 122 | { 123 | map[string]float64{ 124 | "one": 12000, 125 | }, 126 | map[string]int{ 127 | "one": 20, 128 | }, 129 | }, 130 | { 131 | map[string]float64{}, 132 | map[string]int{}, 133 | }, 134 | { 135 | map[string]float64{ 136 | "one": 10, 137 | "two": 1, 138 | "three": 1, 139 | "four": 1, 140 | "five": 1, 141 | "six": 1, 142 | "seven": 1, 143 | "eight": 1, 144 | "nine": 1, 145 | "ten": 1, 146 | "eleven": 1, 147 | "twelve": 1, 148 | "thirteen": 1, 149 | "fourteen": 1, 150 | "fifteen": 1, 151 | "sixteen": 1, 152 | "seventeen": 1, 153 | "eighteen": 1, 154 | "nineteen": 1, 155 | "twenty": 1, 156 | }, 157 | map[string]int{ 158 | "one": 7, 159 | "two": 1, 160 | "three": 1, 161 | "four": 1, 162 | "five": 1, 163 | "six": 1, 164 | "seven": 1, 165 | "eight": 1, 166 | "nine": 1, 167 | "ten": 1, 168 | "eleven": 1, 169 | "twelve": 1, 170 | "thirteen": 1, 171 | "fourteen": 1, 172 | "fifteen": 1, 173 | "sixteen": 1, 174 | "seventeen": 1, 175 | "eighteen": 1, 176 | "nineteen": 1, 177 | "twenty": 1, 178 | }, 179 | }, 180 | } 181 | for i, tst := range tsts { 182 | a.currentCounts = tst.inputSampleCount 183 | a.updateMaps() 184 | assert.Equal(t, len(a.currentCounts), 0) 185 | assert.Equal(t, a.savedSampleRates, tst.expectedSavedSampleRates, fmt.Sprintf("test %d 
failed", i)) 186 | } 187 | } 188 | 189 | func TestAvgSampleGetSampleRateStartup(t *testing.T) { 190 | a := &AvgSampleRate{ 191 | GoalSampleRate: 10, 192 | currentCounts: map[string]float64{}, 193 | } 194 | rate := a.GetSampleRate("key") 195 | assert.Equal(t, rate, 10) 196 | // and the counters still get bumped 197 | assert.Equal(t, a.currentCounts["key"], 1.0) 198 | } 199 | 200 | func TestAvgSampleRace(t *testing.T) { 201 | a := &AvgSampleRate{ 202 | GoalSampleRate: 2, 203 | currentCounts: map[string]float64{}, 204 | savedSampleRates: map[string]int{}, 205 | haveData: true, 206 | } 207 | wg := sync.WaitGroup{} 208 | wg.Add(1) 209 | wg.Add(1) 210 | // set up 100 parallel readers, each reading 1000 times 211 | go func() { 212 | for i := 0; i < 100; i++ { 213 | wg.Add(1) 214 | go func(i int) { 215 | for j := 0; j < 1000; j++ { 216 | rate := a.GetSampleRate("key" + strconv.Itoa(i)) 217 | assert.NotEqual(t, rate <= 0, true, "rate should never be lte zero") 218 | } 219 | wg.Done() 220 | }(i) 221 | } 222 | wg.Done() 223 | }() 224 | go func() { 225 | for i := 0; i < 100; i++ { 226 | a.updateMaps() 227 | } 228 | wg.Done() 229 | }() 230 | wg.Wait() 231 | } 232 | 233 | func TestAvgSampleRateGetSampleRate(t *testing.T) { 234 | a := &AvgSampleRate{ 235 | haveData: true, 236 | } 237 | a.currentCounts = map[string]float64{ 238 | "one": 5, 239 | "two": 8, 240 | } 241 | a.savedSampleRates = map[string]int{ 242 | "one": 10, 243 | "two": 1, 244 | "three": 5, 245 | } 246 | tsts := []struct { 247 | inputKey string 248 | expectedSampleRate int 249 | expectedCurrentCountForKey float64 250 | }{ 251 | {"one", 10, 6}, 252 | {"two", 1, 9}, 253 | {"two", 1, 10}, 254 | {"three", 5, 1}, // key missing from current counts 255 | {"three", 5, 2}, 256 | {"four", 1, 1}, // key missing from current and saved counts 257 | {"four", 1, 2}, 258 | } 259 | for _, tst := range tsts { 260 | rate := a.GetSampleRate(tst.inputKey) 261 | assert.Equal(t, rate, tst.expectedSampleRate) 262 | assert.Equal(t, 
a.currentCounts[tst.inputKey], tst.expectedCurrentCountForKey) 263 | } 264 | } 265 | 266 | func TestAvgSampleRateMaxKeys(t *testing.T) { 267 | a := &AvgSampleRate{ 268 | MaxKeys: 3, 269 | } 270 | a.currentCounts = map[string]float64{ 271 | "one": 1, 272 | "two": 1, 273 | } 274 | a.savedSampleRates = map[string]int{} 275 | 276 | // with MaxKeys 3, we are under the key limit, so three should get added 277 | a.GetSampleRate("three") 278 | assert.Equal(t, 3, len(a.currentCounts)) 279 | assert.Equal(t, 1., a.currentCounts["three"]) 280 | // Now we're at 3 keys - four should not be added 281 | a.GetSampleRate("four") 282 | assert.Equal(t, 3, len(a.currentCounts)) 283 | _, found := a.currentCounts["four"] 284 | assert.Equal(t, false, found) 285 | // We should still support bumping counts for existing keys 286 | a.GetSampleRate("one") 287 | assert.Equal(t, 3, len(a.currentCounts)) 288 | assert.Equal(t, 2., a.currentCounts["one"]) 289 | } 290 | 291 | func TestAvgSampleRateSaveState(t *testing.T) { 292 | var sampler Sampler 293 | asr := &AvgSampleRate{} 294 | // ensure the interface is implemented 295 | sampler = asr 296 | err := sampler.Start() 297 | assert.Nil(t, err) 298 | 299 | asr.lock.Lock() 300 | asr.savedSampleRates = map[string]int{"foo": 2, "bar": 4} 301 | asr.haveData = true 302 | asr.lock.Unlock() 303 | 304 | assert.Equal(t, 2, sampler.GetSampleRate("foo")) 305 | assert.Equal(t, 4, sampler.GetSampleRate("bar")) 306 | 307 | state, err := sampler.SaveState() 308 | assert.Nil(t, err) 309 | 310 | var newSampler Sampler = &AvgSampleRate{} 311 | 312 | err = newSampler.LoadState(state) 313 | assert.Nil(t, err) 314 | err = newSampler.Start() 315 | assert.Nil(t, err) 316 | 317 | assert.Equal(t, 2, newSampler.GetSampleRate("foo")) 318 | assert.Equal(t, 4, newSampler.GetSampleRate("bar")) 319 | } 320 | 321 | // This is a long test because we generate a lot of random data and run it through the sampler 322 | // The goal is to determine if we actually hit the specified target 
rate (within a tolerance) an acceptable 323 | // number of times. Most of the time, the average sample rate of observations kept should be close 324 | // to the target rate 325 | func TestAvgSampleRateHitsTargetRate(t *testing.T) { 326 | mrand.Seed(time.Now().Unix()) 327 | testRates := []int{50, 100} 328 | testKeyCount := []int{10, 100} 329 | tolerancePct := float64(0.2) 330 | 331 | for _, rate := range testRates { 332 | tolerance := float64(rate) * tolerancePct 333 | toleranceUpper := float64(rate) + tolerance 334 | toleranceLower := float64(rate) - tolerance 335 | 336 | for _, keyCount := range testKeyCount { 337 | sampler := &AvgSampleRate{GoalSampleRate: rate, currentCounts: make(map[string]float64)} 338 | 339 | // build a consistent set of keys to use 340 | keys := make([]string, keyCount) 341 | for i := 0; i < keyCount; i++ { 342 | keys[i] = randomString(8) 343 | } 344 | 345 | for i, key := range keys { 346 | // generate key counts of different magnitudes - keys reliably get the same magnitude 347 | // so that count ranges are reasonable (i.e. 
they don't go from 1 to 10000 back to 100) 348 | base := math.Pow10(i%3 + 1) 349 | count := float64(((i%10)+1))*base + float64(mrand.Intn(int(base))) 350 | sampler.currentCounts[key] = count 351 | } 352 | 353 | // build an initial set of sample rates so we don't just return the target rate 354 | sampler.updateMaps() 355 | 356 | var success float64 357 | 358 | for i := 0; i < 100; i++ { 359 | totalSampleRate := 0 360 | totalKeptObservations := 0 361 | for j, key := range keys { 362 | base := math.Pow10(j%3 + 1) 363 | count := float64(((j%10)+1))*base + float64(mrand.Intn(int(base))) 364 | for k := 0; k < int(count); k++ { 365 | rate := sampler.GetSampleRate(key) 366 | if mrand.Intn(rate) == 0 { 367 | totalSampleRate += rate 368 | totalKeptObservations++ 369 | } 370 | } 371 | } 372 | 373 | avgSampleRate := float64(totalSampleRate) / float64(totalKeptObservations) 374 | if avgSampleRate <= toleranceUpper && avgSampleRate >= toleranceLower { 375 | success++ 376 | } 377 | sampler.updateMaps() 378 | } 379 | 380 | assert.True(t, success/100.0 >= 0.95, "target rate test %d with key count %d failed with success rate of only %f", rate, keyCount, success/100.0) 381 | } 382 | } 383 | } 384 | 385 | func TestAvgSampleUpdateMapsSparseCounts(t *testing.T) { 386 | a := &AvgSampleRate{ 387 | GoalSampleRate: 20, 388 | } 389 | 390 | a.savedSampleRates = make(map[string]int) 391 | 392 | for i := 0; i <= 100; i++ { 393 | input := make(map[string]float64) 394 | // simulate steady stream of input from one key 395 | input["largest_count"] = 20 396 | // sporadic keys with single counts that come and go with each interval 397 | for j := 0; j < 5; j++ { 398 | key := randomString(8) 399 | input[key] = 1 400 | } 401 | a.currentCounts = input 402 | a.updateMaps() 403 | } 404 | 405 | assert.Equal(t, 16, a.savedSampleRates["largest_count"]) 406 | } 407 | 408 | func randomString(length int) string { 409 | b := make([]byte, length/2) 410 | rand.Read(b) 411 | return fmt.Sprintf("%x", b) 412 | } 413 | 
414 | func TestAvgSampleRate_Start(t *testing.T) { 415 | tests := []struct { 416 | name string 417 | ClearFrequencySec int 418 | ClearFrequencyDuration time.Duration 419 | wantDuration time.Duration 420 | wantErr bool 421 | }{ 422 | {"sec only", 2, 0, 2 * time.Second, false}, 423 | {"dur only", 0, 1003 * time.Millisecond, 1003 * time.Millisecond, false}, 424 | {"default", 0, 0, 30 * time.Second, false}, 425 | {"both", 2, 2 * time.Second, 0, true}, 426 | } 427 | for _, tt := range tests { 428 | t.Run(tt.name, func(t *testing.T) { 429 | a := &AvgSampleRate{ 430 | ClearFrequencySec: tt.ClearFrequencySec, 431 | ClearFrequencyDuration: tt.ClearFrequencyDuration, 432 | } 433 | err := a.Start() 434 | if (err != nil) != tt.wantErr { 435 | t.Errorf("AvgSampleRate error = %v, wantErr %v", err, tt.wantErr) 436 | } 437 | if err == nil { 438 | defer a.Stop() 439 | if tt.wantDuration != a.ClearFrequencyDuration { 440 | t.Errorf("AvgSampleRate duration mismatch = want %v, got %v", tt.wantDuration, a.ClearFrequencyDuration) 441 | } 442 | } 443 | }) 444 | } 445 | } 446 | -------------------------------------------------------------------------------- /avgsamplewithmin.go: -------------------------------------------------------------------------------- 1 | package dynsampler 2 | 3 | import ( 4 | "fmt" 5 | "math" 6 | "sync" 7 | "time" 8 | ) 9 | 10 | // AvgSampleWithMin implements Sampler and attempts to average a given sample 11 | // rate, with a minimum number of events per second (i.e. it will reduce 12 | // sampling if it would end up sending fewer than the minimum number of events). 13 | // This method attempts to get the best of the normal average sample rate 14 | // method, without the failings it shows on the low end of total traffic 15 | // throughput 16 | // 17 | // Keys that occur only once within ClearFrequencyDuration will always have a sample 18 | // rate of 1. Keys that occur more frequently will be sampled on a logarithmic 19 | // curve. 
In other words, every key will be represented at least once per 20 | // ClearFrequencyDuration and more frequent keys will have their sample rate 21 | // increased proportionally to wind up with the goal sample rate. 22 | type AvgSampleWithMin struct { 23 | // DEPRECATED -- use ClearFrequencyDuration. 24 | // ClearFrequencySec is how often the counters reset in seconds. 25 | ClearFrequencySec int 26 | 27 | // ClearFrequencyDuration is how often the counters reset as a Duration. 28 | // Note that either this or ClearFrequencySec can be specified, but not both. 29 | // If neither one is set, the default is 30s. 30 | ClearFrequencyDuration time.Duration 31 | 32 | // GoalSampleRate is the average sample rate we're aiming for, across all 33 | // events. Default 10 34 | GoalSampleRate int 35 | 36 | // MaxKeys, if greater than 0, limits the number of distinct keys used to build 37 | // the sample rate map within the interval defined by `ClearFrequencyDuration`. Once 38 | // MaxKeys is reached, new keys will not be included in the sample rate map, but 39 | // existing keys will continue to be counted. 40 | MaxKeys int 41 | 42 | // MinEventsPerSec - when the total number of events drops below this 43 | // threshold, sampling will cease. default 50 44 | MinEventsPerSec int 45 | 46 | savedSampleRates map[string]int 47 | currentCounts map[string]float64 48 | 49 | // haveData indicates that we have gotten a sample of traffic. 
Before we've 50 | // gotten any samples of traffic, we should use the default goal 51 | // sample rate for all events instead of sampling everything at 1 52 | haveData bool 53 | done chan struct{} 54 | 55 | lock sync.Mutex 56 | 57 | // metrics 58 | requestCount int64 59 | eventCount int64 60 | } 61 | 62 | // Ensure we implement the sampler interface 63 | var _ Sampler = (*AvgSampleWithMin)(nil) 64 | 65 | func (a *AvgSampleWithMin) Start() error { 66 | // apply defaults 67 | if a.ClearFrequencyDuration != 0 && a.ClearFrequencySec != 0 { 68 | return fmt.Errorf("the ClearFrequencySec configuration value is deprecated; use only ClearFrequencyDuration") 69 | } 70 | 71 | if a.ClearFrequencyDuration == 0 && a.ClearFrequencySec == 0 { 72 | a.ClearFrequencyDuration = 30 * time.Second 73 | } else if a.ClearFrequencySec != 0 { 74 | a.ClearFrequencyDuration = time.Duration(a.ClearFrequencySec) * time.Second 75 | } 76 | 77 | if a.GoalSampleRate == 0 { 78 | a.GoalSampleRate = 10 79 | } 80 | if a.MinEventsPerSec == 0 { 81 | a.MinEventsPerSec = 50 82 | } 83 | 84 | // initialize internal variables 85 | a.savedSampleRates = make(map[string]int) 86 | a.currentCounts = make(map[string]float64) 87 | a.done = make(chan struct{}) 88 | 89 | // spin up calculator 90 | go func() { 91 | ticker := time.NewTicker(a.ClearFrequencyDuration) 92 | defer ticker.Stop() 93 | for { 94 | select { 95 | case <-ticker.C: 96 | a.updateMaps() 97 | case <-a.done: 98 | return 99 | } 100 | } 101 | }() 102 | return nil 103 | } 104 | 105 | func (a *AvgSampleWithMin) Stop() error { 106 | close(a.done) 107 | return nil 108 | } 109 | 110 | // updateMaps calculates a new saved rate map based on the contents of the 111 | // counter map 112 | func (a *AvgSampleWithMin) updateMaps() { 113 | // make a local copy of the sample counters for calculation 114 | a.lock.Lock() 115 | tmpCounts := a.currentCounts 116 | a.currentCounts = make(map[string]float64) 117 | a.lock.Unlock() 118 | newSavedSampleRates := 
make(map[string]int) 119 | // short circuit if no traffic 120 | numKeys := len(tmpCounts) 121 | if numKeys == 0 { 122 | // no traffic the last 30s. clear the result map 123 | a.lock.Lock() 124 | defer a.lock.Unlock() 125 | a.savedSampleRates = newSavedSampleRates 126 | return 127 | } 128 | 129 | // Goal events to send this interval is the total count of received events 130 | // divided by the desired average sample rate 131 | var sumEvents float64 132 | for _, count := range tmpCounts { 133 | sumEvents += count 134 | } 135 | goalCount := float64(sumEvents) / float64(a.GoalSampleRate) 136 | // check to see if we fall below the minimum 137 | if sumEvents < float64(a.MinEventsPerSec)*a.ClearFrequencyDuration.Seconds() { 138 | // we still need to go through each key to set sample rates individually 139 | for k := range tmpCounts { 140 | newSavedSampleRates[k] = 1 141 | } 142 | a.lock.Lock() 143 | defer a.lock.Unlock() 144 | a.savedSampleRates = newSavedSampleRates 145 | return 146 | } 147 | // goalRatio is the goalCount divided by the sum of all the log values - it 148 | // determines what percentage of the total event space belongs to each key 149 | var logSum float64 150 | for _, count := range tmpCounts { 151 | logSum += math.Log10(float64(count)) 152 | } 153 | // Note that this can produce Inf if logSum is 0 154 | goalRatio := goalCount / logSum 155 | 156 | newSavedSampleRates = calculateSampleRates(goalRatio, tmpCounts) 157 | a.lock.Lock() 158 | defer a.lock.Unlock() 159 | a.savedSampleRates = newSavedSampleRates 160 | a.haveData = true 161 | } 162 | 163 | // GetSampleRate takes a key and returns the appropriate sample rate for that 164 | // key. 165 | func (a *AvgSampleWithMin) GetSampleRate(key string) int { 166 | return a.GetSampleRateMulti(key, 1) 167 | } 168 | 169 | // GetSampleRateMulti takes a key representing count spans and returns the 170 | // appropriate sample rate for that key. 
171 | func (a *AvgSampleWithMin) GetSampleRateMulti(key string, count int) int { 172 | a.lock.Lock() 173 | defer a.lock.Unlock() 174 | 175 | a.requestCount++ 176 | a.eventCount += int64(count) 177 | 178 | // Enforce MaxKeys limit on the size of the map 179 | if a.MaxKeys > 0 { 180 | // If a key already exists, increment it. If not, but we're under the limit, store a new key 181 | if _, found := a.currentCounts[key]; found || len(a.currentCounts) < a.MaxKeys { 182 | a.currentCounts[key] += float64(count) 183 | } 184 | } else { 185 | a.currentCounts[key] += float64(count) 186 | } 187 | if !a.haveData { 188 | return a.GoalSampleRate 189 | } 190 | if rate, found := a.savedSampleRates[key]; found { 191 | return rate 192 | } 193 | return 1 194 | } 195 | 196 | // SaveState is not implemented 197 | func (a *AvgSampleWithMin) SaveState() ([]byte, error) { 198 | return nil, nil 199 | } 200 | 201 | // LoadState is not implemented 202 | func (a *AvgSampleWithMin) LoadState(state []byte) error { 203 | return nil 204 | } 205 | 206 | func (a *AvgSampleWithMin) GetMetrics(prefix string) map[string]int64 { 207 | a.lock.Lock() 208 | defer a.lock.Unlock() 209 | mets := map[string]int64{ 210 | prefix + "request_count": a.requestCount, 211 | prefix + "event_count": a.eventCount, 212 | prefix + "keyspace_size": int64(len(a.currentCounts)), 213 | } 214 | return mets 215 | } 216 | -------------------------------------------------------------------------------- /avgsamplewithmin_test.go: -------------------------------------------------------------------------------- 1 | package dynsampler 2 | 3 | import ( 4 | "fmt" 5 | "strconv" 6 | "sync" 7 | "testing" 8 | "time" 9 | 10 | "github.com/stretchr/testify/assert" 11 | ) 12 | 13 | func TestAvgSampleWithMinUpdateMaps(t *testing.T) { 14 | a := &AvgSampleWithMin{ 15 | GoalSampleRate: 20, 16 | MinEventsPerSec: 50, 17 | ClearFrequencyDuration: 30 * time.Second, 18 | } 19 | tsts := []struct { 20 | inputSampleCount map[string]float64 21 | 
expectedSavedSampleRates map[string]int 22 | }{ 23 | { 24 | map[string]float64{ 25 | "one": 1, 26 | "two": 1, 27 | "three": 2, 28 | "four": 5, 29 | "five": 8, 30 | "six": 15, 31 | "seven": 45, 32 | "eight": 612, 33 | "nine": 2000, 34 | "ten": 10000, 35 | }, 36 | map[string]int{ 37 | "one": 1, 38 | "two": 1, 39 | "three": 1, 40 | "four": 1, 41 | "five": 1, 42 | "six": 1, 43 | "seven": 1, 44 | "eight": 6, 45 | "nine": 14, 46 | "ten": 47, 47 | }, 48 | }, 49 | { 50 | map[string]float64{ 51 | "one": 1, 52 | "two": 1, 53 | "three": 2, 54 | "four": 5, 55 | "five": 8, 56 | "six": 15, 57 | "seven": 45, 58 | "eight": 50, 59 | "nine": 60, 60 | }, 61 | map[string]int{ 62 | "one": 1, 63 | "two": 1, 64 | "three": 1, 65 | "four": 1, 66 | "five": 1, 67 | "six": 1, 68 | "seven": 1, 69 | "eight": 1, 70 | "nine": 1, 71 | }, 72 | }, 73 | { 74 | map[string]float64{ 75 | "one": 1, 76 | "two": 1, 77 | "three": 2, 78 | "four": 5, 79 | "five": 7, 80 | }, 81 | map[string]int{ 82 | "one": 1, 83 | "two": 1, 84 | "three": 1, 85 | "four": 1, 86 | "five": 1, 87 | }, 88 | }, 89 | { 90 | map[string]float64{ 91 | "one": 1, 92 | }, 93 | map[string]int{ 94 | "one": 1, 95 | }, 96 | }, 97 | { 98 | map[string]float64{ 99 | "one": 8, 100 | }, 101 | map[string]int{ 102 | "one": 1, 103 | }, 104 | }, 105 | { 106 | map[string]float64{ 107 | "one": 12000, 108 | }, 109 | map[string]int{ 110 | "one": 20, 111 | }, 112 | }, 113 | { 114 | map[string]float64{ 115 | "one": 1000, 116 | "two": 1000, 117 | "three": 2000, 118 | "four": 5000, 119 | "five": 7000, 120 | }, 121 | map[string]int{ 122 | "one": 7, 123 | "two": 7, 124 | "three": 13, 125 | "four": 29, 126 | "five": 39, 127 | }, 128 | }, 129 | { 130 | map[string]float64{ 131 | "one": 6000, 132 | "two": 6000, 133 | "three": 6000, 134 | "four": 6000, 135 | "five": 6000, 136 | }, 137 | map[string]int{ 138 | "one": 20, 139 | "two": 20, 140 | "three": 20, 141 | "four": 20, 142 | "five": 20, 143 | }, 144 | }, 145 | { 146 | map[string]float64{}, 147 | map[string]int{}, 
148 | }, 149 | } 150 | for i, tst := range tsts { 151 | a.currentCounts = tst.inputSampleCount 152 | a.updateMaps() 153 | assert.Equal(t, len(a.currentCounts), 0) 154 | assert.Equal(t, a.savedSampleRates, tst.expectedSavedSampleRates, fmt.Sprintf("test %d failed", i)) 155 | } 156 | } 157 | 158 | func TestAvgSampleWithMinGetSampleRateStartup(t *testing.T) { 159 | a := &AvgSampleWithMin{ 160 | GoalSampleRate: 10, 161 | currentCounts: map[string]float64{}, 162 | } 163 | rate := a.GetSampleRate("key") 164 | assert.Equal(t, rate, 10) 165 | // and the counters still get bumped 166 | assert.Equal(t, a.currentCounts["key"], 1.) 167 | } 168 | 169 | func TestAvgSampleWithMinGetSampleRate(t *testing.T) { 170 | a := &AvgSampleWithMin{ 171 | haveData: true, 172 | } 173 | a.currentCounts = map[string]float64{ 174 | "one": 5, 175 | "two": 8, 176 | } 177 | a.savedSampleRates = map[string]int{ 178 | "one": 10, 179 | "two": 1, 180 | "three": 5, 181 | } 182 | tsts := []struct { 183 | inputKey string 184 | expectedSampleRate int 185 | expectedCurrentCountForKey float64 186 | }{ 187 | {"one", 10, 6}, 188 | {"two", 1, 9}, 189 | {"two", 1, 10}, 190 | {"three", 5, 1}, // key missing from current counts 191 | {"three", 5, 2}, 192 | {"four", 1, 1}, // key missing from current and saved counts 193 | {"four", 1, 2}, 194 | } 195 | for _, tst := range tsts { 196 | rate := a.GetSampleRate(tst.inputKey) 197 | assert.Equal(t, rate, tst.expectedSampleRate) 198 | assert.Equal(t, a.currentCounts[tst.inputKey], tst.expectedCurrentCountForKey) 199 | } 200 | } 201 | 202 | func TestAvgSampleWithMinRace(t *testing.T) { 203 | a := &AvgSampleWithMin{ 204 | GoalSampleRate: 2, 205 | currentCounts: map[string]float64{}, 206 | savedSampleRates: map[string]int{}, 207 | haveData: true, 208 | } 209 | wg := sync.WaitGroup{} 210 | wg.Add(1) 211 | wg.Add(1) 212 | // set up 100 parallel readers, each reading 1000 times 213 | go func() { 214 | for i := 0; i < 100; i++ { 215 | wg.Add(1) 216 | go func(i int) { 217 | for 
j := 0; j < 1000; j++ { 218 | rate := a.GetSampleRate("key" + strconv.Itoa(i)) 219 | assert.NotEqual(t, rate <= 0, true, "rate should never be lte zero", rate) 220 | } 221 | wg.Done() 222 | }(i) 223 | } 224 | wg.Done() 225 | }() 226 | go func() { 227 | for i := 0; i < 100; i++ { 228 | a.updateMaps() 229 | } 230 | wg.Done() 231 | }() 232 | wg.Wait() 233 | } 234 | 235 | func TestAvgSampleWithMinMaxKeys(t *testing.T) { 236 | a := &AvgSampleWithMin{ 237 | MaxKeys: 3, 238 | } 239 | a.currentCounts = map[string]float64{ 240 | "one": 1, 241 | "two": 1, 242 | } 243 | a.savedSampleRates = map[string]int{} 244 | 245 | // with MaxKeys 3, we are under the key limit, so three should get added 246 | a.GetSampleRate("three") 247 | assert.Equal(t, 3, len(a.currentCounts)) 248 | assert.Equal(t, 1., a.currentCounts["three"]) 249 | // Now we're at 3 keys - four should not be added 250 | a.GetSampleRate("four") 251 | assert.Equal(t, 3, len(a.currentCounts)) 252 | _, found := a.currentCounts["four"] 253 | assert.Equal(t, false, found) 254 | // We should still support bumping counts for existing keys 255 | a.GetSampleRate("one") 256 | assert.Equal(t, 3, len(a.currentCounts)) 257 | assert.Equal(t, 2., a.currentCounts["one"]) 258 | } 259 | 260 | func TestAvgSampleWithMin_Start(t *testing.T) { 261 | tests := []struct { 262 | name string 263 | ClearFrequencySec int 264 | ClearFrequencyDuration time.Duration 265 | wantDuration time.Duration 266 | wantErr bool 267 | }{ 268 | {"sec only", 2, 0, 2 * time.Second, false}, 269 | {"dur only", 0, 1003 * time.Millisecond, 1003 * time.Millisecond, false}, 270 | {"default", 0, 0, 30 * time.Second, false}, 271 | {"both", 2, 2 * time.Second, 0, true}, 272 | } 273 | for _, tt := range tests { 274 | t.Run(tt.name, func(t *testing.T) { 275 | a := &AvgSampleWithMin{ 276 | ClearFrequencySec: tt.ClearFrequencySec, 277 | ClearFrequencyDuration: tt.ClearFrequencyDuration, 278 | } 279 | err := a.Start() 280 | if (err != nil) != tt.wantErr { 281 | 
t.Errorf("AvgSampleWithMin error = %v, wantErr %v", err, tt.wantErr) 282 | } 283 | if err == nil { 284 | defer a.Stop() 285 | if tt.wantDuration != a.ClearFrequencyDuration { 286 | t.Errorf("AvgSampleWithMin duration mismatch = want %v, got %v", tt.wantDuration, a.ClearFrequencyDuration) 287 | } 288 | } 289 | }) 290 | } 291 | } 292 | -------------------------------------------------------------------------------- /blocklist.go: -------------------------------------------------------------------------------- 1 | package dynsampler 2 | 3 | import ( 4 | "fmt" 5 | "math" 6 | "sync" 7 | ) 8 | 9 | // BlockList is a data structure that keeps track of how often keys occur in a given time range in 10 | // order to perform windowed lookback sampling. BlockList operates with monotonically increasing 11 | // indexes, instead of timestamps. 12 | // A BlockList is a singly linked list of Blocks. Each Block has a frequency hashmap and a unique 13 | // index. 14 | type BlockList interface { 15 | IncrementKey(key string, keyIndex int64, count int) error 16 | AggregateCounts(currentIndex int64, lookbackIndex int64) map[string]int 17 | } 18 | 19 | type Block struct { 20 | index int64 // MUST be monotonically increasing. 21 | keyToCount map[string]int 22 | next *Block 23 | } 24 | 25 | // UnboundedBlockList can have unlimited keys. 26 | type UnboundedBlockList struct { 27 | head *Block // Sentinel node for our linked list. 28 | lock sync.Mutex 29 | } 30 | 31 | // Creates a new BlockList with a sentinel node. 32 | func NewUnboundedBlockList() BlockList { 33 | // Create a sentinel node. 34 | 35 | head := &Block{ 36 | index: math.MaxInt64, 37 | keyToCount: make(map[string]int), 38 | next: nil, 39 | } 40 | return &UnboundedBlockList{ 41 | head: head, 42 | } 43 | } 44 | 45 | // IncrementKey is used when we've encountered a new key. The current keyIndex is 46 | // also provided. This function will increment the key in the current block or 47 | // create a new block, if needed. 
The happy path invocation is very fast, O(1). 48 | // The count is the number of events that this call represents. 49 | func (b *UnboundedBlockList) IncrementKey(key string, keyIndex int64, count int) error { 50 | b.lock.Lock() 51 | defer b.lock.Unlock() 52 | return b.doIncrement(key, keyIndex, count) 53 | } 54 | 55 | func (b *UnboundedBlockList) doIncrement(key string, keyIndex int64, count int) error { 56 | // A block matching keyStamp exists. Just increment the key there. 57 | if b.head.next != nil && b.head.next.index == keyIndex { 58 | b.head.next.keyToCount[key] += count 59 | return nil 60 | } 61 | 62 | // We need to create a new block. 63 | currentFront := b.head.next 64 | b.head.next = &Block{ 65 | index: keyIndex, 66 | keyToCount: make(map[string]int), 67 | next: currentFront, 68 | } 69 | b.head.next.keyToCount[key] += count 70 | return nil 71 | } 72 | 73 | // AggregateCounts returns a frequency hashmap of all counts from the currentIndex to the 74 | // lookbackIndex. It also drops old blocks. This is an O(N) operation, where N is the length of the 75 | // linked list. 76 | func (b *UnboundedBlockList) AggregateCounts( 77 | currentIndex int64, 78 | lookbackIndex int64, 79 | ) map[string]int { 80 | b.lock.Lock() 81 | defer b.lock.Unlock() 82 | return b.doAggregation(currentIndex, lookbackIndex) 83 | } 84 | 85 | // Split out the actual functionality into doAggregation to support better locking semantics. 86 | func (b *UnboundedBlockList) doAggregation( 87 | currentIndex int64, 88 | lookbackIndex int64, 89 | ) (aggregateCounts map[string]int) { 90 | aggregateCounts = make(map[string]int) 91 | 92 | // Aggregate from currentIndex - 1 and lookback lookbackIndex. 93 | startIndex := currentIndex - 1 94 | finishIndex := startIndex - lookbackIndex 95 | 96 | // front is a pointer that iterates through our linked list. Start at the sentinel. 97 | front := b.head 98 | for front != nil { 99 | // Start aggregation at current index - 1. 
100 | if front.index <= startIndex { 101 | for k, v := range front.keyToCount { 102 | aggregateCounts[k] += v 103 | } 104 | } 105 | 106 | // Stop and drop remaining blocks after t - lookbackIndex. 107 | // Never drop the first block. 108 | if front.next != nil && front.next.index <= finishIndex { 109 | front.next = nil 110 | break 111 | } 112 | front = front.next 113 | } 114 | 115 | return aggregateCounts 116 | } 117 | 118 | // BoundedBlockList has a limit on the maximum number of keys within the blocklist. Additional keys 119 | // will be dropped by IncrementKey. 120 | // This is implemented with another data structure on top of an UnboundedBlockList that keeps track 121 | // of total keys. We use a map from keys to indexes that the key appears in. 122 | type BoundedBlockList struct { 123 | baseList *UnboundedBlockList 124 | 125 | maxKeys int 126 | keyToIndexes map[string][]int64 127 | } 128 | 129 | // Error encountered when the BoundedBlockList has reached maxKeys capacity. 130 | type MaxSizeError struct { 131 | key string 132 | } 133 | 134 | func (e MaxSizeError) Error() string { 135 | return fmt.Sprintf("Max size for blocklist reached, new key %s rejected.", e.key) 136 | } 137 | 138 | // Creates a new BlockList with a sentinel node. 139 | func NewBoundedBlockList(maxKeys int) BlockList { 140 | return &BoundedBlockList{ 141 | baseList: NewUnboundedBlockList().(*UnboundedBlockList), 142 | maxKeys: maxKeys, 143 | keyToIndexes: make(map[string][]int64), 144 | } 145 | } 146 | 147 | // IncrementKey will always increment an existing key. If the key is new, it will be rejected if 148 | // there are maxKeys existing entries. 
149 | func (b *BoundedBlockList) IncrementKey(key string, keyIndex int64, count int) error { 150 | b.baseList.lock.Lock() 151 | defer b.baseList.lock.Unlock() 152 | 153 | canInsert := b.tryInsert(key, keyIndex) 154 | if !canInsert { 155 | return MaxSizeError{key: key} 156 | } 157 | 158 | b.baseList.doIncrement(key, keyIndex, count) 159 | return nil 160 | } 161 | 162 | // tryInsert checks if we can insert a new key. This function is NOT idempotent. 163 | func (b *BoundedBlockList) tryInsert(key string, keyIndex int64) bool { 164 | // See if we can insert through reads. 165 | 166 | // Reject new keys at max capacity. 167 | if len(b.keyToIndexes) >= b.maxKeys { 168 | return false 169 | } 170 | 171 | indexes, exists := b.keyToIndexes[key] 172 | if exists && len(indexes) > 0 && indexes[0] == keyIndex { 173 | return true 174 | } 175 | 176 | if exists { 177 | b.keyToIndexes[key] = append([]int64{keyIndex}, indexes...) 178 | } else { 179 | b.keyToIndexes[key] = []int64{keyIndex} 180 | } 181 | return true 182 | } 183 | 184 | func (b *BoundedBlockList) AggregateCounts( 185 | currentIndex int64, 186 | lookbackIndex int64, 187 | ) (aggregateCounts map[string]int) { 188 | b.baseList.lock.Lock() 189 | defer b.baseList.lock.Unlock() 190 | aggregateCounts = b.baseList.doAggregation(currentIndex, lookbackIndex) 191 | 192 | startIndex := currentIndex - 1 193 | finishIndex := startIndex - lookbackIndex 194 | 195 | for key, indexes := range b.keyToIndexes { 196 | dropIdx := -1 197 | for i := 0; i < len(indexes); i++ { 198 | if indexes[i] <= finishIndex { 199 | dropIdx = i 200 | break 201 | } 202 | } 203 | if dropIdx == -1 { // Nothing needs to be dropped. 204 | continue 205 | } else if dropIdx == 0 { // Everything needs to be dropped. 206 | delete(b.keyToIndexes, key) 207 | } else { // Perform a partial drop. 
208 | b.keyToIndexes[key] = indexes[0:dropIdx] 209 | } 210 | } 211 | 212 | return aggregateCounts 213 | } 214 | -------------------------------------------------------------------------------- /blocklist_test.go: -------------------------------------------------------------------------------- 1 | package dynsampler 2 | 3 | import ( 4 | "fmt" 5 | "math/rand" 6 | "sync" 7 | "testing" 8 | "time" 9 | 10 | "sync/atomic" 11 | 12 | "github.com/stretchr/testify/assert" 13 | ) 14 | 15 | // AtomicRecord is the naive implementation of blocklist that serves as the reference implementation 16 | // for our tests. 17 | // This datastructure is designed to be completely linearizable, as it has a single lock that it 18 | // acquires with every operation. 19 | 20 | type pair struct { 21 | index int64 22 | count int 23 | } 24 | 25 | type AtomicRecord struct { 26 | records map[string][]pair 27 | maxKeys int 28 | lock sync.Mutex 29 | } 30 | 31 | func NewAtomicRecord(maxKeys int) *AtomicRecord { 32 | return &AtomicRecord{ 33 | records: make(map[string][]pair), 34 | maxKeys: maxKeys, 35 | } 36 | } 37 | 38 | func (r *AtomicRecord) IncrementKey(key string, keyIndex int64, count int) error { 39 | 40 | r.lock.Lock() 41 | defer r.lock.Unlock() 42 | 43 | if len(r.records) >= r.maxKeys { 44 | return MaxSizeError{key: key} 45 | } 46 | r.records[key] = append([]pair{{index: keyIndex, count: count}}, r.records[key]...) 47 | return nil 48 | } 49 | 50 | func (r *AtomicRecord) AggregateCounts( 51 | currentIndex int64, 52 | lookbackIndex int64, 53 | ) (aggregateCounts map[string]int) { 54 | r.lock.Lock() 55 | defer r.lock.Unlock() 56 | 57 | startIndex := currentIndex - 1 58 | finishIndex := startIndex - lookbackIndex 59 | 60 | aggregateCounts = make(map[string]int) 61 | for key, record := range r.records { 62 | // Aggregate. 
63 | lastIndex := -1 64 | for i, r := range record { 65 | if r.index <= startIndex && r.index > finishIndex { 66 | aggregateCounts[key] += r.count 67 | } 68 | if lastIndex == -1 && r.index <= finishIndex { 69 | lastIndex = i 70 | } 71 | } 72 | if lastIndex == -1 { 73 | continue 74 | } else if lastIndex == 0 { 75 | delete(r.records, key) 76 | continue 77 | } 78 | r.records[key] = record[0:lastIndex] 79 | } 80 | 81 | return aggregateCounts 82 | } 83 | 84 | func getSeededRandom() (*rand.Rand, int64) { 85 | seed := time.Now().UnixNano() 86 | s1 := rand.NewSource(seed) 87 | return rand.New(s1), seed 88 | } 89 | 90 | // Basic sanity test. 91 | func TestSanity(t *testing.T) { 92 | blockList := NewUnboundedBlockList() 93 | atomicRecord := NewAtomicRecord(10) 94 | testKey := "test_key" 95 | currentIndex := int64(0) 96 | 97 | for i := 0; i < 10; i++ { 98 | blockList.IncrementKey(testKey, currentIndex, 1) 99 | atomicRecord.IncrementKey(testKey, currentIndex, 1) 100 | currentIndex += 1 101 | } 102 | 103 | assert.Equal(t, atomicRecord.AggregateCounts(1, 5), blockList.AggregateCounts(1, 5)) 104 | assert.Equal(t, atomicRecord.AggregateCounts(0, 2), blockList.AggregateCounts(0, 2)) 105 | assert.Equal(t, atomicRecord.AggregateCounts(6, 5), blockList.AggregateCounts(6, 5)) 106 | } 107 | 108 | func TestBounded(t *testing.T) { 109 | blockList := NewBoundedBlockList(10) 110 | atomicRecord := NewAtomicRecord(10) 111 | 112 | currentIndex := int64(0) 113 | 114 | // Test basic dropping. 115 | for i := 0; i < 15; i++ { 116 | testKey := fmt.Sprintf("test_%d", i) 117 | actualErr := blockList.IncrementKey(testKey, currentIndex, 1) 118 | expectedErr := atomicRecord.IncrementKey(testKey, currentIndex, 1) 119 | assert.Equal(t, expectedErr, actualErr) 120 | } 121 | 122 | // Test expire. 123 | currentIndex = 10 124 | assert.Equal(t, atomicRecord.AggregateCounts(currentIndex, 5), 125 | blockList.AggregateCounts(currentIndex, 5)) 126 | 127 | // Consistent single insert per count. 
128 | for i := 0; i < 15; i++ { 129 | testKey := fmt.Sprintf("test_%d", i) 130 | actualErr := blockList.IncrementKey(testKey, currentIndex, 1) 131 | expectedErr := atomicRecord.IncrementKey(testKey, currentIndex, 1) 132 | assert.Equal(t, expectedErr, actualErr) 133 | assert.Equal(t, atomicRecord.AggregateCounts(currentIndex, 10), 134 | blockList.AggregateCounts(currentIndex, 10)) 135 | currentIndex += 1 136 | } 137 | 138 | // Random insert number of each key. 139 | random, _ := getSeededRandom() 140 | for i := 0; i < 30; i++ { 141 | for j := 0; j < 10; j++ { 142 | keySuffix := random.Intn(20) 143 | testKey := fmt.Sprintf("test_%d", keySuffix) 144 | actualErr := blockList.IncrementKey(testKey, currentIndex, 1) 145 | expectedErr := atomicRecord.IncrementKey(testKey, currentIndex, 1) 146 | assert.Equal(t, expectedErr, actualErr) 147 | } 148 | 149 | assert.Equal(t, atomicRecord.AggregateCounts(currentIndex, 10), 150 | blockList.AggregateCounts(currentIndex, 10)) 151 | currentIndex += 1 152 | } 153 | } 154 | 155 | // Simulate a real world use case and compare it against our reference implementation. 
156 | func compareConcurrency(t *testing.T, reference BlockList, actual BlockList) { 157 | globalIndex := int64(0) 158 | testKey := "test_key" 159 | 160 | done := make(chan bool) 161 | iterations := 50 162 | lock := sync.Mutex{} 163 | 164 | random, _ := getSeededRandom() 165 | 166 | // Index Ticker 167 | indexTicker := time.NewTicker(50 * time.Millisecond) 168 | go func() { 169 | for { 170 | select { 171 | case <-done: 172 | return 173 | case <-indexTicker.C: 174 | atomic.AddInt64(&globalIndex, 1) 175 | } 176 | } 177 | }() 178 | 179 | // Update and aggregation ticker 180 | updateTicker := time.NewTicker(55 * time.Millisecond) 181 | go func() { 182 | for { 183 | select { 184 | case <-done: 185 | return 186 | case <-updateTicker.C: 187 | 188 | lock.Lock() 189 | currentIndex := atomic.LoadInt64(&globalIndex) 190 | referenceAggregate := reference.AggregateCounts(currentIndex, 10) 191 | actualAggregate := actual.AggregateCounts(currentIndex, 10) 192 | 193 | assert.Equal(t, referenceAggregate, actualAggregate) 194 | lock.Unlock() 195 | } 196 | } 197 | }() 198 | 199 | wg := sync.WaitGroup{} 200 | for i := 0; i < 50; i++ { 201 | wg.Add(1) 202 | go func() { 203 | defer wg.Done() 204 | for j := 0; j < iterations; j++ { 205 | 206 | // These need to be performed atomically. 207 | lock.Lock() 208 | currentIndex := atomic.LoadInt64(&globalIndex) 209 | referenceErr := reference.IncrementKey(testKey, currentIndex, 1) 210 | actualErr := actual.IncrementKey(testKey, currentIndex, 1) 211 | assert.Equal(t, referenceErr, actualErr) 212 | 213 | sleepTime := time.Duration(random.Intn(100)) * time.Millisecond 214 | lock.Unlock() 215 | 216 | time.Sleep(sleepTime) 217 | } 218 | }() 219 | } 220 | wg.Wait() 221 | done <- true 222 | } 223 | 224 | func concurrentUpdates(t *testing.T, blockList BlockList) { 225 | start := make(chan bool) 226 | globalIndex := int64(0) 227 | 228 | // Concurrent inserts. 
229 | go func() { 230 | <-start 231 | for i := 0; i < 1000; i++ { 232 | for j := 0; j < 15; j++ { 233 | currentIndex := atomic.LoadInt64(&globalIndex) 234 | testKey := fmt.Sprintf("test_%d", j) 235 | blockList.IncrementKey(testKey, currentIndex, 1) 236 | } 237 | } 238 | }() 239 | // Concurrent aggregations. 240 | go func() { 241 | <-start 242 | for i := 0; i < 1000; i++ { 243 | currentIndex := atomic.LoadInt64(&globalIndex) 244 | blockList.AggregateCounts(currentIndex, 10) 245 | atomic.AddInt64(&globalIndex, 1) 246 | } 247 | }() 248 | close(start) 249 | } 250 | 251 | func TestAllConcurrency(t *testing.T) { 252 | compareConcurrency(t, NewUnboundedBlockList(), NewAtomicRecord(10)) 253 | compareConcurrency(t, NewBoundedBlockList(10), NewAtomicRecord(10)) 254 | 255 | concurrentUpdates(t, NewUnboundedBlockList()) 256 | concurrentUpdates(t, NewBoundedBlockList(10)) 257 | } 258 | -------------------------------------------------------------------------------- /doc.go: -------------------------------------------------------------------------------- 1 | /* 2 | Package dynsampler contains several sampling algorithms to help you select a representative set of events instead of a full stream. 3 | 4 | This package is intended to help sample a stream of tracking events, where events are typically created in response to a stream of traffic (for the purposes of logging or debugging). In general, sampling is used to reduce the total volume of events necessary to represent the stream of traffic in a meaningful way. 5 | 6 | For the purposes of these examples, the "traffic" will be a set of HTTP requests being handled by a server, and "event" will be a blob of metadata about a given HTTP request that might be useful to keep track of later. A "sample rate" of 100 means that for every 100 requests, we capture a single event and indicate that it represents 100 similar requests. 7 | 8 | Use 9 | 10 | Use the `Sampler` interface in your code. 
Each different sampling algorithm implements the Sampler interface. 11 | 12 | The following guidelines can help you choose a sampler. Depending on the shape of your traffic, one may serve better than another, or you may need to write a new one! Please consider contributing it back to this package if you do. 13 | 14 | * If your system has a completely homogeneous stream of requests: use `Static` to use a constant sample rate. 15 | 16 | * If your system has a steady stream of requests and a well-known low cardinality partition key (e.g. http status): use `Static` and override sample rates on a per-key basis (e.g. if you know you want to sample `HTTP 200/OK` events at a different rate from `HTTP 503/Server Error`). 17 | 18 | * If your logging system has a strict cap on the rate it can receive events, use `TotalThroughput`, which will calculate sample rates based on keeping *the entire system's* representative event throughput right around (or under) a particular cap. 19 | 20 | * If your system has a rough cap on the rate it can receive events and your partitioned keyspace is fairly steady, use `PerKeyThroughput`, which will calculate sample rates based on keeping the event throughput roughly constant *per key/partition* (e.g. per user id). 21 | 22 | * The best choice for a system with a large key space and a large disparity between the highest volume and lowest volume keys is `AvgSampleWithMin` - it will increase the sample rate of higher volume traffic proportionally to the logarithm of the specific key's volume. If total traffic falls below a configured minimum, it stops sampling, because the traffic is too low to warrant it. 23 | 24 | * `EMASampleRate` works like `AvgSampleRate`, but calculates sample rates based on a moving average (Exponential Moving Average) of many measurement intervals rather than a single isolated interval. 
In addition, it can detect large bursts in traffic and will trigger a recalculation of sample rates before the regular interval. 25 | 26 | Each sampler implementation below has additional configuration parameters and a 27 | detailed description of how it chooses a sample rate. 28 | 29 | Some implementations implement `SaveState` and `LoadState` - enabling you to serialize the Sampler's internal state 30 | and load it back. This is useful, for example, if you want to avoid losing calculated sample rates between process 31 | restarts. 32 | 33 | */ 34 | package dynsampler 35 | -------------------------------------------------------------------------------- /dynsampler.go: -------------------------------------------------------------------------------- 1 | package dynsampler 2 | 3 | // Sampler is the interface to samplers using different methods to determine 4 | // sample rate. You should instantiate one of the actual samplers in this 5 | // package, depending on the sample method you'd like to use. Each sampling 6 | // method has its own set of struct variables you should set before Start()ing 7 | // the sampler. 8 | type Sampler interface { 9 | // Start initializes the sampler. You should call Start() before using the 10 | // sampler. 11 | Start() error 12 | 13 | // Stop halts the sampler and any background goroutines 14 | Stop() error 15 | 16 | // GetSampleRate will return the sample rate to use for the given key 17 | // string. You should call it with whatever key you choose to use to 18 | // partition traffic into different sample rates. It assumes that you're 19 | // calling it for a single item to be sampled (typically a span from a 20 | // trace), and simply calls GetSampleRateMulti with 1 for the second 21 | // parameter. 22 | GetSampleRate(string) int 23 | 24 | // GetSampleRateMulti will return the sample rate to use for the given key 25 | // string. 
You should call it with whatever key you choose to use to 26 | // partition traffic into different sample rates. It assumes you're calling 27 | // it for a group of samples. The second parameter is the number of samples 28 | // this call represents. 29 | GetSampleRateMulti(string, int) int 30 | 31 | // SaveState returns a byte array containing the state of the Sampler implementation. 32 | // It can be used to persist state between process restarts. 33 | SaveState() ([]byte, error) 34 | 35 | // LoadState accepts a byte array containing the serialized, previous state of the sampler 36 | // implementation. It should be called before `Start`. 37 | LoadState([]byte) error 38 | 39 | // GetMetrics returns a map of metrics about the sampler's performance. 40 | // All values are returned as int64; counters are cumulative and the names 41 | // always end with "_count", while gauges are instantaneous with no particular naming convention. 42 | // All names are prefixed with the given string. 43 | GetMetrics(prefix string) map[string]int64 44 | } 45 | -------------------------------------------------------------------------------- /emasamplerate.go: -------------------------------------------------------------------------------- 1 | package dynsampler 2 | 3 | import ( 4 | "encoding/json" 5 | "errors" 6 | "fmt" 7 | "math" 8 | "sync" 9 | "time" 10 | ) 11 | 12 | // EMASampleRate implements Sampler and attempts to average a given sample rate, 13 | // weighting rare traffic and frequent traffic differently so as to end up with 14 | // the correct average. This method breaks down when total traffic is low 15 | // because it will be excessively sampled. 16 | // 17 | // Based on the AvgSampleRate implementation, EMASampleRate differs in that rather 18 | // than compute rate based on a periodic sample of traffic, it maintains an Exponential 19 | // Moving Average of counts seen per key, and adjusts this average at regular intervals. 
20 | // The weight applied to more recent intervals is defined by `Weight`, a number between 21 | // (0, 1) - larger values weight the average more toward recent observations. In other words, 22 | // a larger weight will cause sample rates to adapt more quickly to traffic patterns, 23 | // while a smaller weight will result in sample rates that are less sensitive to bursts or drops 24 | // in traffic and thus more consistent over time. 25 | // 26 | // Keys that are not found in the EMA will always have a sample 27 | // rate of 1. Keys that occur more frequently will be sampled on a logarithmic 28 | // curve. In other words, every key will be represented at least once in any 29 | // given window and more frequent keys will have their sample rate 30 | // increased proportionally to wind up with the goal sample rate. 31 | type EMASampleRate struct { 32 | // DEPRECATED -- use AdjustmentIntervalDuration 33 | // AdjustmentInterval defines how often (in seconds) we adjust the moving average from 34 | // recent observations. 35 | AdjustmentInterval int 36 | 37 | // AdjustmentIntervalDuration is how often we adjust the moving average from 38 | // recent observations. 39 | // Note that either this or AdjustmentInterval can be specified, but not both. 40 | // If neither one is set, the default is 15s. 41 | AdjustmentIntervalDuration time.Duration 42 | 43 | // Weight is a value between (0, 1) indicating the weighting factor used to adjust 44 | // the EMA. With larger values, newer data will influence the average more, and older 45 | // values will be factored out more quickly. In mathematical literature concerning EMA, 46 | // this is referred to as the `alpha` constant. 47 | // Default is 0.5 48 | Weight float64 49 | 50 | // GoalSampleRate is the average sample rate we're aiming for, across all 51 | // events. Default 10 52 | GoalSampleRate int 53 | 54 | // MaxKeys, if greater than 0, limits the number of distinct keys tracked in EMA. 
55 | // Once MaxKeys is reached, new keys will not be included in the sample rate map, but 56 | // existing keys will continue to be counted. 57 | MaxKeys int 58 | 59 | // AgeOutValue indicates the threshold for removing keys from the EMA. The EMA of any key will approach 0 60 | // if it is not repeatedly observed, but will never truly reach it, so we have to decide what constitutes "zero". 61 | // Keys with averages below this threshold will be removed from the EMA. Default is the same as Weight, as this prevents 62 | // a key with the smallest integer value (1) from being aged out immediately. This value should generally be <= Weight, 63 | // unless you have very specific reasons to set it higher. 64 | AgeOutValue float64 65 | 66 | // BurstMultiple, if set, is multiplied by the sum of the running average of counts to define 67 | // the burst detection threshold. If total counts observed for a given interval exceed the threshold, 68 | // the EMA is updated immediately, rather than waiting on the AdjustmentIntervalDuration. 69 | // Defaults to 2; negative value disables. With a default of 2, if your traffic suddenly doubles, 70 | // burst detection will kick in. 71 | BurstMultiple float64 72 | 73 | // BurstDetectionDelay indicates the number of intervals to run after Start is called before burst detection kicks in. 74 | // Defaults to 3 75 | BurstDetectionDelay uint 76 | 77 | savedSampleRates map[string]int 78 | currentCounts map[string]float64 79 | movingAverage map[string]float64 80 | burstThreshold float64 81 | currentBurstSum float64 82 | intervalCount uint 83 | burstSignal chan struct{} 84 | 85 | // haveData indicates that we have gotten a sample of traffic. 
Before we've 86 | // gotten any samples of traffic, we should use the default goal 87 | // sample rate for all events instead of sampling everything at 1 88 | haveData bool 89 | updating bool 90 | done chan struct{} 91 | 92 | lock sync.Mutex 93 | 94 | // used only in tests 95 | testSignalMapsDone chan struct{} 96 | 97 | // metrics 98 | requestCount int64 99 | eventCount int64 100 | burstCount int64 101 | } 102 | 103 | // Ensure we implement the sampler interface 104 | var _ Sampler = (*EMASampleRate)(nil) 105 | 106 | func (e *EMASampleRate) Start() error { 107 | // apply defaults 108 | if e.AdjustmentIntervalDuration != 0 && e.AdjustmentInterval != 0 { 109 | return fmt.Errorf("the AdjustmentInterval configuration value is deprecated; use only AdjustmentIntervalDuration") 110 | } 111 | 112 | if e.AdjustmentIntervalDuration == 0 && e.AdjustmentInterval == 0 { 113 | e.AdjustmentIntervalDuration = 15 * time.Second 114 | } else if e.AdjustmentInterval != 0 { 115 | e.AdjustmentIntervalDuration = time.Duration(e.AdjustmentInterval) * time.Second 116 | } 117 | 118 | if e.GoalSampleRate == 0 { 119 | e.GoalSampleRate = 10 120 | } 121 | if e.Weight == 0 { 122 | e.Weight = 0.5 123 | } 124 | if e.AgeOutValue == 0 { 125 | e.AgeOutValue = e.Weight 126 | } 127 | if e.BurstMultiple == 0 { 128 | e.BurstMultiple = 2 129 | } 130 | if e.BurstDetectionDelay == 0 { 131 | e.BurstDetectionDelay = 3 132 | } 133 | 134 | // Don't override these maps at startup in case they were loaded from a previous state 135 | e.currentCounts = make(map[string]float64) 136 | if e.savedSampleRates == nil { 137 | e.savedSampleRates = make(map[string]int) 138 | } 139 | if e.movingAverage == nil { 140 | e.movingAverage = make(map[string]float64) 141 | } 142 | e.burstSignal = make(chan struct{}) 143 | e.done = make(chan struct{}) 144 | 145 | go func() { 146 | ticker := time.NewTicker(e.AdjustmentIntervalDuration) 147 | defer ticker.Stop() 148 | for { 149 | select { 150 | case <-e.burstSignal: 151 | // 
reset ticker when we get a burst 152 | ticker.Stop() 153 | ticker = time.NewTicker(e.AdjustmentIntervalDuration) 154 | e.updateMaps() 155 | case <-ticker.C: 156 | e.updateMaps() 157 | e.intervalCount++ 158 | case <-e.done: 159 | return 160 | } 161 | } 162 | }() 163 | return nil 164 | } 165 | 166 | func (e *EMASampleRate) Stop() error { 167 | close(e.done) 168 | return nil 169 | } 170 | 171 | // updateMaps calculates a new saved rate map based on the contents of the 172 | // counter map 173 | func (e *EMASampleRate) updateMaps() { 174 | e.lock.Lock() 175 | if e.testSignalMapsDone != nil { 176 | defer func() { 177 | e.testSignalMapsDone <- struct{}{} 178 | }() 179 | } 180 | // short circuit if no traffic 181 | if len(e.currentCounts) == 0 { 182 | // No traffic the last interval, don't update anything. This is deliberate to avoid 183 | // the average decaying when there's no traffic (comes in bursts, or there's some kind of outage). 184 | e.lock.Unlock() 185 | return 186 | } 187 | // If there is another updateMaps going, bail 188 | if e.updating { 189 | e.lock.Unlock() 190 | return 191 | } 192 | e.updating = true 193 | // make a local copy of the sample counters for calculation 194 | tmpCounts := e.currentCounts 195 | e.currentCounts = make(map[string]float64) 196 | e.currentBurstSum = 0 197 | e.lock.Unlock() 198 | 199 | e.updateEMA(tmpCounts) 200 | 201 | // Goal events to send this interval is the total count of events in the EMA 202 | // divided by the desired average sample rate 203 | var sumEvents float64 204 | for _, count := range e.movingAverage { 205 | sumEvents += math.Max(1, count) 206 | } 207 | 208 | // Store this for burst detection. This is checked in GetSampleRate 209 | // so we need to grab the lock when we update it. 
210 | e.lock.Lock() 211 | e.burstThreshold = sumEvents * e.BurstMultiple 212 | e.lock.Unlock() 213 | 214 | goalCount := float64(sumEvents) / float64(e.GoalSampleRate) 215 | // goalRatio is the goalCount divided by the sum of all the log values - it 216 | // determines what percentage of the total event space belongs to each key 217 | var logSum float64 218 | for _, count := range e.movingAverage { 219 | // We take the max of (1, count) because count * weight is < 1 for 220 | // very small counts, which throws off the logSum and can cause 221 | // incorrect samples rates to be computed when throughput is low 222 | logSum += math.Log10(math.Max(1, count)) 223 | } 224 | goalRatio := goalCount / logSum 225 | 226 | newSavedSampleRates := calculateSampleRates(goalRatio, e.movingAverage) 227 | e.lock.Lock() 228 | defer e.lock.Unlock() 229 | e.savedSampleRates = newSavedSampleRates 230 | e.haveData = true 231 | e.updating = false 232 | } 233 | 234 | // GetSampleRate takes a key and returns the appropriate sample rate for that 235 | // key. 236 | func (e *EMASampleRate) GetSampleRate(key string) int { 237 | return e.GetSampleRateMulti(key, 1) 238 | } 239 | 240 | // GetSampleRateMulti takes a key representing count spans and returns the 241 | // appropriate sample rate for that key. 242 | func (e *EMASampleRate) GetSampleRateMulti(key string, count int) int { 243 | e.lock.Lock() 244 | defer e.lock.Unlock() 245 | 246 | e.requestCount++ 247 | e.eventCount += int64(count) 248 | 249 | // Enforce MaxKeys limit on the size of the map 250 | if e.MaxKeys > 0 { 251 | // If a key already exists, increment it. 
If not, but we're under the limit, store a new key 252 | if _, found := e.currentCounts[key]; found || len(e.currentCounts) < e.MaxKeys { 253 | e.currentCounts[key] += float64(count) 254 | e.currentBurstSum += float64(count) 255 | } 256 | } else { 257 | e.currentCounts[key] += float64(count) 258 | e.currentBurstSum += float64(count) 259 | } 260 | 261 | // Enforce the burst threshold 262 | if e.burstThreshold > 0 && e.currentBurstSum >= e.burstThreshold && e.intervalCount >= e.BurstDetectionDelay { 263 | // reset the burst sum to prevent additional burst updates from occurring while updateMaps is running 264 | e.currentBurstSum = 0 265 | e.burstCount++ 266 | // send but don't block - consuming is blocked on updateMaps, which takes the same lock we're holding 267 | select { 268 | case e.burstSignal <- struct{}{}: 269 | default: 270 | } 271 | } 272 | 273 | if !e.haveData { 274 | return e.GoalSampleRate 275 | } 276 | if rate, found := e.savedSampleRates[key]; found { 277 | return rate 278 | } 279 | return 1 280 | } 281 | 282 | func (e *EMASampleRate) updateEMA(newCounts map[string]float64) { 283 | keysToUpdate := make([]string, 0, len(e.movingAverage)) 284 | for key := range e.movingAverage { 285 | keysToUpdate = append(keysToUpdate, key) 286 | } 287 | 288 | // Update any existing keys with new values 289 | for _, key := range keysToUpdate { 290 | var newAvg float64 291 | // Was this key seen in the last interval? Adjust by that amount 292 | if val, found := newCounts[key]; found { 293 | newAvg = adjustAverage(e.movingAverage[key], val, e.Weight) 294 | } else { 295 | // Otherwise adjust by zero 296 | newAvg = adjustAverage(e.movingAverage[key], 0, e.Weight) 297 | } 298 | 299 | // Age out this value if it's too small to care about for calculating sample rates 300 | // This is also necessary to keep our map from going forever. 
301 | if newAvg < e.AgeOutValue { 302 | delete(e.movingAverage, key) 303 | } else { 304 | e.movingAverage[key] = newAvg 305 | } 306 | // We've processed this key - don't process it again when we look at new counts 307 | delete(newCounts, key) 308 | } 309 | 310 | for key := range newCounts { 311 | newAvg := adjustAverage(0, newCounts[key], e.Weight) 312 | if newAvg >= e.AgeOutValue { 313 | e.movingAverage[key] = newAvg 314 | } 315 | } 316 | } 317 | 318 | type emaSampleRateState struct { 319 | // These fields are exported for use by `JSON.Marshal` and `JSON.Unmarshal` 320 | SavedSampleRates map[string]int `json:"saved_sample_rates"` 321 | MovingAverage map[string]float64 `json:"moving_average"` 322 | } 323 | 324 | // SaveState returns a byte array with a JSON representation of the sampler state 325 | func (e *EMASampleRate) SaveState() ([]byte, error) { 326 | e.lock.Lock() 327 | defer e.lock.Unlock() 328 | 329 | if e.savedSampleRates == nil { 330 | return nil, errors.New("saved sample rate map is nil") 331 | } 332 | if e.movingAverage == nil { 333 | return nil, errors.New("moving average map is nil") 334 | } 335 | s := &emaSampleRateState{SavedSampleRates: e.savedSampleRates, MovingAverage: e.movingAverage} 336 | return json.Marshal(s) 337 | } 338 | 339 | // LoadState accepts a byte array with a JSON representation of a previous instance's 340 | // state 341 | func (e *EMASampleRate) LoadState(state []byte) error { 342 | e.lock.Lock() 343 | defer e.lock.Unlock() 344 | 345 | s := emaSampleRateState{} 346 | err := json.Unmarshal(state, &s) 347 | if err != nil { 348 | return err 349 | } 350 | 351 | // Load the previously calculated sample rates 352 | e.savedSampleRates = s.SavedSampleRates 353 | e.movingAverage = s.MovingAverage 354 | // Allow GetSampleRate to return calculated sample rates from the loaded map 355 | e.haveData = true 356 | 357 | return nil 358 | } 359 | 360 | func (e *EMASampleRate) GetMetrics(prefix string) map[string]int64 { 361 | e.lock.Lock() 362 | 
// adjustAverage folds a new observation into an exponential moving average.
// alpha is the weight given to value; the remaining (1 - alpha) is given to
// oldAvg. The weighted sum of the two is returned.
func adjustAverage(oldAvg, value float64, alpha float64) float64 {
	return alpha*value + (1.0-alpha)*oldAvg
}
GoalSampleRate: 10, 51 | currentCounts: map[string]float64{}, 52 | } 53 | rate := e.GetSampleRate("key") 54 | assert.Equal(t, rate, 10) 55 | assert.Equal(t, e.currentCounts["key"], float64(1)) 56 | } 57 | 58 | func TestEMASampleUpdateMaps(t *testing.T) { 59 | e := &EMASampleRate{ 60 | GoalSampleRate: 20, 61 | Weight: 0.2, 62 | AgeOutValue: 0.2, 63 | } 64 | tsts := []struct { 65 | inputSampleCount map[string]float64 66 | expectedSavedSampleRates map[string]int 67 | }{ 68 | { 69 | map[string]float64{ 70 | "one": 1, 71 | "two": 1, 72 | "three": 2, 73 | "four": 5, 74 | "five": 8, 75 | "six": 15, 76 | "seven": 45, 77 | "eight": 612, 78 | "nine": 2000, 79 | "ten": 10000, 80 | }, 81 | map[string]int{ 82 | "one": 1, 83 | "two": 1, 84 | "three": 1, 85 | "four": 1, 86 | "five": 1, 87 | "six": 1, 88 | "seven": 1, 89 | "eight": 6, 90 | "nine": 14, 91 | "ten": 47, 92 | }, 93 | }, 94 | { 95 | map[string]float64{ 96 | "one": 1, 97 | "two": 1, 98 | "three": 2, 99 | "four": 5, 100 | "five": 8, 101 | "six": 15, 102 | "seven": 45, 103 | "eight": 50, 104 | "nine": 60, 105 | }, 106 | map[string]int{ 107 | "one": 1, 108 | "two": 1, 109 | "three": 2, 110 | "four": 5, 111 | "five": 8, 112 | "six": 11, 113 | "seven": 24, 114 | "eight": 26, 115 | "nine": 30, 116 | }, 117 | }, 118 | { 119 | map[string]float64{ 120 | "one": 1, 121 | "two": 1, 122 | "three": 2, 123 | "four": 5, 124 | "five": 7, 125 | }, 126 | map[string]int{ 127 | "one": 1, 128 | "two": 1, 129 | "three": 2, 130 | "four": 5, 131 | "five": 7, 132 | }, 133 | }, 134 | { 135 | map[string]float64{ 136 | "one": 1000, 137 | "two": 1000, 138 | "three": 2000, 139 | "four": 5000, 140 | "five": 7000, 141 | }, 142 | map[string]int{ 143 | "one": 7, 144 | "two": 7, 145 | "three": 13, 146 | "four": 29, 147 | "five": 39, 148 | }, 149 | }, 150 | { 151 | map[string]float64{ 152 | "one": 6000, 153 | "two": 6000, 154 | "three": 6000, 155 | "four": 6000, 156 | "five": 6000, 157 | }, 158 | map[string]int{ 159 | "one": 20, 160 | "two": 20, 161 | 
"three": 20, 162 | "four": 20, 163 | "five": 20, 164 | }, 165 | }, 166 | { 167 | map[string]float64{ 168 | "one": 12000, 169 | }, 170 | map[string]int{ 171 | "one": 20, 172 | }, 173 | }, 174 | { 175 | map[string]float64{}, 176 | map[string]int{}, 177 | }, 178 | { 179 | map[string]float64{ 180 | "one": 10, 181 | "two": 1, 182 | "three": 1, 183 | "four": 1, 184 | "five": 1, 185 | "six": 1, 186 | "seven": 1, 187 | "eight": 1, 188 | "nine": 1, 189 | "ten": 1, 190 | "eleven": 1, 191 | "twelve": 1, 192 | "thirteen": 1, 193 | "fourteen": 1, 194 | "fifteen": 1, 195 | "sixteen": 1, 196 | "seventeen": 1, 197 | "eighteen": 1, 198 | "nineteen": 1, 199 | "twenty": 1, 200 | }, 201 | map[string]int{ 202 | "one": 7, 203 | "two": 1, 204 | "three": 1, 205 | "four": 1, 206 | "five": 1, 207 | "six": 1, 208 | "seven": 1, 209 | "eight": 1, 210 | "nine": 1, 211 | "ten": 1, 212 | "eleven": 1, 213 | "twelve": 1, 214 | "thirteen": 1, 215 | "fourteen": 1, 216 | "fifteen": 1, 217 | "sixteen": 1, 218 | "seventeen": 1, 219 | "eighteen": 1, 220 | "nineteen": 1, 221 | "twenty": 1, 222 | }, 223 | }, 224 | } 225 | for i, tst := range tsts { 226 | e.movingAverage = make(map[string]float64) 227 | e.savedSampleRates = make(map[string]int) 228 | 229 | // Test data is based on `TestAvgSampleUpdateMaps` for AvgSampleRate. 
230 | // To get the same sample rates though, we must reach averages that match 231 | // the inputs - for the EMA, the way to do this is to just apply the same 232 | // input values over and over and converge on that average 233 | for i := 0; i <= 100; i++ { 234 | input := make(map[string]float64) 235 | for k, v := range tst.inputSampleCount { 236 | input[k] = v 237 | } 238 | e.currentCounts = input 239 | e.updateMaps() 240 | } 241 | assert.Equal(t, 0, len(e.currentCounts)) 242 | assert.Equal(t, tst.expectedSavedSampleRates, e.savedSampleRates, fmt.Sprintf("test %d failed", i)) 243 | } 244 | } 245 | 246 | func TestEMASampleUpdateMapsSparseCounts(t *testing.T) { 247 | e := &EMASampleRate{ 248 | GoalSampleRate: 20, 249 | Weight: 0.2, 250 | AgeOutValue: 0.2, 251 | } 252 | 253 | e.movingAverage = make(map[string]float64) 254 | e.savedSampleRates = make(map[string]int) 255 | 256 | for i := 0; i <= 100; i++ { 257 | input := make(map[string]float64) 258 | // simulate steady stream of input from one key 259 | input["largest_count"] = 20 260 | // sporadic keys with single counts that come and go with each interval 261 | for j := 0; j < 5; j++ { 262 | key := randomString(8) 263 | input[key] = 1 264 | } 265 | e.currentCounts = input 266 | e.updateMaps() 267 | } 268 | assert.Equal(t, 16, e.savedSampleRates["largest_count"]) 269 | } 270 | 271 | func TestEMAAgesOutSmallValues(t *testing.T) { 272 | e := &EMASampleRate{ 273 | GoalSampleRate: 20, 274 | Weight: 0.2, 275 | AgeOutValue: 0.2, 276 | } 277 | e.movingAverage = make(map[string]float64) 278 | for i := 0; i < 100; i++ { 279 | e.currentCounts = map[string]float64{"foo": 500.0} 280 | e.updateMaps() 281 | } 282 | assert.Equal(t, 1, len(e.movingAverage)) 283 | assert.Equal(t, float64(500), math.Round(e.movingAverage["foo"])) 284 | for i := 0; i < 100; i++ { 285 | // "observe" no occurrences of foo for many iterations 286 | e.currentCounts = map[string]float64{"asdf": 1} 287 | e.updateMaps() 288 | } 289 | _, found := 
e.movingAverage["foo"] 290 | assert.Equal(t, false, found) 291 | _, found = e.movingAverage["asdf"] 292 | assert.Equal(t, true, found) 293 | } 294 | 295 | func TestEMABurstDetection(t *testing.T) { 296 | // Set the adjustment interval very high so that we never run the regular interval 297 | e := &EMASampleRate{AdjustmentIntervalDuration: 1 * time.Hour} 298 | err := e.Start() 299 | assert.Nil(t, err) 300 | 301 | // set some counts and compute the EMA 302 | e.currentCounts = map[string]float64{"foo": 1000} 303 | e.updateMaps() 304 | // should have a burst threshold computed now from this average 305 | // 1000 = 0.5 (weight) * 1000 * 2 (threshold multiplier) 306 | assert.Equal(t, float64(1000), e.burstThreshold) 307 | 308 | // Let's try and trigger a burst: 309 | for i := 0; i <= 1000; i++ { 310 | e.GetSampleRate("bar") 311 | } 312 | // burst sum isn't reset even though we're above our burst threshold 313 | // This is because we haven't processed enough intervals to do burst detection yet 314 | assert.Equal(t, float64(1001), e.currentBurstSum) 315 | // Now let's cheat and say we have 316 | e.intervalCount = e.BurstDetectionDelay 317 | e.testSignalMapsDone = make(chan struct{}) 318 | e.GetSampleRate("bar") 319 | // wait on updateMaps to complete 320 | <-e.testSignalMapsDone 321 | // currentBurstSum has been reset 322 | assert.Equal(t, float64(0), e.currentBurstSum) 323 | 324 | // ensure EMA is updated 325 | assert.Equal(t, float64(501), e.movingAverage["bar"]) 326 | } 327 | 328 | func TestEMAUpdateMapsRace(t *testing.T) { 329 | e := &EMASampleRate{AdjustmentIntervalDuration: 1 * time.Hour} 330 | e.testSignalMapsDone = make(chan struct{}, 1000) 331 | err := e.Start() 332 | assert.Nil(t, err) 333 | for i := 0; i < 1000; i++ { 334 | e.GetSampleRate("foo") 335 | go e.updateMaps() 336 | } 337 | done := 0 338 | for done != 1000 { 339 | <-e.testSignalMapsDone 340 | done++ 341 | } 342 | } 343 | 344 | func TestEMASampleRateSaveState(t *testing.T) { 345 | var sampler Sampler 
346 | esr := &EMASampleRate{} 347 | // ensure the interface is implemented 348 | sampler = esr 349 | err := sampler.Start() 350 | assert.Nil(t, err) 351 | 352 | esr.lock.Lock() 353 | esr.savedSampleRates = map[string]int{"foo": 2, "bar": 4} 354 | esr.movingAverage = map[string]float64{"foo": 500.1234, "bar": 9999.99} 355 | esr.haveData = true 356 | esr.lock.Unlock() 357 | 358 | assert.Equal(t, 2, sampler.GetSampleRate("foo")) 359 | assert.Equal(t, 4, sampler.GetSampleRate("bar")) 360 | 361 | state, err := sampler.SaveState() 362 | assert.Nil(t, err) 363 | 364 | var newSampler Sampler 365 | esr2 := &EMASampleRate{} 366 | newSampler = esr2 367 | 368 | err = newSampler.LoadState(state) 369 | assert.Nil(t, err) 370 | err = newSampler.Start() 371 | assert.Nil(t, err) 372 | 373 | assert.Equal(t, 2, newSampler.GetSampleRate("foo")) 374 | assert.Equal(t, 4, newSampler.GetSampleRate("bar")) 375 | esr2.lock.Lock() 376 | defer esr2.lock.Unlock() 377 | assert.Equal(t, float64(500.1234), esr2.movingAverage["foo"]) 378 | assert.Equal(t, float64(9999.99), esr2.movingAverage["bar"]) 379 | } 380 | 381 | // This is a long test because we generate a lot of random data and run it through the sampler 382 | // The goal is to determine if we actually hit the specified target rate (within a tolerance) an acceptable 383 | // number of times. 
Most of the time, the average sample rate of observations kept should be close 384 | // to the target rate 385 | func TestEMASampleRateHitsTargetRate(t *testing.T) { 386 | mrand.Seed(time.Now().Unix()) 387 | testRates := []int{50, 100} 388 | testKeyCount := []int{10, 100} 389 | tolerancePct := float64(0.2) 390 | 391 | for _, rate := range testRates { 392 | tolerance := float64(rate) * tolerancePct 393 | toleranceUpper := float64(rate) + tolerance 394 | toleranceLower := float64(rate) - tolerance 395 | 396 | for _, keyCount := range testKeyCount { 397 | sampler := &EMASampleRate{GoalSampleRate: rate, Weight: 0.5, AgeOutValue: 0.5, currentCounts: make(map[string]float64), movingAverage: make(map[string]float64)} 398 | 399 | // build a consistent set of keys to use 400 | keys := make([]string, keyCount) 401 | for i := 0; i < keyCount; i++ { 402 | keys[i] = randomString(8) 403 | } 404 | 405 | for i, key := range keys { 406 | // generate key counts of different magnitudes 407 | base := math.Pow10(i%3 + 1) 408 | count := float64(((i%10)+1))*base + float64(mrand.Intn(int(base))) 409 | sampler.currentCounts[key] = count 410 | } 411 | 412 | // build an initial set of sample rates so we don't just return the target rate 413 | sampler.updateMaps() 414 | 415 | var success float64 416 | 417 | for i := 0; i < 100; i++ { 418 | totalSampleRate := 0 419 | totalKeptObservations := 0 420 | for j, key := range keys { 421 | base := math.Pow10(j%3 + 1) 422 | count := float64(((j%10)+1))*base + float64(mrand.Intn(int(base))) 423 | for k := 0; k < int(count); k++ { 424 | rate := sampler.GetSampleRate(key) 425 | if mrand.Intn(rate) == 0 { 426 | totalSampleRate += rate 427 | totalKeptObservations++ 428 | } 429 | } 430 | } 431 | 432 | avgSampleRate := float64(totalSampleRate) / float64(totalKeptObservations) 433 | if avgSampleRate <= toleranceUpper && avgSampleRate >= toleranceLower { 434 | success++ 435 | } 436 | sampler.updateMaps() 437 | } 438 | 439 | assert.True(t, success/100.0 >= 0.95, 
"target rate test %d with key count %d failed with success rate of only %f", rate, keyCount, success/100.0) 440 | } 441 | } 442 | } 443 | 444 | // This is very like the above test, but it uses GetSampleRateMulti with a random value between 1 and 100. 445 | func TestEMASampleRateMultiHitsTargetRate(t *testing.T) { 446 | mrand.Seed(time.Now().Unix()) 447 | testRates := []int{50, 100} 448 | testKeyCount := []int{10, 50} 449 | tolerancePct := float64(0.2) 450 | 451 | for _, rate := range testRates { 452 | tolerance := float64(rate) * tolerancePct 453 | toleranceUpper := float64(rate) + tolerance 454 | toleranceLower := float64(rate) - tolerance 455 | 456 | for _, keyCount := range testKeyCount { 457 | sampler := &EMASampleRate{GoalSampleRate: rate, Weight: 0.5, AgeOutValue: 0.5, currentCounts: make(map[string]float64), movingAverage: make(map[string]float64)} 458 | 459 | // build a consistent set of keys to use 460 | keys := make([]string, keyCount) 461 | for i := 0; i < keyCount; i++ { 462 | keys[i] = randomString(8) 463 | } 464 | 465 | for i, key := range keys { 466 | // generate key counts of different magnitudes 467 | base := math.Pow10(i%3 + 1) 468 | count := float64(((i%10)+1))*base + float64(mrand.Intn(int(base))) 469 | sampler.currentCounts[key] = count 470 | } 471 | 472 | // build an initial set of sample rates so we don't just return the target rate 473 | sampler.updateMaps() 474 | 475 | var success float64 476 | 477 | for i := 0; i < 100; i++ { 478 | totalSampleRate := 0 479 | totalKeptObservations := 0 480 | for j, key := range keys { 481 | base := math.Pow10(j%3 + 1) 482 | count := float64(((j%10)+1))*base + float64(mrand.Intn(int(base))) 483 | for k := 0; k < int(count); k++ { 484 | rate := sampler.GetSampleRateMulti(key, mrand.Intn(100)) 485 | if mrand.Intn(rate) == 0 { 486 | totalSampleRate += rate 487 | totalKeptObservations++ 488 | } 489 | } 490 | } 491 | 492 | avgSampleRate := float64(totalSampleRate) / float64(totalKeptObservations) 493 | if 
avgSampleRate <= toleranceUpper && avgSampleRate >= toleranceLower { 494 | success++ 495 | } 496 | sampler.updateMaps() 497 | } 498 | 499 | assert.True(t, success/100.0 >= 0.95, "target rate test %d with key count %d failed with success rate of only %f", rate, keyCount, success/100.0) 500 | } 501 | } 502 | } 503 | 504 | func TestEMASampleRate_Start(t *testing.T) { 505 | tests := []struct { 506 | name string 507 | AdjustmentInterval int 508 | AdjustmentIntervalDuration time.Duration 509 | wantDuration time.Duration 510 | wantErr bool 511 | }{ 512 | {"sec only", 2, 0, 2 * time.Second, false}, 513 | {"dur only", 0, 1003 * time.Millisecond, 1003 * time.Millisecond, false}, 514 | {"default", 0, 0, 15 * time.Second, false}, 515 | {"both", 2, 2 * time.Second, 0, true}, 516 | } 517 | for _, tt := range tests { 518 | t.Run(tt.name, func(t *testing.T) { 519 | a := &EMASampleRate{ 520 | AdjustmentInterval: tt.AdjustmentInterval, 521 | AdjustmentIntervalDuration: tt.AdjustmentIntervalDuration, 522 | } 523 | err := a.Start() 524 | if (err != nil) != tt.wantErr { 525 | t.Errorf("EMASampleRate error = %v, wantErr %v", err, tt.wantErr) 526 | } 527 | if err == nil { 528 | defer a.Stop() 529 | if tt.wantDuration != a.AdjustmentIntervalDuration { 530 | t.Errorf("EMASampleRate duration mismatch = want %v, got %v", tt.wantDuration, a.AdjustmentIntervalDuration) 531 | } 532 | } 533 | }) 534 | } 535 | } 536 | -------------------------------------------------------------------------------- /emathroughput.go: -------------------------------------------------------------------------------- 1 | package dynsampler 2 | 3 | import ( 4 | "encoding/json" 5 | "errors" 6 | "fmt" 7 | "math" 8 | "sync" 9 | "time" 10 | ) 11 | 12 | // EMAThroughput implements Sampler and attempts to achieve a given throughput 13 | // rate, weighting rare traffic and frequent traffic differently so as to end up 14 | // with the the desired throughput. 
15 | // 16 | // Based on the EMASampleRate implementation, EMAThroughput differs in that 17 | // instead of trying to achieve a given sample rate, it tries to reach a given 18 | // throughput of events. During bursts of traffic, it will reduce sample 19 | // rates so as to keep the number of events per second roughly constant. 20 | // 21 | // Like the EMA sampler, it maintains an Exponential Moving Average of counts 22 | // seen per key, and adjusts this average at regular intervals. The weight 23 | // applied to more recent intervals is defined by `weight`, a number between (0, 24 | // 1) - larger values weight the average more toward recent observations. In 25 | // other words, a larger weight will cause sample rates to more quickly adapt to 26 | // traffic patterns, while a smaller weight will result in sample rates that are 27 | // less sensitive to bursts or drops in traffic and thus more consistent over 28 | // time. 29 | // 30 | // New keys that are not found in the EMA will always have a sample 31 | // rate of 1. Keys that occur more frequently will be sampled on a logarithmic 32 | // curve. In other words, every key will be represented at least once in any 33 | // given window and more frequent keys will have their sample rate 34 | // increased proportionally to wind up with the goal throughput. 35 | type EMAThroughput struct { 36 | // AdjustmentInterval defines how often we adjust the moving average from 37 | // recent observations. Default 15s. 38 | AdjustmentInterval time.Duration 39 | 40 | // Weight is a value between (0, 1) indicating the weighting factor used to adjust 41 | // the EMA. With larger values, newer data will influence the average more, and older 42 | // values will be factored out more quickly. In mathematical literature concerning EMA, 43 | // this is referred to as the `alpha` constant. 
44 | // Default is 0.5 45 | Weight float64 46 | 47 | // InitialSampleRate is the sample rate to use during startup, before we 48 | // have accumulated enough data to calculate a reasonable desired sample 49 | // rate. This is mainly useful in situations where unsampled throughput is 50 | // high enough to cause problems. 51 | // Default 10. 52 | InitialSampleRate int 53 | 54 | // GoalThroughputPerSec is the target number of events to send per second. 55 | // Sample rates are generated to squash the total throughput down to match the 56 | // goal throughput. Actual throughput may exceed goal throughput. default 100 57 | GoalThroughputPerSec int 58 | 59 | // MaxKeys, if greater than 0, limits the number of distinct keys tracked in EMA. 60 | // Once MaxKeys is reached, new keys will not be included in the sample rate map, but 61 | // existing keys will continue to be be counted. 62 | // Defaults to 0 63 | MaxKeys int 64 | 65 | // AgeOutValue indicates the threshold for removing keys from the EMA. The EMA of any key will approach 0 66 | // if it is not repeatedly observed, but will never truly reach it, so we have to decide what constitutes "zero". 67 | // Keys with averages below this threshold will be removed from the EMA. Default is the same as Weight, as this prevents 68 | // a key with the smallest integer value (1) from being aged out immediately. This value should generally be <= Weight, 69 | // unless you have very specific reasons to set it higher. 70 | AgeOutValue float64 71 | 72 | // BurstMultiple, if set, is multiplied by the sum of the running average of counts to define 73 | // the burst detection threshold. If total counts observed for a given interval exceed the threshold 74 | // EMA is updated immediately, rather than waiting on the AdjustmentInterval. 75 | // Defaults to 2; negative value disables. With a default of 2, if your traffic suddenly doubles, 76 | // burst detection will kick in. 
77 | BurstMultiple float64 78 | 79 | // BurstDetectionDelay indicates the number of intervals to run after Start is called before burst detection kicks in. 80 | // Defaults to 3 81 | BurstDetectionDelay uint 82 | 83 | savedSampleRates map[string]int 84 | currentCounts map[string]float64 85 | movingAverage map[string]float64 86 | burstThreshold float64 87 | currentBurstSum float64 88 | intervalCount uint 89 | burstSignal chan struct{} 90 | 91 | // haveData indicates that we have gotten a sample of traffic. Before we've 92 | // gotten any samples of traffic, we should use the default goal sample rate 93 | // for all events instead of sampling everything at 1 94 | haveData bool 95 | updating bool 96 | done chan struct{} 97 | 98 | lock sync.Mutex 99 | 100 | // used only in tests 101 | testSignalMapsDone chan struct{} 102 | 103 | // metrics 104 | requestCount int64 105 | eventCount int64 106 | burstCount int64 107 | } 108 | 109 | // Ensure we implement the sampler interface 110 | var _ Sampler = (*EMAThroughput)(nil) 111 | 112 | func (e *EMAThroughput) Start() error { 113 | // apply defaults 114 | if e.AdjustmentInterval == 0 { 115 | e.AdjustmentInterval = 15 * time.Second 116 | } 117 | if e.AdjustmentInterval < 1*time.Millisecond { 118 | return fmt.Errorf("the AdjustmentInterval %v is unreasonably short for a throughput sampler", e.AdjustmentInterval) 119 | } 120 | if e.InitialSampleRate == 0 { 121 | e.InitialSampleRate = 10 122 | } 123 | if e.GoalThroughputPerSec == 0 { 124 | e.GoalThroughputPerSec = 100 125 | } 126 | if e.Weight == 0 { 127 | e.Weight = 0.5 128 | } 129 | if e.AgeOutValue == 0 { 130 | e.AgeOutValue = e.Weight 131 | } 132 | if e.BurstMultiple == 0 { 133 | e.BurstMultiple = 2 134 | } 135 | if e.BurstDetectionDelay == 0 { 136 | e.BurstDetectionDelay = 3 137 | } 138 | 139 | // Don't override these maps at startup in case they were loaded from a previous state 140 | e.currentCounts = make(map[string]float64) 141 | if e.savedSampleRates == nil { 142 | 
e.savedSampleRates = make(map[string]int) 143 | } 144 | if e.movingAverage == nil { 145 | e.movingAverage = make(map[string]float64) 146 | } 147 | e.burstSignal = make(chan struct{}) 148 | e.done = make(chan struct{}) 149 | 150 | go func() { 151 | ticker := time.NewTicker(e.AdjustmentInterval) 152 | defer ticker.Stop() 153 | for { 154 | select { 155 | case <-e.burstSignal: 156 | // reset ticker when we get a burst 157 | ticker.Stop() 158 | ticker = time.NewTicker(e.AdjustmentInterval) 159 | e.updateMaps() 160 | case <-ticker.C: 161 | e.updateMaps() 162 | e.intervalCount++ 163 | case <-e.done: 164 | return 165 | } 166 | } 167 | }() 168 | return nil 169 | } 170 | 171 | func (e *EMAThroughput) Stop() error { 172 | close(e.done) 173 | return nil 174 | } 175 | 176 | // updateMaps calculates a new saved rate map based on the contents of the 177 | // counter map 178 | func (e *EMAThroughput) updateMaps() { 179 | e.lock.Lock() 180 | if e.testSignalMapsDone != nil { 181 | defer func() { 182 | e.testSignalMapsDone <- struct{}{} 183 | }() 184 | } 185 | // short circuit if no traffic 186 | if len(e.currentCounts) == 0 { 187 | // No traffic the last interval, don't update anything. This is deliberate to avoid 188 | // the average decaying when there's no traffic (comes in bursts, or there's some kind of outage). 
189 | e.lock.Unlock() 190 | return 191 | } 192 | // If there is another updateMaps going, bail 193 | if e.updating { 194 | e.lock.Unlock() 195 | return 196 | } 197 | e.updating = true 198 | // make a local copy of the sample counters for calculation 199 | tmpCounts := e.currentCounts 200 | e.currentCounts = make(map[string]float64) 201 | e.currentBurstSum = 0 202 | e.lock.Unlock() 203 | 204 | e.updateEMA(tmpCounts) 205 | 206 | // Goal events to send this interval is the total count of events in the EMA 207 | // divided by the desired average sample rate 208 | var sumEvents float64 209 | for _, count := range e.movingAverage { 210 | sumEvents += math.Max(1, count) 211 | } 212 | 213 | // Store this for burst detection. This is checked in GetSampleRate 214 | // so we need to grab the lock when we update it. 215 | e.lock.Lock() 216 | e.burstThreshold = sumEvents * e.BurstMultiple 217 | e.lock.Unlock() 218 | 219 | // Calculate the desired average sample rate per second based on the volume we've received. 220 | // This is the number of events we'd like to let through per adjustment interval. 
221 | goalCount := float64(e.GoalThroughputPerSec) * e.AdjustmentInterval.Seconds() 222 | 223 | // goalRatio is the goalCount divided by the sum of all the log values - it 224 | // determines what percentage of the total event space belongs to each key 225 | var logSum float64 226 | for _, count := range e.movingAverage { 227 | // We take the max of (1, count) because count * weight is < 1 for 228 | // very small counts, which throws off the logSum and can cause 229 | // incorrect samples rates to be computed when throughput is low 230 | logSum += math.Log10(math.Max(1, count)) 231 | } 232 | goalRatio := goalCount / logSum 233 | 234 | newSavedSampleRates := calculateSampleRates(goalRatio, e.movingAverage) 235 | e.lock.Lock() 236 | defer e.lock.Unlock() 237 | e.savedSampleRates = newSavedSampleRates 238 | e.haveData = true 239 | e.updating = false 240 | } 241 | 242 | // GetSampleRate takes a key and returns the appropriate sample rate for that 243 | // key. 244 | func (e *EMAThroughput) GetSampleRate(key string) int { 245 | return e.GetSampleRateMulti(key, 1) 246 | } 247 | 248 | // GetSampleRateMulti takes a key representing count spans and returns the 249 | // appropriate sample rate for that key. 250 | func (e *EMAThroughput) GetSampleRateMulti(key string, count int) int { 251 | e.lock.Lock() 252 | defer e.lock.Unlock() 253 | 254 | e.requestCount++ 255 | e.eventCount += int64(count) 256 | 257 | // Enforce MaxKeys limit on the size of the map 258 | if e.MaxKeys > 0 { 259 | // If a key already exists, increment it. 
If not, but we're under the limit, store a new key 260 | if _, found := e.currentCounts[key]; found || len(e.currentCounts) < e.MaxKeys { 261 | e.currentCounts[key] += float64(count) 262 | e.currentBurstSum += float64(count) 263 | } 264 | } else { 265 | e.currentCounts[key] += float64(count) 266 | e.currentBurstSum += float64(count) 267 | } 268 | 269 | // Enforce the burst threshold 270 | if e.burstThreshold > 0 && e.currentBurstSum >= e.burstThreshold && e.intervalCount >= e.BurstDetectionDelay { 271 | // reset the burst sum to prevent additional burst updates from occurring while updateMaps is running 272 | e.currentBurstSum = 0 273 | e.burstCount++ 274 | // send but don't block - consuming is blocked on updateMaps, which takes the same lock we're holding 275 | select { 276 | case e.burstSignal <- struct{}{}: 277 | default: 278 | } 279 | } 280 | 281 | if !e.haveData { 282 | return e.InitialSampleRate 283 | } 284 | if rate, found := e.savedSampleRates[key]; found { 285 | return rate 286 | } 287 | return 1 288 | } 289 | 290 | func (e *EMAThroughput) updateEMA(newCounts map[string]float64) { 291 | keysToUpdate := make([]string, 0, len(e.movingAverage)) 292 | for key := range e.movingAverage { 293 | keysToUpdate = append(keysToUpdate, key) 294 | } 295 | 296 | // Update any existing keys with new values 297 | for _, key := range keysToUpdate { 298 | var newAvg float64 299 | // Was this key seen in the last interval? Adjust by that amount 300 | if val, found := newCounts[key]; found { 301 | newAvg = adjustAverage(e.movingAverage[key], val, e.Weight) 302 | } else { 303 | // Otherwise adjust by zero 304 | newAvg = adjustAverage(e.movingAverage[key], 0, e.Weight) 305 | } 306 | 307 | // Age out this value if it's too small to care about for calculating sample rates 308 | // This is also necessary to keep our map from going forever. 
309 | if newAvg < e.AgeOutValue { 310 | delete(e.movingAverage, key) 311 | } else { 312 | e.movingAverage[key] = newAvg 313 | } 314 | // We've processed this key - don't process it again when we look at new counts 315 | delete(newCounts, key) 316 | } 317 | 318 | for key := range newCounts { 319 | newAvg := adjustAverage(0, newCounts[key], e.Weight) 320 | if newAvg >= e.AgeOutValue { 321 | e.movingAverage[key] = newAvg 322 | } 323 | } 324 | } 325 | 326 | type emaThroughputState struct { 327 | // These fields are exported for use by `JSON.Marshal` and `JSON.Unmarshal` 328 | SavedSampleRates map[string]int `json:"saved_sample_rates"` 329 | MovingAverage map[string]float64 `json:"moving_average"` 330 | } 331 | 332 | // SaveState returns a byte array with a JSON representation of the sampler state 333 | func (e *EMAThroughput) SaveState() ([]byte, error) { 334 | e.lock.Lock() 335 | defer e.lock.Unlock() 336 | 337 | if e.savedSampleRates == nil { 338 | return nil, errors.New("saved sample rate map is nil") 339 | } 340 | if e.movingAverage == nil { 341 | return nil, errors.New("moving average map is nil") 342 | } 343 | s := &emaThroughputState{SavedSampleRates: e.savedSampleRates, MovingAverage: e.movingAverage} 344 | return json.Marshal(s) 345 | } 346 | 347 | // LoadState accepts a byte array with a JSON representation of a previous instance's 348 | // state 349 | func (e *EMAThroughput) LoadState(state []byte) error { 350 | e.lock.Lock() 351 | defer e.lock.Unlock() 352 | 353 | s := emaThroughputState{} 354 | err := json.Unmarshal(state, &s) 355 | if err != nil { 356 | return err 357 | } 358 | 359 | // Load the previously calculated sample rates 360 | e.savedSampleRates = s.SavedSampleRates 361 | e.movingAverage = s.MovingAverage 362 | // Allow GetSampleRate to return calculated sample rates from the loaded map 363 | e.haveData = true 364 | 365 | return nil 366 | } 367 | 368 | func (e *EMAThroughput) GetMetrics(prefix string) map[string]int64 { 369 | e.lock.Lock() 370 | 
// TestUpdateEMAThroughput pushes six consecutive intervals of raw counts for
// the keys "a", "b" and "c" through updateEMA (Weight 0.2, AgeOutValue 0.2)
// and checks the rounded moving average of every key after each interval.
func TestUpdateEMAThroughput(t *testing.T) {
	e := &EMAThroughput{
		movingAverage: make(map[string]float64),
		Weight:        0.2,
		AgeOutValue:   0.2,
	}

	// Each row is one interval: for every key, the count observed in that
	// interval followed by the moving average expected afterwards, rounded to
	// the nearest integer. The state carries over from row to row.
	tests := []struct {
		keyAValue    float64
		keyAExpected float64
		keyBValue    float64
		keyBExpected float64
		keyCValue    float64
		keyCExpected float64
	}{
		{463, 93, 235, 47, 0, 0},
		{176, 109, 458, 129, 0, 0},
		{345, 156, 470, 197, 0, 0},
		{339, 193, 317, 221, 0, 0},
		{197, 194, 165, 210, 0, 0},
		// "c" only receives traffic in the final interval.
		{387, 232, 95, 187, 6960, 1392},
	}

	for _, tt := range tests {
		counts := make(map[string]float64)
		counts["a"] = tt.keyAValue
		counts["b"] = tt.keyBValue
		counts["c"] = tt.keyCValue
		e.updateEMA(counts)
		// A key that is absent from movingAverage reads as 0, which is what
		// the rows above expect for "c" while its count is 0.
		assert.Equal(t, tt.keyAExpected, math.Round(e.movingAverage["a"]))
		assert.Equal(t, tt.keyBExpected, math.Round(e.movingAverage["b"]))
		assert.Equal(t, tt.keyCExpected, math.Round(e.movingAverage["c"]))
	}
}
// TestEMAThroughputSampleUpdateMapsSparseCounts runs 101 update intervals in
// which one key has a steady count of 40 while five fresh random keys with a
// count of 1 appear in each interval, then checks that the steady key ends up
// with a sample rate of 4 (GoalThroughputPerSec 10, 1s interval).
func TestEMAThroughputSampleUpdateMapsSparseCounts(t *testing.T) {
	e := &EMAThroughput{
		GoalThroughputPerSec: 10,
		AdjustmentInterval:   1 * time.Second,
		Weight:               0.2,
		AgeOutValue:          0.2,
	}

	// Start is not called, so the maps updateMaps works on are created by hand.
	e.movingAverage = make(map[string]float64)
	e.savedSampleRates = make(map[string]int)

	for i := 0; i <= 100; i++ {
		input := make(map[string]float64)
		// simulate steady stream of input from one key
		input["largest_count"] = 40
		// sporadic keys with single counts that come and go with each interval
		for j := 0; j < 5; j++ {
			key := randomString(8)
			input[key] = 1
		}
		// Install the interval's counts directly and recompute the rates.
		e.currentCounts = input
		e.updateMaps()
	}
	assert.Equal(t, 4, e.savedSampleRates["largest_count"])
}
// TestEMAThroughputUpdateMapsRace launches 1000 concurrent updateMaps calls
// interleaved with GetSampleRate calls and waits for all of them to report
// completion. It makes no assertions about the resulting rates; presumably it
// exists to be run under the race detector.
func TestEMAThroughputUpdateMapsRace(t *testing.T) {
	// The one-hour interval keeps the regular ticker from firing during the test.
	e := &EMAThroughput{AdjustmentInterval: 1 * time.Hour}
	// Buffered for every call so no updateMaps blocks while signalling.
	// NOTE(review): assumes updateMaps signals on testSignalMapsDone on every
	// exit path, including its early returns — confirm in updateMaps.
	e.testSignalMapsDone = make(chan struct{}, 1000)
	err := e.Start()
	assert.Nil(t, err)
	for i := 0; i < 1000; i++ {
		e.GetSampleRate("foo")
		go e.updateMaps()
	}
	// Wait for one completion signal per updateMaps call.
	done := 0
	for done != 1000 {
		<-e.testSignalMapsDone
		done++
	}
}
// TestEMAThroughputSampleRateHitsTargetRate is a long test that generates a
// lot of random data and runs it through the sampler. The goal is to determine
// whether we actually hit the specified target throughput (within a tolerance)
// an acceptable number of times. Most of the time, the throughput of
// observations kept should be close to the target rate.
//
// For every combination of target throughput and key count it simulates 100
// one-second ticks and requires that:
//   - the total number of kept observations is within 10% of throughput*100
//   - at least 90 of the 100 ticks individually land within 20% of the target
func TestEMAThroughputSampleRateHitsTargetRate(t *testing.T) {
	// The data is seeded from the clock, so every run uses different counts.
	mrand.Seed(time.Now().Unix())
	testThroughputs := []int{100, 1000}
	testKeyCount := []int{10, 30}
	toleranceFraction := float64(0.2)

	for _, throughput := range testThroughputs {
		// Per-tick acceptance window around the target.
		tolerance := float64(throughput) * toleranceFraction
		toleranceUpper := float64(throughput) + tolerance
		toleranceLower := float64(throughput) - tolerance

		for _, keyCount := range testKeyCount {
			sampler := &EMAThroughput{
				AdjustmentInterval:   1 * time.Second,
				GoalThroughputPerSec: throughput,
				Weight:               0.5,
				AgeOutValue:          0.5,
				currentCounts:        make(map[string]float64),
				movingAverage:        make(map[string]float64),
			}

			// build a consistent set of keys to use
			keys := make([]string, keyCount)
			for i := 0; i < keyCount; i++ {
				keys[i] = randomString(8)
			}

			for i, key := range keys {
				// generate key counts of different magnitudes
				// (base cycles through 10, 100, 1000 with the key index)
				base := math.Pow10(i%3 + 1)
				count := float64(((i%10)+1))*base + float64(mrand.Intn(int(base)))
				sampler.currentCounts[key] = count
			}

			// build an initial set of sample values so we don't just return the target
			sampler.updateMaps()

			// number of ticks whose kept count fell inside the tolerance window
			var success int

			grandTotalKept := 0
			// each tick is 1 second
			for i := 0; i < 100; i++ {
				totalKeptObservations := 0
				for j, key := range keys {
					base := math.Pow10(j%3 + 1)
					count := float64(((j%10)+1))*base + float64(mrand.Intn(int(base)))
					for k := 0; k < int(count); k++ {
						// Keep an observation with probability 1/rate.
						rate := sampler.GetSampleRate(key)
						if mrand.Intn(rate) == 0 {
							totalKeptObservations++
						}
					}
				}
				grandTotalKept += totalKeptObservations

				if totalKeptObservations <= int(toleranceUpper) && totalKeptObservations >= int(toleranceLower) {
					success++
				}
				// End of the tick: recompute rates from what was just counted.
				sampler.updateMaps()
			}
			assert.GreaterOrEqual(t, grandTotalKept, throughput*90, "totalKept too low: %d expected: %d\n", grandTotalKept, throughput*100)
			assert.LessOrEqual(t, grandTotalKept, throughput*110, "totalKept too high: %d expected: %d\n", grandTotalKept, throughput*100)

			assert.True(t, success >= 90, "target throughput test %d with key count %d failed with success rate of %d%%", throughput, keyCount, success)
		}
	}
}
This is a slightly higher-level test that only depends on the public 15 | // interface of samplers. 16 | func TestGenericSamplerBehavior(t *testing.T) { 17 | tests := []struct { 18 | name string 19 | sampler dynsampler.Sampler 20 | want []int 21 | }{ 22 | {"AvgSampleRate", 23 | &dynsampler.AvgSampleRate{ 24 | ClearFrequencyDuration: 1 * time.Second, 25 | }, []int{1, 1, 1, 1, 2, 4, 9, 21}, 26 | }, 27 | {"AvgSampleWithMin", 28 | &dynsampler.AvgSampleWithMin{ 29 | ClearFrequencyDuration: 1 * time.Second, 30 | }, []int{1, 1, 1, 1, 1, 2, 4, 9, 21}, 31 | }, 32 | {"EMASampler", 33 | &dynsampler.EMASampleRate{ 34 | AdjustmentInterval: 1, 35 | }, []int{1, 1, 1, 1, 2, 4, 9, 21}, 36 | }, 37 | {"OnlyOnce", 38 | &dynsampler.OnlyOnce{ 39 | ClearFrequencyDuration: 1 * time.Second, 40 | }, []int{1, 1, 1, 1, 1, 1, 1, 1}, 41 | }, 42 | {"PerKeyThroughput", 43 | &dynsampler.PerKeyThroughput{ 44 | ClearFrequencyDuration: 1 * time.Second, 45 | }, []int{1, 1, 1, 2, 8, 24, 72, 218}, 46 | }, 47 | {"TotalThroughput", 48 | &dynsampler.TotalThroughput{ 49 | ClearFrequencyDuration: 1 * time.Second, 50 | GoalThroughputPerSec: 5, 51 | }, []int{1, 4, 14, 43, 129, 388, 1166, 3499}, 52 | }, 53 | {"WindowedThroughput", 54 | &dynsampler.WindowedThroughput{ 55 | UpdateFrequencyDuration: 100 * time.Millisecond, 56 | LookbackFrequencyDuration: 1 * time.Second, 57 | }, []int{1, 1, 1, 2, 6, 19, 58, 174}, 58 | }, 59 | {"EMAThroughput", 60 | &dynsampler.EMAThroughput{ 61 | AdjustmentInterval: 1 * time.Second, 62 | GoalThroughputPerSec: 100, 63 | }, []int{1, 1, 2, 3, 6, 13, 31, 77}, 64 | }, 65 | {"EMAThroughputLowTraffic", 66 | &dynsampler.EMAThroughput{ 67 | AdjustmentInterval: 1 * time.Second, 68 | GoalThroughputPerSec: 100000, 69 | }, []int{1, 1, 1, 1, 1, 1, 1, 1}, 70 | }, 71 | } 72 | 73 | const ( 74 | NRounds = 8 75 | ) 76 | 77 | keys := []string{ 78 | "arm", "bag", "bed", "bee", "box", "boy", "cat", "cow", "cup", "dog", 79 | "ear", "egg", "eye", "fly", "gun", "hat", "key", "leg", "lip", "map", 80 | 
"net", "nut", "pen", "pig", "pin", "pot", "rat", "rod", "sun", "toe", 81 | } 82 | 83 | for i := range tests { 84 | tt := tests[i] 85 | t.Run(tt.name, func(t *testing.T) { 86 | // we can run all of these at once 87 | t.Parallel() 88 | s := tt.sampler 89 | 90 | err := s.Start() 91 | if err != nil { 92 | t.Errorf("%v starting sampler", err) 93 | } 94 | 95 | nkeys := len(tt.want) 96 | results := make([]int, nkeys) 97 | for round := 0; round < NRounds; round++ { 98 | for k := 0; k < nkeys; k++ { 99 | key := keys[k%nkeys] 100 | nsamples := int(math.Pow(3, float64(k%9))) // up to 6K 101 | results[k] = s.GetSampleRateMulti(key, nsamples) 102 | } 103 | time.Sleep(1010 * time.Millisecond) // just over the 1 second clear time 104 | } 105 | s.Stop() 106 | 107 | for k := 0; k < nkeys; k++ { 108 | // if !isCloseTo(tt.want[k], results[k]) { 109 | if tt.want[k] != results[k] { 110 | t.Errorf("results not = for key %s (%d) want %d, got %d\n", keys[k], k, tt.want[k], results[k]) 111 | } 112 | } 113 | }) 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/honeycombio/dynsampler-go 2 | 3 | go 1.17 4 | 5 | require github.com/stretchr/testify v1.10.0 6 | 7 | require ( 8 | github.com/davecgh/go-spew v1.1.1 // indirect 9 | github.com/pmezard/go-difflib v1.0.0 // indirect 10 | gopkg.in/yaml.v3 v3.0.1 // indirect 11 | ) 12 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 2 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 3 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 4 | github.com/pmezard/go-difflib v1.0.0 
h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 5 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 6 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 7 | github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= 8 | github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= 9 | github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= 10 | github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 11 | github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= 12 | github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= 13 | github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= 14 | github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 15 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 16 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 17 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 18 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 19 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 20 | -------------------------------------------------------------------------------- /keyCalculation.go: -------------------------------------------------------------------------------- 1 | package dynsampler 2 | 3 | import ( 4 | "math" 5 | "sort" 6 | ) 7 | 8 | // This is an extraction of common calculation logic for all the key-based samplers. 
// calculateSampleRates is the sample-rate calculation shared by the key-based
// samplers. Given goalRatio (the number of events to keep per unit of
// log10(count)) and the per-key counts in buckets, it returns one sample rate
// per key.
//
// Every key is entitled to max(1, log10(count)*goalRatio) events. A key that
// stays below its allowance gets a rate of 1, and the unused part of the
// allowance is handed on to the keys that follow it.
func calculateSampleRates(goalRatio float64, buckets map[string]float64) map[string]int {
	// Walk the keys in sorted order: the leftover allowance is carried from
	// key to key, so a changing order would change the rounding.
	names := make([]string, 0, len(buckets))
	for name := range buckets {
		names = append(names, name)
	}
	sort.Strings(names)

	rates := make(map[string]int)
	var surplus float64
	for idx, name := range names {
		keysLeft := len(names) - idx
		seen := math.Max(1, buckets[name])

		// This key's own log10 share plus an equal slice of the surplus
		// handed down by earlier keys; the rest of the surplus moves on.
		slice := surplus / float64(keysLeft)
		allowance := math.Max(1, math.Log10(seen)*goalRatio) + slice
		surplus -= slice

		rate := 1
		kept := seen
		// Deliberately the negation of "seen <= allowance": a NaN allowance
		// must take this branch, exactly as an if/else on <= would.
		if !(seen <= allowance) {
			// More traffic than allowed: sample enough to get under the
			// allowance (round up). Counts <= 1 combined with an infinite
			// goalRatio yield NaN here, in which case the rate stays 1.
			if r := math.Ceil(seen / allowance); !math.IsNaN(r) {
				rate = int(r)
			}
			kept = seen / float64(rate)
		}
		rates[name] = rate
		// Whatever this key does not use becomes available further down.
		surplus += allowance - kept
	}
	return rates
}
31 | ClearFrequencyDuration time.Duration 32 | 33 | seen map[string]bool 34 | done chan struct{} 35 | 36 | // metrics 37 | requestCount int64 38 | eventCount int64 39 | 40 | lock sync.Mutex 41 | } 42 | 43 | // Ensure we implement the sampler interface 44 | var _ Sampler = (*OnlyOnce)(nil) 45 | 46 | // Start initializes the static dynsampler 47 | func (o *OnlyOnce) Start() error { 48 | if o.ClearFrequencyDuration != 0 && o.ClearFrequencySec != 0 { 49 | return fmt.Errorf("the ClearFrequencySec configuration value is deprecated; use only ClearFrequencyDuration") 50 | } 51 | 52 | if o.ClearFrequencyDuration == 0 && o.ClearFrequencySec == 0 { 53 | o.ClearFrequencyDuration = 30 * time.Second 54 | } else if o.ClearFrequencySec != 0 { 55 | o.ClearFrequencyDuration = time.Duration(o.ClearFrequencySec) * time.Second 56 | } 57 | 58 | // if it's negative, we don't even start something 59 | if o.ClearFrequencyDuration < 0 { 60 | return nil 61 | } 62 | 63 | o.seen = make(map[string]bool) 64 | o.done = make(chan struct{}) 65 | 66 | // spin up calculator 67 | go func() { 68 | ticker := time.NewTicker(o.ClearFrequencyDuration) 69 | defer ticker.Stop() 70 | for { 71 | select { 72 | case <-ticker.C: 73 | o.updateMaps() 74 | case <-o.done: 75 | return 76 | } 77 | } 78 | }() 79 | return nil 80 | } 81 | 82 | func (o *OnlyOnce) Stop() error { 83 | if o.done != nil { 84 | close(o.done) 85 | } 86 | return nil 87 | } 88 | 89 | func (o *OnlyOnce) updateMaps() { 90 | o.lock.Lock() 91 | defer o.lock.Unlock() 92 | o.seen = make(map[string]bool) 93 | } 94 | 95 | // GetSampleRate takes a key and returns the appropriate sample rate for that 96 | // key. 97 | func (o *OnlyOnce) GetSampleRate(key string) int { 98 | return o.GetSampleRateMulti(key, 1) 99 | } 100 | 101 | // GetSampleRateMulti takes a key representing count spans and returns the 102 | // appropriate sample rate for that key. 
103 | func (o *OnlyOnce) GetSampleRateMulti(key string, count int) int { 104 | o.lock.Lock() 105 | defer o.lock.Unlock() 106 | o.requestCount++ 107 | o.eventCount += int64(count) 108 | 109 | if _, found := o.seen[key]; found { 110 | return 1000000000 111 | } 112 | o.seen[key] = true 113 | return 1 114 | } 115 | 116 | // SaveState is not implemented 117 | func (o *OnlyOnce) SaveState() ([]byte, error) { 118 | return nil, nil 119 | } 120 | 121 | // LoadState is not implemented 122 | func (o *OnlyOnce) LoadState(state []byte) error { 123 | return nil 124 | } 125 | 126 | func (o *OnlyOnce) GetMetrics(prefix string) map[string]int64 { 127 | o.lock.Lock() 128 | defer o.lock.Unlock() 129 | mets := map[string]int64{ 130 | prefix + "request_count": o.requestCount, 131 | prefix + "event_count": o.eventCount, 132 | prefix + "keyspace_size": int64(len(o.seen)), 133 | } 134 | return mets 135 | } 136 | -------------------------------------------------------------------------------- /onlyonce_test.go: -------------------------------------------------------------------------------- 1 | package dynsampler 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | "time" 7 | 8 | "github.com/stretchr/testify/assert" 9 | ) 10 | 11 | func TestOnlyOnceUpdateMaps(t *testing.T) { 12 | o := &OnlyOnce{ 13 | ClearFrequencyDuration: 30 * time.Second, 14 | } 15 | tsts := []struct { 16 | inputSeen map[string]bool 17 | expectedSeen map[string]bool 18 | }{ 19 | { 20 | map[string]bool{ 21 | "one": true, 22 | "two": true, 23 | "three": true, 24 | }, 25 | map[string]bool{}, 26 | }, 27 | { 28 | map[string]bool{}, 29 | map[string]bool{}, 30 | }, 31 | } 32 | for i, tst := range tsts { 33 | o.seen = tst.inputSeen 34 | o.updateMaps() 35 | assert.Equal(t, o.seen, tst.expectedSeen, fmt.Sprintf("test %d failed", i)) 36 | } 37 | } 38 | 39 | func TestOnlyOnceGetSampleRate(t *testing.T) { 40 | o := &OnlyOnce{} 41 | o.seen = map[string]bool{ 42 | "one": true, 43 | "two": true, 44 | } 45 | tsts := []struct { 46 | inputKey 
// TestOnlyOnce_Start checks how Start resolves the two clear-frequency
// settings: which effective ClearFrequencyDuration results, and that setting
// both at once is rejected with an error.
func TestOnlyOnce_Start(t *testing.T) {
	// Columns: name, ClearFrequencySec, ClearFrequencyDuration, the duration
	// expected after Start, and whether Start must fail.
	tests := []struct {
		name                   string
		ClearFrequencySec      int
		ClearFrequencyDuration time.Duration
		wantDuration           time.Duration
		wantErr                bool
	}{
		{"sec only", 2, 0, 2 * time.Second, false},
		{"dur only", 0, 1003 * time.Millisecond, 1003 * time.Millisecond, false},
		{"default", 0, 0, 30 * time.Second, false},
		{"both", 2, 2 * time.Second, 0, true},
		{"negative sec", -1, 0, -1 * time.Second, false},
		{"negative dur", 0, -1 * time.Second, -1 * time.Second, false},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			a := &OnlyOnce{
				ClearFrequencySec:      tt.ClearFrequencySec,
				ClearFrequencyDuration: tt.ClearFrequencyDuration,
			}
			err := a.Start()
			if (err != nil) != tt.wantErr {
				t.Errorf("OnlyOnce error = %v, wantErr %v", err, tt.wantErr)
			}
			// The duration is only compared when Start succeeded.
			if err == nil {
				// Stop the sampler again when the subtest ends.
				defer a.Stop()
				if tt.wantDuration != a.ClearFrequencyDuration {
					t.Errorf("OnlyOnce duration mismatch = want %v, got %v", tt.wantDuration, a.ClearFrequencyDuration)
				}
			}
		})
	}
}
-------------------------------------------------------------------------------- 1 | package dynsampler 2 | 3 | import ( 4 | "fmt" 5 | "math" 6 | "sync" 7 | "time" 8 | ) 9 | 10 | // PerKeyThroughput implements Sampler and attempts to meet a goal of a fixed 11 | // number of events per key per second sent to Honeycomb. 12 | // 13 | // This method is to guarantee that at most a certain number of events per key 14 | // get transmitted, no matter how many keys you have or how much traffic comes 15 | // through. In other words, if capturing a minimum amount of traffic per key is 16 | // important but beyond that doesn't matter much, this is the best method. 17 | type PerKeyThroughput struct { 18 | // DEPRECATED -- use ClearFrequencyDuration. 19 | // ClearFrequencySec is how often the counters reset in seconds. 20 | ClearFrequencySec int 21 | 22 | // ClearFrequencyDuration is how often the counters reset as a Duration. 23 | // Note that either this or ClearFrequencySec can be specified, but not both. 24 | // If neither one is set, the default is 30s. 25 | ClearFrequencyDuration time.Duration 26 | 27 | // PerKeyThroughputPerSec is the target number of events to send per second 28 | // per key. Sample rates are generated on a per key basis to squash the 29 | // throughput down to match the goal throughput. default 10 30 | PerKeyThroughputPerSec int 31 | 32 | // MaxKeys, if greater than 0, limits the number of distinct keys used to build 33 | // the sample rate map within the interval defined by `ClearFrequencyDuration`. Once 34 | // MaxKeys is reached, new keys will not be included in the sample rate map, but 35 | // existing keys will continue to be be counted. 
36 | MaxKeys int 37 | 38 | savedSampleRates map[string]int 39 | currentCounts map[string]int 40 | done chan struct{} 41 | 42 | lock sync.Mutex 43 | 44 | // metrics 45 | requestCount int64 46 | eventCount int64 47 | } 48 | 49 | // Ensure we implement the sampler interface 50 | var _ Sampler = (*PerKeyThroughput)(nil) 51 | 52 | func (p *PerKeyThroughput) Start() error { 53 | // apply defaults 54 | if p.ClearFrequencyDuration != 0 && p.ClearFrequencySec != 0 { 55 | return fmt.Errorf("the ClearFrequencySec configuration value is deprecated; use only ClearFrequencyDuration") 56 | } 57 | 58 | if p.ClearFrequencyDuration == 0 && p.ClearFrequencySec == 0 { 59 | p.ClearFrequencyDuration = 30 * time.Second 60 | } else if p.ClearFrequencySec != 0 { 61 | p.ClearFrequencyDuration = time.Duration(p.ClearFrequencySec) * time.Second 62 | } 63 | 64 | if p.PerKeyThroughputPerSec == 0 { 65 | p.PerKeyThroughputPerSec = 10 66 | } 67 | 68 | // initialize internal variables 69 | p.savedSampleRates = make(map[string]int) 70 | p.currentCounts = make(map[string]int) 71 | p.done = make(chan struct{}) 72 | 73 | // spin up calculator 74 | go func() { 75 | ticker := time.NewTicker(p.ClearFrequencyDuration) 76 | defer ticker.Stop() 77 | for { 78 | select { 79 | case <-ticker.C: 80 | p.updateMaps() 81 | case <-p.done: 82 | return 83 | } 84 | } 85 | }() 86 | return nil 87 | } 88 | 89 | func (p *PerKeyThroughput) Stop() error { 90 | close(p.done) 91 | return nil 92 | } 93 | 94 | // updateMaps calculates a new saved rate map based on the contents of the 95 | // counter map 96 | func (p *PerKeyThroughput) updateMaps() { 97 | // make a local copy of the sample counters for calculation 98 | p.lock.Lock() 99 | tmpCounts := p.currentCounts 100 | p.currentCounts = make(map[string]int) 101 | p.lock.Unlock() 102 | // short circuit if no traffic 103 | numKeys := len(tmpCounts) 104 | if numKeys == 0 { 105 | // no traffic the last 30s. 
clear the result map 106 | p.lock.Lock() 107 | defer p.lock.Unlock() 108 | p.savedSampleRates = make(map[string]int) 109 | return 110 | } 111 | actualPerKeyRate := p.PerKeyThroughputPerSec * int(p.ClearFrequencyDuration.Seconds()) 112 | // for each key, calculate sample rate by dividing counted events by the 113 | // desired number of events 114 | newSavedSampleRates := make(map[string]int) 115 | for k, v := range tmpCounts { 116 | rate := int(math.Max(1, (float64(v) / float64(actualPerKeyRate)))) 117 | newSavedSampleRates[k] = rate 118 | } 119 | // save newly calculated sample rates 120 | p.lock.Lock() 121 | defer p.lock.Unlock() 122 | p.savedSampleRates = newSavedSampleRates 123 | } 124 | 125 | // GetSampleRate takes a key and returns the appropriate sample rate for that 126 | // key. 127 | func (p *PerKeyThroughput) GetSampleRate(key string) int { 128 | return p.GetSampleRateMulti(key, 1) 129 | } 130 | 131 | // GetSampleRateMulti takes a key representing count spans and returns the 132 | // appropriate sample rate for that key. 133 | func (p *PerKeyThroughput) GetSampleRateMulti(key string, count int) int { 134 | p.lock.Lock() 135 | defer p.lock.Unlock() 136 | 137 | p.requestCount++ 138 | p.eventCount += int64(count) 139 | 140 | // Enforce MaxKeys limit on the size of the map 141 | if p.MaxKeys > 0 { 142 | // If a key already exists, add the count. 
If not, but we're under the limit, store a new key 143 | if _, found := p.currentCounts[key]; found || len(p.currentCounts) < p.MaxKeys { 144 | p.currentCounts[key] += count 145 | } 146 | } else { 147 | p.currentCounts[key] += count 148 | } 149 | if rate, found := p.savedSampleRates[key]; found { 150 | return rate 151 | } 152 | return 1 153 | } 154 | 155 | // SaveState is not implemented 156 | func (p *PerKeyThroughput) SaveState() ([]byte, error) { 157 | return nil, nil 158 | } 159 | 160 | // LoadState is not implemented 161 | func (p *PerKeyThroughput) LoadState(state []byte) error { 162 | return nil 163 | } 164 | 165 | func (p *PerKeyThroughput) GetMetrics(prefix string) map[string]int64 { 166 | p.lock.Lock() 167 | defer p.lock.Unlock() 168 | mets := map[string]int64{ 169 | prefix + "request_count": p.requestCount, 170 | prefix + "event_count": p.eventCount, 171 | prefix + "keyspace_size": int64(len(p.currentCounts)), 172 | } 173 | return mets 174 | } 175 | -------------------------------------------------------------------------------- /perkeythroughput_test.go: -------------------------------------------------------------------------------- 1 | package dynsampler 2 | 3 | import ( 4 | "fmt" 5 | "strconv" 6 | "sync" 7 | "testing" 8 | "time" 9 | 10 | "github.com/stretchr/testify/assert" 11 | ) 12 | 13 | func TestPerKeyThroughputUpdateMaps(t *testing.T) { 14 | p := &PerKeyThroughput{ 15 | ClearFrequencyDuration: 30 * time.Second, 16 | PerKeyThroughputPerSec: 5, 17 | } 18 | tsts := []struct { 19 | inputCount map[string]int 20 | expectedSavedSampleRates map[string]int 21 | }{ 22 | { 23 | map[string]int{ 24 | "one": 1, 25 | "two": 1, 26 | "three": 2, 27 | "four": 5, 28 | "five": 8, 29 | "six": 15, 30 | "seven": 45, 31 | "eight": 612, 32 | "nine": 2000, 33 | "ten": 10000, 34 | }, 35 | map[string]int{ 36 | "one": 1, 37 | "two": 1, 38 | "three": 1, 39 | "four": 1, 40 | "five": 1, 41 | "six": 1, 42 | "seven": 1, 43 | "eight": 4, 44 | "nine": 13, 45 | "ten": 66, 46 | }, 
47 | }, 48 | { 49 | map[string]int{ 50 | "one": 1, 51 | "two": 1, 52 | "three": 2, 53 | "four": 5, 54 | "five": 8, 55 | "six": 15, 56 | "seven": 45, 57 | "eight": 50, 58 | "nine": 60, 59 | }, 60 | map[string]int{ 61 | "one": 1, 62 | "two": 1, 63 | "three": 1, 64 | "four": 1, 65 | "five": 1, 66 | "six": 1, 67 | "seven": 1, 68 | "eight": 1, 69 | "nine": 1, 70 | }, 71 | }, 72 | { 73 | map[string]int{ 74 | "one": 1, 75 | "two": 1, 76 | "three": 2, 77 | "four": 5, 78 | "five": 7, 79 | }, 80 | map[string]int{ 81 | "one": 1, 82 | "two": 1, 83 | "three": 1, 84 | "four": 1, 85 | "five": 1, 86 | }, 87 | }, 88 | { 89 | map[string]int{ 90 | "one": 1000, 91 | "two": 1000, 92 | "three": 2000, 93 | "four": 5000, 94 | "five": 7000, 95 | }, 96 | map[string]int{ 97 | "one": 6, 98 | "two": 6, 99 | "three": 13, 100 | "four": 33, 101 | "five": 46, 102 | }, 103 | }, 104 | { 105 | map[string]int{ 106 | "one": 1000, 107 | "two": 1000, 108 | "three": 2000, 109 | "four": 5000, 110 | "five": 70000, 111 | }, 112 | map[string]int{ 113 | "one": 6, 114 | "two": 6, 115 | "three": 13, 116 | "four": 33, 117 | "five": 466, 118 | }, 119 | }, 120 | { 121 | map[string]int{ 122 | "one": 6000, 123 | "two": 6000, 124 | "three": 6000, 125 | "four": 6000, 126 | "five": 6000, 127 | }, 128 | map[string]int{ 129 | "one": 40, 130 | "two": 40, 131 | "three": 40, 132 | "four": 40, 133 | "five": 40, 134 | }, 135 | }, 136 | { 137 | map[string]int{ 138 | "one": 12000, 139 | }, 140 | map[string]int{ 141 | "one": 80, 142 | }, 143 | }, 144 | { 145 | map[string]int{}, 146 | map[string]int{}, 147 | }, 148 | } 149 | for i, tst := range tsts { 150 | p.currentCounts = tst.inputCount 151 | p.updateMaps() 152 | assert.Equal(t, len(p.currentCounts), 0) 153 | assert.Equal(t, p.savedSampleRates, tst.expectedSavedSampleRates, fmt.Sprintf("test %d failed", i)) 154 | } 155 | } 156 | 157 | func TestPerKeyThroughputGetSampleRate(t *testing.T) { 158 | p := &PerKeyThroughput{} 159 | p.currentCounts = map[string]int{ 160 | "one": 5, 161 
| "two": 8, 162 | } 163 | p.savedSampleRates = map[string]int{ 164 | "one": 10, 165 | "two": 1, 166 | "three": 5, 167 | } 168 | tsts := []struct { 169 | inputKey string 170 | expectedSampleRate int 171 | expectedCurrentCountForKey int 172 | }{ 173 | {"one", 10, 6}, 174 | {"two", 1, 9}, 175 | {"two", 1, 10}, 176 | {"three", 5, 1}, // key missing from current counts 177 | {"three", 5, 2}, 178 | {"four", 1, 1}, // key missing from current and saved counts 179 | {"four", 1, 2}, 180 | } 181 | for _, tst := range tsts { 182 | rate := p.GetSampleRate(tst.inputKey) 183 | assert.Equal(t, rate, tst.expectedSampleRate) 184 | assert.Equal(t, p.currentCounts[tst.inputKey], tst.expectedCurrentCountForKey) 185 | } 186 | } 187 | 188 | func TestPerKeyThroughputRace(t *testing.T) { 189 | p := &PerKeyThroughput{ 190 | PerKeyThroughputPerSec: 2, 191 | currentCounts: map[string]int{}, 192 | savedSampleRates: map[string]int{}, 193 | } 194 | wg := sync.WaitGroup{} 195 | wg.Add(1) 196 | wg.Add(1) 197 | // set up 100 parallel readers, each reading 1000 times 198 | go func() { 199 | for i := 0; i < 100; i++ { 200 | wg.Add(1) 201 | go func(i int) { 202 | for j := 0; j < 1000; j++ { 203 | rate := p.GetSampleRate("key" + strconv.Itoa(i)) 204 | assert.NotEqual(t, rate, 0, "rate should never be zero") 205 | } 206 | wg.Done() 207 | }(i) 208 | } 209 | wg.Done() 210 | }() 211 | go func() { 212 | for i := 0; i < 100; i++ { 213 | p.updateMaps() 214 | } 215 | wg.Done() 216 | }() 217 | wg.Wait() 218 | } 219 | 220 | func TestPerKeyThroughputMaxKeys(t *testing.T) { 221 | p := &PerKeyThroughput{ 222 | MaxKeys: 3, 223 | } 224 | p.currentCounts = map[string]int{ 225 | "one": 1, 226 | "two": 1, 227 | } 228 | p.savedSampleRates = map[string]int{} 229 | 230 | // with MaxKeys 3, we are under the key limit, so three should get added 231 | p.GetSampleRate("three") 232 | assert.Equal(t, 3, len(p.currentCounts)) 233 | assert.Equal(t, 1, p.currentCounts["three"]) 234 | // Now we're at 3 keys - four should not be 
added 235 | p.GetSampleRate("four") 236 | assert.Equal(t, 3, len(p.currentCounts)) 237 | _, found := p.currentCounts["four"] 238 | assert.Equal(t, false, found) 239 | // We should still support bumping counts for existing keys 240 | p.GetSampleRate("one") 241 | assert.Equal(t, 3, len(p.currentCounts)) 242 | assert.Equal(t, 2, p.currentCounts["one"]) 243 | } 244 | 245 | func TestPerKeyThroughput_Start(t *testing.T) { 246 | tests := []struct { 247 | name string 248 | ClearFrequencySec int 249 | ClearFrequencyDuration time.Duration 250 | wantDuration time.Duration 251 | wantErr bool 252 | }{ 253 | {"sec only", 2, 0, 2 * time.Second, false}, 254 | {"dur only", 0, 1003 * time.Millisecond, 1003 * time.Millisecond, false}, 255 | {"default", 0, 0, 30 * time.Second, false}, 256 | {"both", 2, 2 * time.Second, 0, true}, 257 | } 258 | for _, tt := range tests { 259 | t.Run(tt.name, func(t *testing.T) { 260 | a := &PerKeyThroughput{ 261 | ClearFrequencySec: tt.ClearFrequencySec, 262 | ClearFrequencyDuration: tt.ClearFrequencyDuration, 263 | } 264 | err := a.Start() 265 | if (err != nil) != tt.wantErr { 266 | t.Errorf("PerKeyThroughput error = %v, wantErr %v", err, tt.wantErr) 267 | } 268 | if err == nil { 269 | defer a.Stop() 270 | if tt.wantDuration != a.ClearFrequencyDuration { 271 | t.Errorf("PerKeyThroughput duration mismatch = want %v, got %v", tt.wantDuration, a.ClearFrequencyDuration) 272 | } 273 | } 274 | }) 275 | } 276 | } 277 | -------------------------------------------------------------------------------- /static.go: -------------------------------------------------------------------------------- 1 | package dynsampler 2 | 3 | import "sync" 4 | 5 | // Static implements Sampler with a static mapping for sample rates. This is 6 | // useful if you have a known set of keys that you want to sample at specific 7 | // rates and apply a default to everything else. 
8 | type Static struct { 9 | // Rates is the set of sample rates to use 10 | Rates map[string]int 11 | // Default is the value to use if the key is not whitelisted in Rates 12 | Default int 13 | 14 | lock sync.Mutex 15 | 16 | // metrics 17 | requestCount int64 18 | eventCount int64 19 | } 20 | 21 | // Ensure we implement the sampler interface 22 | var _ Sampler = (*Static)(nil) 23 | 24 | // Start initializes the static dynsampler 25 | func (s *Static) Start() error { 26 | if s.Default == 0 { 27 | s.Default = 1 28 | } 29 | return nil 30 | } 31 | 32 | func (s *Static) Stop() error { 33 | return nil 34 | } 35 | 36 | // GetSampleRate takes a key and returns the appropriate sample rate for that 37 | // key. 38 | func (s *Static) GetSampleRate(key string) int { 39 | return s.GetSampleRateMulti(key, 1) 40 | } 41 | 42 | // GetSampleRateMulti takes a key representing count spans and returns the 43 | // appropriate sample rate for that key. 44 | func (s *Static) GetSampleRateMulti(key string, count int) int { 45 | s.lock.Lock() 46 | defer s.lock.Unlock() 47 | 48 | s.requestCount++ 49 | s.eventCount += int64(count) 50 | if rate, found := s.Rates[key]; found { 51 | return rate 52 | } 53 | return s.Default 54 | } 55 | 56 | // SaveState is not implemented 57 | func (s *Static) SaveState() ([]byte, error) { 58 | return nil, nil 59 | } 60 | 61 | // LoadState is not implemented 62 | func (s *Static) LoadState(state []byte) error { 63 | return nil 64 | } 65 | 66 | func (s *Static) GetMetrics(prefix string) map[string]int64 { 67 | s.lock.Lock() 68 | defer s.lock.Unlock() 69 | mets := map[string]int64{ 70 | prefix + "request_count": s.requestCount, 71 | prefix + "event_count": s.eventCount, 72 | prefix + "keyspace_size": int64(len(s.Rates)), 73 | } 74 | return mets 75 | } 76 | -------------------------------------------------------------------------------- /static_test.go: -------------------------------------------------------------------------------- 1 | package dynsampler 2 | 3 | 
import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestStaticGetSampleRate(t *testing.T) { 10 | s := &Static{ 11 | Rates: map[string]int{ 12 | "one": 5, 13 | "two": 10, 14 | }, 15 | Default: 3, 16 | } 17 | assert.Equal(t, s.GetSampleRate("one"), 5) 18 | assert.Equal(t, s.GetSampleRate("two"), 10) 19 | assert.Equal(t, s.GetSampleRate("three"), 3) 20 | 21 | } 22 | -------------------------------------------------------------------------------- /totalthroughput.go: -------------------------------------------------------------------------------- 1 | package dynsampler 2 | 3 | import ( 4 | "fmt" 5 | "math" 6 | "sync" 7 | "time" 8 | ) 9 | 10 | // TotalThroughput implements Sampler and attempts to meet a goal of a fixed 11 | // number of events per second sent to Honeycomb. 12 | // 13 | // If your key space is sharded across different servers, this is a good method 14 | // for making sure each server sends roughly the same volume of content to 15 | // Honeycomb. It performs poorly when the active keyspace is very large. 16 | // 17 | // GoalThroughputSec * ClearFrequencyDuration (in seconds) defines the upper 18 | // limit of the number of keys that can be reported and stay under the goal, but 19 | // with that many keys, you'll only get one event per key per ClearFrequencySec, 20 | // which is very coarse. You should aim for at least 1 event per key per sec to 21 | // 1 event per key per 10sec to get reasonable data. In other words, the number 22 | // of active keys should be less than 10*GoalThroughputSec. 23 | type TotalThroughput struct { 24 | // DEPRECATED -- use ClearFrequencyDuration. 25 | // ClearFrequencySec is how often the counters reset in seconds. 26 | ClearFrequencySec int 27 | 28 | // ClearFrequencyDuration is how often the counters reset as a Duration. 29 | // Note that either this or ClearFrequencySec can be specified, but not both. 30 | // If neither one is set, the default is 30s. 
31 | ClearFrequencyDuration time.Duration 32 | 33 | // GoalThroughputPerSec is the target number of events to send per second. 34 | // Sample rates are generated to squash the total throughput down to match the 35 | // goal throughput. Actual throughput may exceed goal throughput. default 100 36 | GoalThroughputPerSec int 37 | 38 | // MaxKeys, if greater than 0, limits the number of distinct keys used to build 39 | // the sample rate map within the interval defined by `ClearFrequencySec`. Once 40 | // MaxKeys is reached, new keys will not be included in the sample rate map, but 41 | // existing keys will continue to be be counted. 42 | MaxKeys int 43 | 44 | savedSampleRates map[string]int 45 | currentCounts map[string]int 46 | done chan struct{} 47 | 48 | lock sync.Mutex 49 | 50 | // metrics 51 | requestCount int64 52 | eventCount int64 53 | } 54 | 55 | // Ensure we implement the sampler interface 56 | var _ Sampler = (*TotalThroughput)(nil) 57 | 58 | func (t *TotalThroughput) Start() error { 59 | // apply defaults 60 | if t.ClearFrequencyDuration != 0 && t.ClearFrequencySec != 0 { 61 | return fmt.Errorf("the ClearFrequencySec configuration value is deprecated; use only ClearFrequencyDuration") 62 | } 63 | 64 | if t.ClearFrequencyDuration == 0 && t.ClearFrequencySec == 0 { 65 | t.ClearFrequencyDuration = 30 * time.Second 66 | } else if t.ClearFrequencySec != 0 { 67 | t.ClearFrequencyDuration = time.Duration(t.ClearFrequencySec) * time.Second 68 | } 69 | 70 | if t.GoalThroughputPerSec == 0 { 71 | t.GoalThroughputPerSec = 100 72 | } 73 | 74 | // initialize internal variables 75 | t.savedSampleRates = make(map[string]int) 76 | t.currentCounts = make(map[string]int) 77 | t.done = make(chan struct{}) 78 | 79 | // spin up calculator 80 | go func() { 81 | ticker := time.NewTicker(t.ClearFrequencyDuration) 82 | defer ticker.Stop() 83 | for { 84 | select { 85 | case <-ticker.C: 86 | t.updateMaps() 87 | case <-t.done: 88 | return 89 | } 90 | } 91 | }() 92 | return nil 93 | } 
94 | 95 | func (t *TotalThroughput) Stop() error { 96 | close(t.done) 97 | return nil 98 | } 99 | 100 | // updateMaps calculates a new saved rate map based on the contents of the 101 | // counter map 102 | func (t *TotalThroughput) updateMaps() { 103 | // make a local copy of the sample counters for calculation 104 | t.lock.Lock() 105 | tmpCounts := t.currentCounts 106 | t.currentCounts = make(map[string]int) 107 | t.lock.Unlock() 108 | // short circuit if no traffic 109 | numKeys := len(tmpCounts) 110 | if numKeys == 0 { 111 | // no traffic the last 30s. clear the result map 112 | t.lock.Lock() 113 | defer t.lock.Unlock() 114 | t.savedSampleRates = make(map[string]int) 115 | return 116 | } 117 | // figure out our target throughput per key over ClearFrequencyDuration 118 | totalGoalThroughput := float64(t.GoalThroughputPerSec) * t.ClearFrequencyDuration.Seconds() 119 | // split the total throughput equally across the number of keys. 120 | throughputPerKey := float64(totalGoalThroughput) / float64(numKeys) 121 | // for each key, calculate sample rate by dividing counted events by the 122 | // desired number of events 123 | newSavedSampleRates := make(map[string]int) 124 | for k, v := range tmpCounts { 125 | rate := int(math.Max(1, (float64(v) / float64(throughputPerKey)))) 126 | newSavedSampleRates[k] = rate 127 | } 128 | // save newly calculated sample rates 129 | t.lock.Lock() 130 | defer t.lock.Unlock() 131 | t.savedSampleRates = newSavedSampleRates 132 | } 133 | 134 | // GetSampleRate takes a key and returns the appropriate sample rate for that 135 | // key. 136 | func (t *TotalThroughput) GetSampleRate(key string) int { 137 | return t.GetSampleRateMulti(key, 1) 138 | } 139 | 140 | // GetSampleRateMulti takes a key representing count spans and returns the 141 | // appropriate sample rate for that key. 
func (t *TotalThroughput) GetSampleRateMulti(key string, count int) int {
	t.lock.Lock()
	defer t.lock.Unlock()

	t.requestCount++
	t.eventCount += int64(count)

	// Enforce MaxKeys limit on the size of the map
	if t.MaxKeys > 0 {
		// If a key already exists, add the count. If not, but we're under the limit, store a new key
		if _, found := t.currentCounts[key]; found || len(t.currentCounts) < t.MaxKeys {
			t.currentCounts[key] += count
		}
	} else {
		t.currentCounts[key] += count
	}
	// keys with no saved rate fall through to a rate of 1
	if rate, found := t.savedSampleRates[key]; found {
		return rate
	}
	return 1
}

// SaveState is not implemented
func (t *TotalThroughput) SaveState() ([]byte, error) {
	return nil, nil
}

// LoadState is not implemented
func (t *TotalThroughput) LoadState(state []byte) error {
	return nil
}

// GetMetrics returns the request count, event count and the number of keys
// seen so far in the current interval, each name prefixed with prefix.
func (t *TotalThroughput) GetMetrics(prefix string) map[string]int64 {
	t.lock.Lock()
	defer t.lock.Unlock()
	mets := map[string]int64{
		prefix + "request_count": t.requestCount,
		prefix + "event_count":   t.eventCount,
		prefix + "keyspace_size": int64(len(t.currentCounts)),
	}
	return mets
}
--------------------------------------------------------------------------------
/totalthroughput_test.go:
--------------------------------------------------------------------------------
package dynsampler

import (
	"fmt"
	"strconv"
	"sync"
	"testing"
	"time"

	"github.com/stretchr/testify/assert"
)

// TestTotalThroughputUpdateMaps feeds counter maps to updateMaps and checks
// the resulting sample rates for a goal of 20 events/sec over 30s.
func TestTotalThroughputUpdateMaps(t *testing.T) {
	s := &TotalThroughput{
		ClearFrequencyDuration: 30 * time.Second,
		GoalThroughputPerSec:   20,
	}
	tsts := []struct {
		inputSampleCount         map[string]int
		expectedSavedSampleRates map[string]int
	}{
		{
			map[string]int{
				"one":   1,
				"two":   1,
				"three": 2,
				"four":  5,
				"five":  8,
				"six":   15,
				"seven": 45,
				"eight": 612,
				"nine":  2000,
				"ten":   10000,
			},
			map[string]int{
				"one":   1,
				"two":   1,
				"three": 1,
				"four":  1,
				"five":  1,
				"six":   1,
				"seven": 1,
				"eight": 10,
				"nine":  33,
				"ten":   166,
			},
		},
		{
			map[string]int{
				"one":   1,
				"two":   1,
				"three": 2,
				"four":  5,
				"five":  8,
				"six":   15,
				"seven": 45,
				"eight": 50,
				"nine":  60,
			},
			map[string]int{
				"one":   1,
				"two":   1,
				"three": 1,
				"four":  1,
				"five":  1,
				"six":   1,
				"seven": 1,
				"eight": 1,
				"nine":  1,
			},
		},
		{
			map[string]int{
				"one":   1,
				"two":   1,
				"three": 2,
				"four":  5,
				"five":  7,
			},
			map[string]int{
				"one":   1,
				"two":   1,
				"three": 1,
				"four":  1,
				"five":  1,
			},
		},
		{
			map[string]int{
				"one":   1000,
				"two":   1000,
				"three": 2000,
				"four":  5000,
				"five":  7000,
			},
			map[string]int{
				"one":   8,
				"two":   8,
				"three": 16,
				"four":  41,
				"five":  58,
			},
		},
		{
			map[string]int{
				"one":   6000,
				"two":   6000,
				"three": 6000,
				"four":  6000,
				"five":  6000,
			},
			map[string]int{
				"one":   50,
				"two":   50,
				"three": 50,
				"four":  50,
				"five":  50,
			},
		},
		{
			map[string]int{
				"one": 12000,
			},
			map[string]int{
				"one": 20,
			},
		},
		{
			map[string]int{},
			map[string]int{},
		},
	}
	for i, tst := range tsts {
		s.currentCounts = tst.inputSampleCount
		s.updateMaps()
		assert.Equal(t, len(s.currentCounts), 0)
		assert.Equal(t, s.savedSampleRates, tst.expectedSavedSampleRates, fmt.Sprintf("test %d failed", i))
	}
}

// TestTotalThroughputGetSampleRate checks the returned rate and the counter
// increment for keys present in, and missing from, the two maps.
func TestTotalThroughputGetSampleRate(t *testing.T) {
	s := &TotalThroughput{}
	s.currentCounts = map[string]int{
		"one": 5,
		"two": 8,
	}
	s.savedSampleRates = map[string]int{
		"one":   10,
		"two":   1,
		"three": 5,
	}
	tsts := []struct {
		inputKey                   string
		expectedSampleRate         int
		expectedCurrentCountForKey int
	}{
		{"one", 10, 6},
		{"two", 1, 9},
		{"two", 1, 10},
		{"three", 5, 1}, // key missing from current counts
		{"three", 5, 2},
		{"four", 1, 1}, // key missing from current and saved counts
		{"four", 1, 2},
	}
	for _, tst := range tsts {
		rate := s.GetSampleRate(tst.inputKey)
		assert.Equal(t, rate, tst.expectedSampleRate)
		assert.Equal(t, s.currentCounts[tst.inputKey], tst.expectedCurrentCountForKey)
	}
}

// TestTotalThroughputRace runs GetSampleRate and updateMaps concurrently.
func TestTotalThroughputRace(t *testing.T) {
	s := &TotalThroughput{
		GoalThroughputPerSec: 2,
		currentCounts:        map[string]int{},
		savedSampleRates:     map[string]int{},
	}
	wg := sync.WaitGroup{}
	wg.Add(1)
	wg.Add(1)
	// set up 100 parallel readers, each reading 1000 times
	go func() {
		for i := 0; i < 100; i++ {
			wg.Add(1)
			go func(i int) {
				for j := 0; j < 1000; j++ {
					rate := s.GetSampleRate("key" + strconv.Itoa(i))
					assert.NotEqual(t, rate, 0, "rate should never be zero")
				}
				wg.Done()
			}(i)
		}
		wg.Done()
	}()
	go func() {
		for i := 0; i < 100; i++ {
			s.updateMaps()
		}
		wg.Done()
	}()
	wg.Wait()
}

// TestTotalThroughputMaxKeys checks that MaxKeys caps new keys while existing
// keys keep counting.
func TestTotalThroughputMaxKeys(t *testing.T) {
	tt := &TotalThroughput{
		MaxKeys: 3,
	}
	tt.currentCounts = map[string]int{
		"one": 1,
		"two": 1,
	}
	tt.savedSampleRates = map[string]int{}

	// with MaxKeys 3, we are under the key limit, so three should get added
	tt.GetSampleRate("three")
	assert.Equal(t, 3, len(tt.currentCounts))
	assert.Equal(t, 1, tt.currentCounts["three"])
	// Now we're at 3 keys - four should not be added
	tt.GetSampleRate("four")
	assert.Equal(t, 3, len(tt.currentCounts))
	_, found := tt.currentCounts["four"]
	assert.Equal(t, false, found)
	// We should still support bumping counts for existing keys
	tt.GetSampleRate("one")
	assert.Equal(t, 3, len(tt.currentCounts))
	assert.Equal(t, 2, tt.currentCounts["one"])
}

// TestTotalThroughput_Start checks how Start resolves the two clear-frequency
// settings, including the error when both are set.
func TestTotalThroughput_Start(t *testing.T) {
	tests := []struct {
		name                   string
		ClearFrequencySec      int
		ClearFrequencyDuration time.Duration
		wantDuration           time.Duration
		wantErr                bool
	}{
		{"sec only", 2, 0, 2 * time.Second, false},
		{"dur only", 0, 1003 * time.Millisecond, 1003 * time.Millisecond, false},
		{"default", 0, 0, 30 * time.Second, false},
		{"both", 2, 2 * time.Second, 0, true},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			a := &TotalThroughput{
				ClearFrequencySec:      tt.ClearFrequencySec,
				ClearFrequencyDuration: tt.ClearFrequencyDuration,
			}
			err := a.Start()
			if (err != nil) != tt.wantErr {
				t.Errorf("TotalThroughput error = %v, wantErr %v", err, tt.wantErr)
			}
			if err == nil {
				defer a.Stop()
				if tt.wantDuration != a.ClearFrequencyDuration {
					t.Errorf("TotalThroughput duration mismatch = want %v, got %v", tt.wantDuration, a.ClearFrequencyDuration)
				}
			}
		})
	}
}
--------------------------------------------------------------------------------
/windowedthroughput.go:
--------------------------------------------------------------------------------
package dynsampler

import (
	"math"
	"sync"
	"time"
)

// WindowedThroughput is an enhanced version of total throughput sampling.
// Just like the original throughput sampler, it attempts to meet the goal of fixed number of events
// per second sent to Honeycomb.
//
// The original throughput sampler updates the sampling rate every "ClearFrequency" seconds. While
// this parameter is configurable, it suffers from the following tradeoff:
//   - Decreasing it makes you more responsive to load spikes, but with the cost of making the
//     sampling decision on less data.
//   - Increasing it makes you less responsive to load spikes, but your sample rates will be more
//     stable because they are made with more data.
//
// The windowed throughput sampler resolves this by introducing two different, tunable parameters:
//   - UpdateFrequencyDuration: how often the sampling rate is recomputed
//   - LookbackFrequencyDuration: how far back we look in time to recompute our sampling rate.
//
// A standard configuration would be to set UpdateFrequencyDuration to 1s and
// LookbackFrequencyDuration to 30s. In this configuration, every second, we look back at the last
// 30s of data in order to compute the new sampling rate. The actual sampling rate computation is
// nearly identical to the original throughput sampler, but this variant has better support for
// floating point numbers.
//
// Because our lookback window is _rolling_ instead of static, we need a special data structure to
// quickly and efficiently store our data. The code and additional information for this
// data structure can be found in blocklist.go.
type WindowedThroughput struct {
	// UpdateFrequencyDuration is how often the sampling rate is recomputed, default is 1s.
	UpdateFrequencyDuration time.Duration

	// LookbackFrequencyDuration is how far back in time we look to dynamically adjust our sampling
	// rate. Default is 30 * UpdateFrequencyDuration. This will be 30s assuming the default
	// configuration of UpdateFrequencyDuration. We enforce this to be an _integer multiple_ of
	// UpdateFrequencyDuration.
	LookbackFrequencyDuration time.Duration

	// GoalThroughputPerSec is the target throughput per second.
	GoalThroughputPerSec float64

	// MaxKeys, if greater than 0, limits the number of distinct keys used to build
	// the sample rate map within the interval defined by `LookbackFrequencyDuration`. Once
	// MaxKeys is reached, new keys will not be included in the sample rate map, but
	// existing keys will continue to be counted.
	// If MaxKeys is set to 0 (default), there is no upper bound on the number of distinct keys.
	MaxKeys int

	savedSampleRates map[string]int
	done             chan struct{}
	countList        BlockList

	indexGenerator IndexGenerator

	lock sync.Mutex

	// metrics
	requestCount int64
	eventCount   int64
	numKeys      int
}

// Ensure we implement the sampler interface
var _ Sampler = (*WindowedThroughput)(nil)

// IndexGenerator turns timestamps into indexes. This is essentially a bucketing mechanism.
type IndexGenerator interface {
	// GetCurrentIndex returns the index corresponding to the current time.
	GetCurrentIndex() int64

	// DurationToIndexes returns the index differential for a particular duration -- i.e. 5 seconds =
	// how many ticks of the index.
	DurationToIndexes(duration time.Duration) int64
}

// UnixSecondsIndexGenerator is the standard implementation of IndexGenerator.
80 | type UnixSecondsIndexGenerator struct { 81 | DurationPerIndex time.Duration 82 | } 83 | 84 | func (g *UnixSecondsIndexGenerator) GetCurrentIndex() int64 { 85 | nsec := time.Now().UnixNano() 86 | return nsec / g.DurationPerIndex.Nanoseconds() 87 | } 88 | 89 | func (g *UnixSecondsIndexGenerator) DurationToIndexes(duration time.Duration) int64 { 90 | return duration.Nanoseconds() / g.DurationPerIndex.Nanoseconds() 91 | } 92 | 93 | func (t *WindowedThroughput) Start() error { 94 | // apply defaults 95 | if t.UpdateFrequencyDuration == 0 { 96 | t.UpdateFrequencyDuration = time.Second 97 | } 98 | if t.LookbackFrequencyDuration == 0 { 99 | t.LookbackFrequencyDuration = 30 * t.UpdateFrequencyDuration 100 | } 101 | // Floor LookbackFrequencyDuration to be an integer multiple of UpdateFrequencyDuration. 102 | t.LookbackFrequencyDuration = t.UpdateFrequencyDuration * 103 | (t.LookbackFrequencyDuration / t.UpdateFrequencyDuration) 104 | 105 | if t.GoalThroughputPerSec == 0 { 106 | t.GoalThroughputPerSec = 100 107 | } 108 | 109 | // Initialize countList. 110 | if t.MaxKeys > 0 { 111 | t.countList = NewBoundedBlockList(t.MaxKeys) 112 | } else { 113 | t.countList = NewUnboundedBlockList() 114 | } 115 | 116 | // Initialize internal variables. 117 | t.savedSampleRates = make(map[string]int) 118 | t.done = make(chan struct{}) 119 | // Initialize the index generator. Each UpdateFrequencyDuration represents a single tick of the 120 | // index. 121 | t.indexGenerator = &UnixSecondsIndexGenerator{ 122 | DurationPerIndex: t.UpdateFrequencyDuration, 123 | } 124 | 125 | // Spin up calculator. 
126 | go func() { 127 | ticker := time.NewTicker(t.UpdateFrequencyDuration) 128 | defer ticker.Stop() 129 | for { 130 | select { 131 | case <-ticker.C: 132 | t.updateMaps() 133 | case <-t.done: 134 | return 135 | } 136 | } 137 | }() 138 | return nil 139 | } 140 | 141 | func (t *WindowedThroughput) Stop() error { 142 | close(t.done) 143 | return nil 144 | } 145 | 146 | // updateMaps recomputes the sample rate based on the countList. 147 | func (t *WindowedThroughput) updateMaps() { 148 | currentIndex := t.indexGenerator.GetCurrentIndex() 149 | lookbackIndexes := t.indexGenerator.DurationToIndexes(t.LookbackFrequencyDuration) 150 | aggregateCounts := t.countList.AggregateCounts(currentIndex, lookbackIndexes) 151 | 152 | // Apply the same aggregation algorithm as total throughput 153 | // Short circuit if no traffic 154 | numKeys := len(aggregateCounts) 155 | if numKeys == 0 { 156 | // no traffic during the last period. 157 | t.lock.Lock() 158 | defer t.lock.Unlock() 159 | t.numKeys = 0 160 | t.savedSampleRates = make(map[string]int) 161 | return 162 | } 163 | // figure out our target throughput per key over the lookback window. 164 | totalGoalThroughput := t.GoalThroughputPerSec * t.LookbackFrequencyDuration.Seconds() 165 | // split the total throughput equally across the number of keys. 166 | throughputPerKey := float64(totalGoalThroughput) / float64(numKeys) 167 | // for each key, calculate sample rate by dividing counted events by the 168 | // desired number of events 169 | newSavedSampleRates := make(map[string]int) 170 | for k, v := range aggregateCounts { 171 | rate := int(math.Max(1, (float64(v) / float64(throughputPerKey)))) 172 | newSavedSampleRates[k] = rate 173 | } 174 | // save newly calculated sample rates 175 | t.lock.Lock() 176 | defer t.lock.Unlock() 177 | t.savedSampleRates = newSavedSampleRates 178 | t.numKeys = numKeys 179 | } 180 | 181 | // GetSampleRate takes a key and returns the appropriate sample rate for that 182 | // key. 
183 | func (t *WindowedThroughput) GetSampleRate(key string) int { 184 | return t.GetSampleRateMulti(key, 1) 185 | } 186 | 187 | // GetSampleRateMulti takes a key representing count spans and returns the 188 | // appropriate sample rate for that key. 189 | func (t *WindowedThroughput) GetSampleRateMulti(key string, count int) int { 190 | t.requestCount++ 191 | t.eventCount += int64(count) 192 | 193 | // Insert the new key into the map. 194 | current := t.indexGenerator.GetCurrentIndex() 195 | err := t.countList.IncrementKey(key, current, count) 196 | 197 | // We've reached MaxKeys, return 0. 198 | if err != nil { 199 | return 0 200 | } 201 | 202 | t.lock.Lock() 203 | defer t.lock.Unlock() 204 | if rate, found := t.savedSampleRates[key]; found { 205 | return rate 206 | } 207 | return 0 208 | } 209 | 210 | // SaveState is not implemented 211 | func (t *WindowedThroughput) SaveState() ([]byte, error) { 212 | return nil, nil 213 | } 214 | 215 | // LoadState is not implemented 216 | func (t *WindowedThroughput) LoadState(state []byte) error { 217 | return nil 218 | } 219 | 220 | func (t *WindowedThroughput) GetMetrics(prefix string) map[string]int64 { 221 | t.lock.Lock() 222 | defer t.lock.Unlock() 223 | mets := map[string]int64{ 224 | prefix + "request_count": t.requestCount, 225 | prefix + "event_count": t.eventCount, 226 | prefix + "keyspace_size": int64(t.numKeys), 227 | } 228 | return mets 229 | } 230 | -------------------------------------------------------------------------------- /windowedthroughput_test.go: -------------------------------------------------------------------------------- 1 | package dynsampler 2 | 3 | import ( 4 | "testing" 5 | 6 | "time" 7 | 8 | "github.com/stretchr/testify/assert" 9 | ) 10 | 11 | type TestIndexGenerator struct { 12 | CurrentIndex int64 13 | } 14 | 15 | func (g *TestIndexGenerator) GetCurrentIndex() int64 { 16 | return g.CurrentIndex 17 | } 18 | 19 | func (g *TestIndexGenerator) DurationToIndexes(duration time.Duration) int64 { 
20 | return int64(duration.Seconds()) 21 | } 22 | 23 | func TestHappyPath(t *testing.T) { 24 | indexGenerator := &TestIndexGenerator{} 25 | sampler := WindowedThroughput{ 26 | UpdateFrequencyDuration: 1 * time.Second, 27 | LookbackFrequencyDuration: 5 * time.Second, 28 | GoalThroughputPerSec: 2, 29 | indexGenerator: indexGenerator, 30 | countList: NewUnboundedBlockList(), 31 | } 32 | key := "test_key" 33 | 34 | // Time 0: 20 traces seen. 35 | for i := 0; i < 20; i++ { 36 | assert.Equal(t, 0, sampler.GetSampleRate(key)) 37 | } 38 | indexGenerator.CurrentIndex += 1 39 | sampler.updateMaps() 40 | 41 | // Time 1: 10 traces seen 42 | for i := 0; i < 10; i++ { 43 | assert.Equal(t, 2, sampler.GetSampleRate(key)) 44 | } 45 | indexGenerator.CurrentIndex += 1 46 | sampler.updateMaps() 47 | 48 | // Time 2: 50 traces seen 49 | for i := 0; i < 50; i++ { 50 | assert.Equal(t, 3, sampler.GetSampleRate(key)) 51 | } 52 | indexGenerator.CurrentIndex += 1 53 | sampler.updateMaps() 54 | 55 | // Time 3 & 4 & 5: 0 traces seen 56 | for i := 0; i < 3; i++ { 57 | indexGenerator.CurrentIndex += 1 58 | sampler.updateMaps() 59 | } 60 | 61 | // Time 6: 40 traces seen. 62 | for i := 0; i < 40; i++ { 63 | // This should look back from time (0 - 5] 64 | assert.Equal(t, 6, sampler.GetSampleRate(key)) 65 | } 66 | indexGenerator.CurrentIndex += 1 67 | sampler.updateMaps() 68 | 69 | // Time 7: 5 traces seen. 70 | for i := 0; i < 5; i++ { 71 | // This should look back from time (1 - 6] 72 | assert.Equal(t, 9, sampler.GetSampleRate(key)) 73 | } 74 | } 75 | 76 | func TestDropsOldBlocks(t *testing.T) { 77 | indexGenerator := &TestIndexGenerator{} 78 | sampler := WindowedThroughput{ 79 | UpdateFrequencyDuration: 1 * time.Second, 80 | LookbackFrequencyDuration: 5 * time.Second, 81 | GoalThroughputPerSec: 2, 82 | indexGenerator: indexGenerator, 83 | countList: NewUnboundedBlockList(), 84 | } 85 | key := "test_key" 86 | 87 | // Time 0: 20 traces seen. 
88 | for i := 0; i < 20; i++ { 89 | assert.Equal(t, 0, sampler.GetSampleRate(key)) 90 | } 91 | 92 | for i := 0; i < 7; i++ { 93 | indexGenerator.CurrentIndex += 1 94 | sampler.updateMaps() 95 | } 96 | 97 | // Time 6: 20 traces seen. 98 | for i := 0; i < 20; i++ { 99 | assert.Equal(t, 0, sampler.GetSampleRate(key)) 100 | } 101 | } 102 | 103 | func TestSetsDefaultsCorrectly(t *testing.T) { 104 | sampler1 := WindowedThroughput{} 105 | sampler1.Start() 106 | 107 | assert.Equal(t, time.Second, sampler1.UpdateFrequencyDuration) 108 | assert.Equal(t, 30*time.Second, sampler1.LookbackFrequencyDuration) 109 | 110 | sampler2 := WindowedThroughput{ 111 | UpdateFrequencyDuration: 5 * time.Second, 112 | LookbackFrequencyDuration: 18 * time.Second, 113 | } 114 | sampler2.Start() 115 | assert.Equal(t, 5*time.Second, sampler2.UpdateFrequencyDuration) 116 | assert.Equal(t, 15*time.Second, sampler2.LookbackFrequencyDuration) 117 | } 118 | --------------------------------------------------------------------------------