├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── config.yml │ └── feature_request.md ├── PULL_REQUEST_TEMPLATE.md ├── actionlint.yml ├── renovate.json └── workflows │ ├── ci-ristretto-tests.yml │ └── trunk.yml ├── .gitignore ├── .trunk ├── .gitignore ├── configs │ ├── .checkov.yaml │ ├── .golangci.json │ ├── .markdownlint.json │ ├── .prettierrc │ ├── .yamllint.yaml │ └── svgo.config.mjs └── trunk.yaml ├── .vscode ├── extensions.json └── settings.json ├── CHANGELOG.md ├── LICENSE ├── README.md ├── SECURITY.md ├── benchmarks ├── Hit Ratios - CODASYL (ARC-OLTP).svg ├── Hit Ratios - Database (ARC-DS1).svg ├── Hit Ratios - Glimpse (LIRS-GLI).svg ├── Hit Ratios - Search (ARC-S3).svg ├── Throughput - Mixed.svg ├── Throughput - Read (Zipfian).svg └── Throughput - Write (Zipfian).svg ├── cache.go ├── cache_test.go ├── contrib ├── demo │ ├── node.go │ ├── node_allocator.go │ ├── node_golang.go │ └── node_jemalloc.go ├── memtest │ ├── .gitignore │ ├── README.md │ ├── main.go │ ├── nojemalloc.go │ └── withjemalloc.go └── memtestc │ ├── .gitignore │ └── list.c ├── go.mod ├── go.sum ├── policy.go ├── policy_test.go ├── ring.go ├── ring_test.go ├── sim ├── gli.lirs.gz ├── sim.go └── sim_test.go ├── sketch.go ├── sketch_test.go ├── store.go ├── store_test.go ├── stress_test.go ├── ttl.go ├── ttl_test.go └── z ├── LICENSE ├── README.md ├── allocator.go ├── allocator_test.go ├── bbloom.go ├── bbloom_test.go ├── btree.go ├── btree_test.go ├── buffer.go ├── buffer_test.go ├── calloc.go ├── calloc_32bit.go ├── calloc_64bit.go ├── calloc_jemalloc.go ├── calloc_nojemalloc.go ├── calloc_test.go ├── file.go ├── file_default.go ├── file_linux.go ├── flags.go ├── flags_test.go ├── histogram.go ├── histogram_test.go ├── mmap.go ├── mmap_darwin.go ├── mmap_js.go ├── mmap_linux.go ├── mmap_plan9.go ├── mmap_unix.go ├── mmap_wasip1.go ├── mmap_windows.go ├── mremap_nosize.go ├── mremap_size.go ├── rtutil.go ├── rtutil.s ├── rtutil_test.go ├── simd ├── add_test.go ├── asm2.go ├── baseline.go ├── search.go ├── search_amd64.s └── stub_search_amd64.go ├── z.go └── z_test.go /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # CODEOWNERS info: https://help.github.com/en/articles/about-code-owners 2 | # Owners are automatically requested for review for PRs that changes code 3 | # that they own. 4 | * @hypermodeinc/database 5 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: "" 5 | labels: bug 6 | assignees: "" 7 | --- 8 | 9 | ## Describe the bug 10 | 11 | A clear and concise description of what the bug is. 12 | 13 | ## To Reproduce 14 | 15 | Steps to reproduce the behavior: 16 | 17 | 1. Go to '...' 18 | 2. Click on '....' 19 | 3. Scroll down to '....' 20 | 4. See error 21 | 22 | ## Expected behavior 23 | 24 | A clear and concise description of what you expected to happen. 25 | 26 | ## Screenshots 27 | 28 | If applicable, add screenshots to help explain your problem. 29 | 30 | ## Environment 31 | 32 | - OS: [e.g. macOS, Windows, Ubuntu] 33 | - Language [e.g. AssemblyScript, Go] 34 | - Version [e.g. v0.xx] 35 | 36 | ## Additional context 37 | 38 | Add any other context about the problem here. 
39 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | contact_links: 3 | - name: Ristretto Community Support 4 | url: https://discord.hypermode.com 5 | about: Please ask and answer questions here 6 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: "" 5 | labels: "" 6 | assignees: "" 7 | --- 8 | 9 | ## Is your feature request related to a problem? Please describe 10 | 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | ## Describe the solution you'd like 14 | 15 | A clear and concise description of what you want to happen. 16 | 17 | ## Describe alternatives you've considered 18 | 19 | A clear and concise description of any alternative solutions or features you've considered. 20 | 21 | ## Additional context 22 | 23 | Add any other context or screenshots about the feature request here. 24 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | **Description** 2 | 3 | Please explain the changes you made here. 4 | 5 | **Checklist** 6 | 7 | - [ ] Code compiles correctly and linting passes locally 8 | - [ ] For all _code_ changes, an entry added to the `CHANGELOG.md` file describing and linking to 9 | this PR 10 | - [ ] Tests added for new functionality, or regression tests for bug fixes added as applicable 11 | 12 | **Instructions** 13 | 14 | - The PR title should follow the [Conventional Commits](https://www.conventionalcommits.org/) 15 | syntax, leading with `fix:`, `feat:`, `chore:`, `ci:`, etc. 16 | - The description should briefly explain what the PR is about. In the case of a bugfix, describe or 17 | link to the bug. 18 | - In the checklist section, check the boxes in that are applicable, using `[x]` syntax. 19 | - If not applicable, remove the entire line. Only leave the box unchecked if you intend to come 20 | back and check the box later. 21 | - Delete the `Instructions` line and everything below it, to indicate you have read and are 22 | following these instructions. 🙂 23 | 24 | Thank you for your contribution to Ristretto! 
25 | -------------------------------------------------------------------------------- /.github/actionlint.yml: -------------------------------------------------------------------------------- 1 | self-hosted-runner: 2 | # Labels of self-hosted runner in array of string 3 | labels: 4 | - warp-ubuntu-latest-x64-4x 5 | -------------------------------------------------------------------------------- /.github/renovate.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://docs.renovatebot.com/renovate-schema.json", 3 | "extends": ["local>hypermodeinc/renovate-config"], 4 | "rangeStrategy": "widen" 5 | } 6 | -------------------------------------------------------------------------------- /.github/workflows/ci-ristretto-tests.yml: -------------------------------------------------------------------------------- 1 | name: ci-ristretto-tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | types: 9 | - opened 10 | - reopened 11 | - synchronize 12 | - ready_for_review 13 | branches: 14 | - main 15 | 16 | permissions: 17 | contents: read 18 | pull-requests: write 19 | 20 | jobs: 21 | ristretto-tests: 22 | runs-on: warp-ubuntu-latest-x64-4x 23 | steps: 24 | - uses: actions/checkout@v4 25 | - name: Setup Go 26 | uses: actions/setup-go@v5 27 | with: 28 | go-version-file: go.mod 29 | - name: Run Unit Tests 30 | run: go test -timeout=20m -race -covermode atomic -coverprofile=covprofile ./... 31 | - name: Save coverage profile 32 | uses: actions/upload-artifact@v4 33 | with: 34 | name: covprofile 35 | path: ./covprofile 36 | -------------------------------------------------------------------------------- /.github/workflows/trunk.yml: -------------------------------------------------------------------------------- 1 | name: Trunk Code Quality 2 | on: 3 | pull_request: 4 | branches: main 5 | 6 | permissions: 7 | contents: read 8 | actions: write 9 | checks: write 10 | 11 | jobs: 12 | trunk-code-quality: 13 | name: Trunk Code Quality 14 | uses: hypermodeinc/.github/.github/workflows/trunk.yml@main 15 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # IDE 2 | .idea -------------------------------------------------------------------------------- /.trunk/.gitignore: -------------------------------------------------------------------------------- 1 | *out 2 | *logs 3 | *actions 4 | *notifications 5 | *tools 6 | plugins 7 | user_trunk.yaml 8 | user.yaml 9 | tmp 10 | -------------------------------------------------------------------------------- /.trunk/configs/.checkov.yaml: -------------------------------------------------------------------------------- 1 | skip-check: 2 | - CKV_GHA_7 3 | -------------------------------------------------------------------------------- /.trunk/configs/.golangci.json: -------------------------------------------------------------------------------- 1 | { 2 | "run": { 3 | "build-tags": ["jemalloc"] 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /.trunk/configs/.markdownlint.json: -------------------------------------------------------------------------------- 1 | { 2 | "line-length": { "line_length": 150, "tables": false }, 3 | "no-inline-html": false, 4 | "no-bare-urls": false, 5 | "no-space-in-emphasis": false, 6 | "no-emphasis-as-heading": false, 7 | "first-line-heading": false 8 | } 9 | 
-------------------------------------------------------------------------------- /.trunk/configs/.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "semi": false, 3 | "proseWrap": "always", 4 | "printWidth": 100 5 | } 6 | -------------------------------------------------------------------------------- /.trunk/configs/.yamllint.yaml: -------------------------------------------------------------------------------- 1 | rules: 2 | quoted-strings: 3 | required: only-when-needed 4 | extra-allowed: ["{|}"] 5 | key-duplicates: {} 6 | octal-values: 7 | forbid-implicit-octal: true 8 | -------------------------------------------------------------------------------- /.trunk/configs/svgo.config.mjs: -------------------------------------------------------------------------------- 1 | export default { 2 | plugins: [ 3 | { 4 | name: "preset-default", 5 | params: { 6 | overrides: { 7 | removeViewBox: false, // https://github.com/svg/svgo/issues/1128 8 | sortAttrs: true, 9 | removeOffCanvasPaths: true, 10 | }, 11 | }, 12 | }, 13 | ], 14 | } 15 | -------------------------------------------------------------------------------- /.trunk/trunk.yaml: -------------------------------------------------------------------------------- 1 | # This file controls the behavior of Trunk: https://docs.trunk.io/cli 2 | # To learn more about the format of this file, see https://docs.trunk.io/reference/trunk-yaml 3 | version: 0.1 4 | 5 | cli: 6 | version: 1.22.10 7 | 8 | # Trunk provides extensibility via plugins. (https://docs.trunk.io/plugins) 9 | plugins: 10 | sources: 11 | - id: trunk 12 | ref: v1.6.7 13 | uri: https://github.com/trunk-io/plugins 14 | 15 | # Many linters and tools depend on runtimes - configure them here. (https://docs.trunk.io/runtimes) 16 | runtimes: 17 | enabled: 18 | - go@1.23.5 19 | - node@18.20.5 20 | - python@3.10.8 21 | 22 | # This is the section where you manage your linters. (https://docs.trunk.io/check/configuration) 23 | lint: 24 | enabled: 25 | - trivy@0.59.1 26 | - renovate@39.161.0 27 | - actionlint@1.7.7 28 | - checkov@3.2.365 29 | - git-diff-check 30 | - gofmt@1.20.4 31 | - golangci-lint@1.63.4 32 | - markdownlint@0.44.0 33 | - osv-scanner@1.9.2 34 | - prettier@3.4.2 35 | - svgo@3.3.2 36 | - trufflehog@3.88.4 37 | - yamllint@1.35.1 38 | actions: 39 | enabled: 40 | - trunk-announce 41 | - trunk-check-pre-push 42 | - trunk-fmt-pre-commit 43 | - trunk-upgrade-available 44 | -------------------------------------------------------------------------------- /.vscode/extensions.json: -------------------------------------------------------------------------------- 1 | { 2 | "recommendations": ["trunk.io"] 3 | } 4 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "editor.formatOnSave": true, 3 | "editor.defaultFormatter": "trunk.io", 4 | "editor.trimAutoWhitespace": true, 5 | "trunk.autoInit": false 6 | } 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 
175 | 176 | END OF TERMS AND CONDITIONS 177 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Ristretto 2 | 3 | [![GitHub License](https://img.shields.io/github/license/hypermodeinc/ristretto)](https://github.com/hypermodeinc/ristretto?tab=Apache-2.0-1-ov-file#readme) 4 | [![chat](https://img.shields.io/discord/1267579648657850441)](https://discord.hypermode.com) 5 | [![GitHub Repo stars](https://img.shields.io/github/stars/hypermodeinc/ristretto)](https://github.com/hypermodeinc/ristretto/stargazers) 6 | [![GitHub commit activity](https://img.shields.io/github/commit-activity/m/hypermodeinc/ristretto)](https://github.com/hypermodeinc/ristretto/commits/main/) 7 | [![Go Report Card](https://img.shields.io/badge/go%20report-A%2B-brightgreen)](https://goreportcard.com/report/github.com/dgraph-io/ristretto) 8 | 9 | Ristretto is a fast, concurrent cache library built with a focus on performance and correctness. 10 | 11 | The motivation to build Ristretto comes from the need for a contention-free cache in [Dgraph][]. 12 | 13 | [Dgraph]: https://github.com/hypermodeinc/dgraph 14 | 15 | ## Features 16 | 17 | - **High Hit Ratios** - with our unique admission/eviction policy pairing, Ristretto's performance 18 | is best in class. 19 | - **Eviction: SampledLFU** - on par with exact LRU and better performance on Search and Database 20 | traces. 21 | - **Admission: TinyLFU** - extra performance with little memory overhead (12 bits per counter). 22 | - **Fast Throughput** - we use a variety of techniques for managing contention and the result is 23 | excellent throughput. 24 | - **Cost-Based Eviction** - any large new item deemed valuable can evict multiple smaller items 25 | (cost could be anything). 26 | - **Fully Concurrent** - you can use as many goroutines as you want with little throughput 27 | degradation. 28 | - **Metrics** - optional performance metrics for throughput, hit ratios, and other stats. 29 | - **Simple API** - just figure out your ideal `Config` values and you're off and running. 30 | 31 | ## Status 32 | 33 | Ristretto is production-ready. See [Projects using Ristretto](#projects-using-ristretto). 34 | 35 | ## Getting Started 36 | 37 | ### Installing 38 | 39 | To start using Ristretto, install Go 1.21 or above. Ristretto needs go modules. From your project, 40 | run the following command 41 | 42 | ```sh 43 | go get github.com/dgraph-io/ristretto/v2 44 | ``` 45 | 46 | This will retrieve the library. 47 | 48 | #### Choosing a version 49 | 50 | Following these rules: 51 | 52 | - v1.x.x is the first version used in most programs with Ristretto dependencies. 53 | - v2.x.x is the new version with support for generics, for which it has a slightly different 54 | interface. This version is designed to solve compatibility problems of programs using the old 55 | version of Ristretto. If you start writing a new program, it is recommended to use this version. 56 | 57 | ## Usage 58 | 59 | ```go 60 | package main 61 | 62 | import ( 63 | "fmt" 64 | 65 | "github.com/dgraph-io/ristretto/v2" 66 | ) 67 | 68 | func main() { 69 | cache, err := ristretto.NewCache(&ristretto.Config[string, string]{ 70 | NumCounters: 1e7, // number of keys to track frequency of (10M). 71 | MaxCost: 1 << 30, // maximum cost of cache (1GB). 72 | BufferItems: 64, // number of keys per Get buffer. 
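		// A common rule of thumb: NumCounters is roughly 10x the number of items
		// you expect the cache to hold when full, and MaxCost is the total budget
		// in whatever cost units you pass to Set (bytes, an item count, etc.).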
73 | }) 74 | if err != nil { 75 | panic(err) 76 | } 77 | defer cache.Close() 78 | 79 | // set a value with a cost of 1 80 | cache.Set("key", "value", 1) 81 | 82 | // wait for value to pass through buffers 83 | cache.Wait() 84 | 85 | // get value from cache 86 | value, found := cache.Get("key") 87 | if !found { 88 | panic("missing value") 89 | } 90 | fmt.Println(value) 91 | 92 | // del value from cache 93 | cache.Del("key") 94 | } 95 | ``` 96 | 97 | ## Benchmarks 98 | 99 | The benchmarks can be found in 100 | https://github.com/hypermodeinc/dgraph-benchmarks/tree/main/cachebench/ristretto. 101 | 102 | ### Hit Ratios for Search 103 | 104 | This trace is described as "disk read accesses initiated by a large commercial search engine in 105 | response to various web search requests." 106 | 107 |

108 | Graph showing hit ratios comparison for search workload 110 |

111 | 112 | ### Hit Ratio for Database 113 | 114 | This trace is described as "a database server running at a commercial site running an ERP 115 | application on top of a commercial database." 116 | 117 |

118 | Graph showing hit ratios comparison for database workload 120 |

121 | 122 | ### Hit Ratio for Looping 123 | 124 | This trace demonstrates a looping access pattern. 125 | 126 |

127 | Graph showing hit ratios comparison for looping access pattern 129 |

130 | 131 | ### Hit Ratio for CODASYL 132 | 133 | This trace is described as "references to a CODASYL database for a one hour period." 134 | 135 |

136 | Graph showing hit ratios comparison for CODASYL workload 138 |

139 | 140 | ### Throughput for Mixed Workload 141 | 142 |

143 | Graph showing throughput comparison for mixed workload 145 |

146 | 147 | ### Throughput for Read Workload 148 | 149 |

150 | Graph showing throughput comparison for read workload 152 |

153 | 154 | ### Throughput for Write Workload 155 | 156 |

157 | Graph showing throughput comparison for write workload 159 |

160 | 161 | ## Projects Using Ristretto 162 | 163 | Below is a list of known projects that use Ristretto: 164 | 165 | - [Badger](https://github.com/hypermodeinc/badger) - Embeddable key-value DB in Go 166 | - [Dgraph](https://github.com/hypermodeinc/dgraph) - Horizontally scalable and distributed GraphQL 167 | database with a graph backend 168 | 169 | ## FAQ 170 | 171 | ### How are you achieving this performance? What shortcuts are you taking? 172 | 173 | We go into detail in the 174 | [Ristretto blog post](https://hypermode.com/blog/introducing-ristretto-high-perf-go-cache/), but in 175 | short: our throughput performance can be attributed to a mix of batching and eventual consistency. 176 | Our hit ratio performance is mostly due to an excellent 177 | [admission policy](https://arxiv.org/abs/1512.00727) and SampledLFU eviction policy. 178 | 179 | As for "shortcuts," the only thing Ristretto does that could be construed as one is dropping some 180 | Set calls. That means a Set call for a new item (updates are guaranteed) isn't guaranteed to make it 181 | into the cache. The new item could be dropped at two points: when passing through the Set buffer or 182 | when passing through the admission policy. However, this doesn't affect hit ratios much at all as we 183 | expect the most popular items to be Set multiple times and eventually make it in the cache. 184 | 185 | ### Is Ristretto distributed? 186 | 187 | No, it's just like any other Go library that you can import into your project and use in a single 188 | process. 189 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Reporting Security Concerns 2 | 3 | We take the security of Ristretto very seriously. If you believe you have found a security vulnerability 4 | in Ristretto, we encourage you to let us know right away. 5 | 6 | We will investigate all legitimate reports and do our best to quickly fix the problem. Please report 7 | any issues or vulnerabilities via GitHub Security Advisories instead of posting a public issue in 8 | GitHub. You can also send security communications to security@hypermode.com. 9 | 10 | Please include the version identifier and details on how the vulnerability can be exploited. 11 | -------------------------------------------------------------------------------- /contrib/demo/node.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "runtime" 6 | 7 | "github.com/dustin/go-humanize" 8 | 9 | "github.com/dgraph-io/ristretto/v2/z" 10 | ) 11 | 12 | type node struct { 13 | val int 14 | next *node 15 | } 16 | 17 | var alloc *z.Allocator 18 | 19 | func printNode(n *node) { 20 | if n == nil { 21 | return 22 | } 23 | if n.val%100000 == 0 { 24 | fmt.Printf("node: %d\n", n.val) 25 | } 26 | printNode(n.next) 27 | } 28 | 29 | func main() { 30 | N := 2000001 31 | root := newNode(-1) 32 | n := root 33 | for i := 0; i < N; i++ { 34 | nn := newNode(i) 35 | n.next = nn 36 | n = nn 37 | } 38 | fmt.Printf("Allocated memory: %s Objects: %d\n", 39 | humanize.IBytes(uint64(z.NumAllocBytes())), N) 40 | 41 | runtime.GC() 42 | printNode(root) 43 | fmt.Println("printing done") 44 | 45 | if alloc != nil { 46 | alloc.Release() 47 | } else { 48 | n = root 49 | for n != nil { 50 | left := n 51 | n = n.next 52 | freeNode(left) 53 | } 54 | } 55 | fmt.Printf("After freeing. 
Allocated memory: %d\n", z.NumAllocBytes()) 56 | 57 | var ms runtime.MemStats 58 | runtime.ReadMemStats(&ms) 59 | fmt.Printf("HeapAlloc: %s\n", humanize.IBytes(ms.HeapAlloc)) 60 | } 61 | -------------------------------------------------------------------------------- /contrib/demo/node_allocator.go: -------------------------------------------------------------------------------- 1 | //go:build jemalloc && allocator 2 | // +build jemalloc,allocator 3 | 4 | package main 5 | 6 | import ( 7 | "unsafe" 8 | 9 | "github.com/dgraph-io/ristretto/v2/z" 10 | ) 11 | 12 | // Defined in node.go. 13 | func init() { 14 | alloc = z.NewAllocator(10<<20, "demo") 15 | } 16 | 17 | func newNode(val int) *node { 18 | // b := alloc.Allocate(nodeSz) 19 | b := alloc.AllocateAligned(nodeSz) 20 | n := (*node)(unsafe.Pointer(&b[0])) 21 | n.val = val 22 | alloc.Allocate(1) // Extra allocate just to demonstrate AllocateAligned is working as expected. 23 | return n 24 | } 25 | 26 | func freeNode(n *node) { 27 | // buf := (*[z.MaxArrayLen]byte)(unsafe.Pointer(n))[:nodeSz:nodeSz] 28 | // z.Free(buf) 29 | } 30 | -------------------------------------------------------------------------------- /contrib/demo/node_golang.go: -------------------------------------------------------------------------------- 1 | //go:build !jemalloc 2 | // +build !jemalloc 3 | 4 | package main 5 | 6 | func newNode(val int) *node { 7 | return &node{val: val} 8 | } 9 | 10 | func freeNode(n *node) {} 11 | -------------------------------------------------------------------------------- /contrib/demo/node_jemalloc.go: -------------------------------------------------------------------------------- 1 | //go:build jemalloc && !allocator 2 | // +build jemalloc,!allocator 3 | 4 | package main 5 | 6 | import ( 7 | "unsafe" 8 | 9 | "github.com/dgraph-io/ristretto/v2/z" 10 | ) 11 | 12 | func newNode(val int) *node { 13 | b := z.Calloc(nodeSz, "demo") 14 | n := (*node)(unsafe.Pointer(&b[0])) 15 | n.val = val 16 | return n 17 | } 18 | 19 | func freeNode(n *node) { 20 | buf := (*[z.MaxArrayLen]byte)(unsafe.Pointer(n))[:nodeSz:nodeSz] 21 | z.Free(buf) 22 | } 23 | -------------------------------------------------------------------------------- /contrib/memtest/.gitignore: -------------------------------------------------------------------------------- 1 | /list 2 | /memtest 3 | -------------------------------------------------------------------------------- /contrib/memtest/README.md: -------------------------------------------------------------------------------- 1 | memtest tests the effect of the C memory allocator. The default version uses Calloc from the stdlib. 2 | 3 | If the program is built using the `jemalloc` build tag, then the allocator used will be jemalloc. 4 | 5 | # Monitoring 6 | 7 | To monitor the memory use of this program, the following bash snippet is useful: 8 | 9 | ```sh 10 | while true; do 11 | ps -C memtest -o vsz=,rss= >> memphys.csv 12 | sleep 1 13 | done 14 | ``` 15 | 16 | This is of course contingent upon the fact that the binary of this program is called `memtest`. 
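As a concrete sketch (assuming Go with cgo enabled and, for the jemalloc variant, a jemalloc installation that cgo can link against), the two builds could be produced like this:

```sh
# Default build: allocations go through the C standard library's calloc/free
# (nojemalloc.go).
go build -o memtest .

# jemalloc build: the build tag selects withjemalloc.go, which routes
# allocations through z.Calloc/z.Free backed by jemalloc.
go build -tags jemalloc -o memtest .
```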
17 | -------------------------------------------------------------------------------- /contrib/memtest/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // #include 4 | import "C" 5 | import ( 6 | "fmt" 7 | "log" 8 | "math/rand" 9 | "net/http" 10 | _ "net/http/pprof" 11 | "os" 12 | "os/signal" 13 | "runtime" 14 | "sync/atomic" 15 | "syscall" 16 | "time" 17 | "unsafe" 18 | 19 | "github.com/dustin/go-humanize" 20 | 21 | "github.com/dgraph-io/ristretto/v2/z" 22 | ) 23 | 24 | type S struct { 25 | key uint64 26 | val []byte 27 | next *S 28 | inGo bool 29 | } 30 | 31 | var ( 32 | ssz = int(unsafe.Sizeof(S{})) 33 | lo, hi = int64(1 << 30), int64(16 << 30) 34 | increase = true 35 | stop int32 36 | fill []byte 37 | maxMB = 32 38 | 39 | cycles int64 = 16 40 | ) 41 | var numbytes int64 42 | var counter int64 43 | 44 | func newS(sz int) *S { 45 | var s *S 46 | if b := Calloc(ssz); len(b) > 0 { 47 | s = (*S)(unsafe.Pointer(&b[0])) 48 | } else { 49 | s = &S{inGo: true} 50 | } 51 | 52 | s.val = Calloc(sz) 53 | copy(s.val, fill) 54 | if s.next != nil { 55 | log.Fatalf("news.next must be nil: %p", s.next) 56 | } 57 | return s 58 | } 59 | 60 | func freeS(s *S) { 61 | Free(s.val) 62 | if !s.inGo { 63 | buf := (*[z.MaxArrayLen]byte)(unsafe.Pointer(s))[:ssz:ssz] 64 | Free(buf) 65 | } 66 | } 67 | 68 | func (s *S) allocateNext(sz int) { 69 | ns := newS(sz) 70 | s.next, ns.next = ns, s.next 71 | } 72 | 73 | func (s *S) deallocNext() { 74 | if s.next == nil { 75 | log.Fatal("next should not be nil") 76 | } 77 | next := s.next 78 | s.next = next.next 79 | freeS(next) 80 | } 81 | 82 | func memory() { 83 | // In normal mode, z.NumAllocBytes would always be zero. So, this program would misbehave. 84 | curMem := NumAllocBytes() 85 | if increase { 86 | if curMem > hi { 87 | increase = false 88 | } 89 | } else { 90 | if curMem < lo { 91 | increase = true 92 | runtime.GC() 93 | time.Sleep(3 * time.Second) 94 | 95 | counter++ 96 | } 97 | } 98 | var js z.MemStats 99 | z.ReadMemStats(&js) 100 | 101 | fmt.Printf("[%d] Current Memory: %s. Increase? %v, MemStats [Active: %s, Allocated: %s,"+ 102 | " Resident: %s, Retained: %s]\n", 103 | counter, humanize.IBytes(uint64(curMem)), increase, 104 | humanize.IBytes(js.Active), humanize.IBytes(js.Allocated), 105 | humanize.IBytes(js.Resident), humanize.IBytes(js.Retained)) 106 | } 107 | 108 | func viaLL() { 109 | ticker := time.NewTicker(10 * time.Millisecond) 110 | defer ticker.Stop() 111 | 112 | root := newS(1) 113 | for range ticker.C { 114 | if counter >= cycles { 115 | fmt.Printf("Finished %d cycles. 
Deallocating...\n", counter) 116 | break 117 | } 118 | if atomic.LoadInt32(&stop) == 1 { 119 | break 120 | } 121 | if increase { 122 | root.allocateNext(rand.Intn(maxMB) << 20) 123 | } else { 124 | root.deallocNext() 125 | } 126 | memory() 127 | } 128 | for root.next != nil { 129 | root.deallocNext() 130 | memory() 131 | } 132 | freeS(root) 133 | } 134 | 135 | func main() { 136 | check() 137 | fill = make([]byte, maxMB<<20) 138 | _, _ = rand.Read(fill) 139 | 140 | c := make(chan os.Signal, 10) 141 | signal.Notify(c, os.Interrupt, syscall.SIGTERM) 142 | go func() { 143 | <-c 144 | fmt.Println("Stopping") 145 | atomic.StoreInt32(&stop, 1) 146 | }() 147 | go func() { 148 | if err := http.ListenAndServe("0.0.0.0:8080", nil); err != nil { 149 | log.Fatalf("Error: %v", err) 150 | } 151 | }() 152 | 153 | viaLL() 154 | if left := NumAllocBytes(); left != 0 { 155 | log.Fatalf("Unable to deallocate all memory: %v\n", left) 156 | } 157 | runtime.GC() 158 | fmt.Println("Done. Reduced to zero memory usage.") 159 | time.Sleep(5 * time.Second) 160 | } 161 | -------------------------------------------------------------------------------- /contrib/memtest/nojemalloc.go: -------------------------------------------------------------------------------- 1 | //go:build !jemalloc 2 | // +build !jemalloc 3 | 4 | package main 5 | 6 | // #include 7 | import "C" 8 | import ( 9 | "log" 10 | "reflect" 11 | "sync/atomic" 12 | "unsafe" 13 | ) 14 | 15 | func Calloc(size int) []byte { 16 | if size == 0 { 17 | return make([]byte, 0) 18 | } 19 | ptr := C.calloc(C.size_t(size), 1) 20 | if ptr == nil { 21 | panic("OOM") 22 | } 23 | hdr := reflect.SliceHeader{Data: uintptr(ptr), Len: size, Cap: size} 24 | atomic.AddInt64(&numbytes, int64(size)) 25 | //nolint:govet 26 | return *(*[]byte)(unsafe.Pointer(&hdr)) 27 | } 28 | 29 | func Free(bs []byte) { 30 | if len(bs) == 0 { 31 | return 32 | } 33 | 34 | if sz := cap(bs); sz != 0 { 35 | bs = bs[:cap(bs)] 36 | C.free(unsafe.Pointer(&bs[0])) 37 | atomic.AddInt64(&numbytes, -int64(sz)) 38 | } 39 | } 40 | 41 | func NumAllocBytes() int64 { return atomic.LoadInt64(&numbytes) } 42 | 43 | func check() {} 44 | 45 | func init() { 46 | log.Println("USING CALLOC") 47 | } 48 | -------------------------------------------------------------------------------- /contrib/memtest/withjemalloc.go: -------------------------------------------------------------------------------- 1 | //go:build jemalloc 2 | // +build jemalloc 3 | 4 | package main 5 | 6 | import ( 7 | "log" 8 | 9 | "github.com/dgraph-io/ristretto/v2/z" 10 | ) 11 | 12 | func Calloc(size int) []byte { return z.Calloc(size, "memtest") } 13 | func Free(bs []byte) { z.Free(bs) } 14 | func NumAllocBytes() int64 { return z.NumAllocBytes() } 15 | 16 | func check() { 17 | if buf := z.CallocNoRef(1, "memtest"); len(buf) == 0 { 18 | log.Fatalf("Not using manual memory management. 
Compile with jemalloc.") 19 | } else { 20 | z.Free(buf) 21 | } 22 | 23 | z.StatsPrint() 24 | } 25 | 26 | func init() { 27 | log.Println("USING JEMALLOC") 28 | } 29 | -------------------------------------------------------------------------------- /contrib/memtestc/.gitignore: -------------------------------------------------------------------------------- 1 | /list 2 | -------------------------------------------------------------------------------- /contrib/memtestc/list.c: -------------------------------------------------------------------------------- 1 | // A simple C program for traversal of a linked list 2 | #include 3 | #include 4 | #include 5 | 6 | struct Node { 7 | int data; 8 | char* buf; 9 | struct Node* next; 10 | }; 11 | 12 | // This function prints contents of linked list starting from 13 | // the given node 14 | void printList(struct Node* n) 15 | { 16 | while (n != NULL) { 17 | printf(" %d ", n->data); 18 | n = n->next; 19 | } 20 | } 21 | 22 | long long int lo = 1L << 30; 23 | long long int hi = 16L << 30; 24 | 25 | struct Node* newNode(int sz) { 26 | struct Node* n = (struct Node*)calloc(1, sizeof(struct Node)); 27 | n->buf = calloc(sz, 1); 28 | for (int i = 0; i < sz; i++) { 29 | n->buf[i] = 0xff; 30 | } 31 | n->data = sz; 32 | n->next = NULL; 33 | return n; 34 | } 35 | 36 | void allocate(struct Node* n, int sz) { 37 | struct Node* nn = newNode(sz); 38 | struct Node* tmp = n->next; 39 | n->next = nn; 40 | nn->next = tmp; 41 | } 42 | 43 | int dealloc(struct Node* n) { 44 | if (n->next == NULL) { 45 | printf("n->next is NULL\n"); 46 | exit(1); 47 | } 48 | struct Node* tmp = n->next; 49 | n->next = tmp->next; 50 | int sz = tmp->data; 51 | free(tmp->buf); 52 | free(tmp); 53 | return sz; 54 | } 55 | 56 | int main() 57 | { 58 | struct Node* root = newNode(100); 59 | 60 | long long int total = 0; 61 | int increase = 1; 62 | while(1) { 63 | if (increase == 1) { 64 | int sz = (1 + rand() % 256) << 20; 65 | allocate(root, sz); 66 | if (root->next == NULL) { 67 | printf("root->next is NULL\n"); 68 | exit(1); 69 | } 70 | total += sz; 71 | if (total > hi) { 72 | increase = 0; 73 | } 74 | } else { 75 | int sz = dealloc(root); 76 | total -= sz; 77 | if (total < lo) { 78 | increase = 1; 79 | sleep(5); 80 | } else { 81 | usleep(10); 82 | } 83 | } 84 | 85 | long double gb = total; 86 | gb /= (1 << 30); 87 | printf("Total size: %.2LF\n", gb); 88 | }; 89 | 90 | return 0; 91 | } 92 | 93 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/dgraph-io/ristretto/v2 2 | 3 | go 1.23.0 4 | 5 | toolchain go1.24.3 6 | 7 | require ( 8 | github.com/cespare/xxhash/v2 v2.3.0 9 | github.com/dgryski/go-farm v0.0.0-20240924180020-3414d57e47da 10 | github.com/dustin/go-humanize v1.0.1 11 | github.com/stretchr/testify v1.10.0 12 | golang.org/x/sys v0.33.0 13 | ) 14 | 15 | require ( 16 | github.com/davecgh/go-spew v1.1.1 // indirect 17 | github.com/pmezard/go-difflib v1.0.0 // indirect 18 | gopkg.in/yaml.v3 v3.0.1 // indirect 19 | ) 20 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= 2 | github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= 3 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 4 | 
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 5 | github.com/dgryski/go-farm v0.0.0-20240924180020-3414d57e47da h1:aIftn67I1fkbMa512G+w+Pxci9hJPB8oMnkcP3iZF38= 6 | github.com/dgryski/go-farm v0.0.0-20240924180020-3414d57e47da/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw= 7 | github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= 8 | github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= 9 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 10 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 11 | github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= 12 | github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 13 | golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= 14 | golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= 15 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 16 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 17 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 18 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 19 | -------------------------------------------------------------------------------- /policy.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package ristretto 7 | 8 | import ( 9 | "math" 10 | "sync" 11 | "sync/atomic" 12 | 13 | "github.com/dgraph-io/ristretto/v2/z" 14 | ) 15 | 16 | const ( 17 | // lfuSample is the number of items to sample when looking at eviction 18 | // candidates. 5 seems to be the most optimal number [citation needed]. 
19 | lfuSample = 5 20 | ) 21 | 22 | func newPolicy[V any](numCounters, maxCost int64) *defaultPolicy[V] { 23 | return newDefaultPolicy[V](numCounters, maxCost) 24 | } 25 | 26 | type defaultPolicy[V any] struct { 27 | sync.Mutex 28 | admit *tinyLFU 29 | evict *sampledLFU 30 | itemsCh chan []uint64 31 | stop chan struct{} 32 | done chan struct{} 33 | isClosed bool 34 | metrics *Metrics 35 | } 36 | 37 | func newDefaultPolicy[V any](numCounters, maxCost int64) *defaultPolicy[V] { 38 | p := &defaultPolicy[V]{ 39 | admit: newTinyLFU(numCounters), 40 | evict: newSampledLFU(maxCost), 41 | itemsCh: make(chan []uint64, 3), 42 | stop: make(chan struct{}), 43 | done: make(chan struct{}), 44 | } 45 | go p.processItems() 46 | return p 47 | } 48 | 49 | func (p *defaultPolicy[V]) CollectMetrics(metrics *Metrics) { 50 | p.metrics = metrics 51 | p.evict.metrics = metrics 52 | } 53 | 54 | type policyPair struct { 55 | key uint64 56 | cost int64 57 | } 58 | 59 | func (p *defaultPolicy[V]) processItems() { 60 | for { 61 | select { 62 | case items := <-p.itemsCh: 63 | p.Lock() 64 | p.admit.Push(items) 65 | p.Unlock() 66 | case <-p.stop: 67 | p.done <- struct{}{} 68 | return 69 | } 70 | } 71 | } 72 | 73 | func (p *defaultPolicy[V]) Push(keys []uint64) bool { 74 | if p.isClosed { 75 | return false 76 | } 77 | 78 | if len(keys) == 0 { 79 | return true 80 | } 81 | 82 | select { 83 | case p.itemsCh <- keys: 84 | p.metrics.add(keepGets, keys[0], uint64(len(keys))) 85 | return true 86 | default: 87 | p.metrics.add(dropGets, keys[0], uint64(len(keys))) 88 | return false 89 | } 90 | } 91 | 92 | // Add decides whether the item with the given key and cost should be accepted by 93 | // the policy. It returns the list of victims that have been evicted and a boolean 94 | // indicating whether the incoming item should be accepted. 95 | func (p *defaultPolicy[V]) Add(key uint64, cost int64) ([]*Item[V], bool) { 96 | p.Lock() 97 | defer p.Unlock() 98 | 99 | // Cannot add an item bigger than entire cache. 100 | if cost > p.evict.getMaxCost() { 101 | return nil, false 102 | } 103 | 104 | // No need to go any further if the item is already in the cache. 105 | if has := p.evict.updateIfHas(key, cost); has { 106 | // An update does not count as an addition, so return false. 107 | return nil, false 108 | } 109 | 110 | // If the execution reaches this point, the key doesn't exist in the cache. 111 | // Calculate the remaining room in the cache (usually bytes). 112 | room := p.evict.roomLeft(cost) 113 | if room >= 0 { 114 | // There's enough room in the cache to store the new item without 115 | // overflowing. Do that now and stop here. 116 | p.evict.add(key, cost) 117 | p.metrics.add(costAdd, key, uint64(cost)) 118 | return nil, true 119 | } 120 | 121 | // incHits is the hit count for the incoming item. 122 | incHits := p.admit.Estimate(key) 123 | // sample is the eviction candidate pool to be filled via random sampling. 124 | // TODO: perhaps we should use a min heap here. Right now our time 125 | // complexity is N for finding the min. Min heap should bring it down to 126 | // O(lg N). 127 | sample := make([]*policyPair, 0, lfuSample) 128 | // As items are evicted they will be appended to victims. 129 | victims := make([]*Item[V], 0) 130 | 131 | // Delete victims until there's enough space or a minKey is found that has 132 | // more hits than incoming item. 133 | for ; room < 0; room = p.evict.roomLeft(cost) { 134 | // Fill up empty slots in sample. 
135 | sample = p.evict.fillSample(sample) 136 | 137 | // Find minimally used item in sample. 138 | minKey, minHits, minId, minCost := uint64(0), int64(math.MaxInt64), 0, int64(0) 139 | for i, pair := range sample { 140 | // Look up hit count for sample key. 141 | if hits := p.admit.Estimate(pair.key); hits < minHits { 142 | minKey, minHits, minId, minCost = pair.key, hits, i, pair.cost 143 | } 144 | } 145 | 146 | // If the incoming item isn't worth keeping in the policy, reject. 147 | if incHits < minHits { 148 | p.metrics.add(rejectSets, key, 1) 149 | return victims, false 150 | } 151 | 152 | // Delete the victim from metadata. 153 | p.evict.del(minKey) 154 | 155 | // Delete the victim from sample. 156 | sample[minId] = sample[len(sample)-1] 157 | sample = sample[:len(sample)-1] 158 | // Store victim in evicted victims slice. 159 | victims = append(victims, &Item[V]{ 160 | Key: minKey, 161 | Conflict: 0, 162 | Cost: minCost, 163 | }) 164 | } 165 | 166 | p.evict.add(key, cost) 167 | p.metrics.add(costAdd, key, uint64(cost)) 168 | return victims, true 169 | } 170 | 171 | func (p *defaultPolicy[V]) Has(key uint64) bool { 172 | p.Lock() 173 | _, exists := p.evict.keyCosts[key] 174 | p.Unlock() 175 | return exists 176 | } 177 | 178 | func (p *defaultPolicy[V]) Del(key uint64) { 179 | p.Lock() 180 | p.evict.del(key) 181 | p.Unlock() 182 | } 183 | 184 | func (p *defaultPolicy[V]) Cap() int64 { 185 | p.Lock() 186 | capacity := p.evict.getMaxCost() - p.evict.used 187 | p.Unlock() 188 | return capacity 189 | } 190 | 191 | func (p *defaultPolicy[V]) Update(key uint64, cost int64) { 192 | p.Lock() 193 | p.evict.updateIfHas(key, cost) 194 | p.Unlock() 195 | } 196 | 197 | func (p *defaultPolicy[V]) Cost(key uint64) int64 { 198 | p.Lock() 199 | if cost, found := p.evict.keyCosts[key]; found { 200 | p.Unlock() 201 | return cost 202 | } 203 | p.Unlock() 204 | return -1 205 | } 206 | 207 | func (p *defaultPolicy[V]) Clear() { 208 | p.Lock() 209 | p.admit.clear() 210 | p.evict.clear() 211 | p.Unlock() 212 | } 213 | 214 | func (p *defaultPolicy[V]) Close() { 215 | if p.isClosed { 216 | return 217 | } 218 | 219 | // Block until the p.processItems goroutine returns. 220 | p.stop <- struct{}{} 221 | <-p.done 222 | close(p.stop) 223 | close(p.done) 224 | close(p.itemsCh) 225 | p.isClosed = true 226 | } 227 | 228 | func (p *defaultPolicy[V]) MaxCost() int64 { 229 | if p == nil || p.evict == nil { 230 | return 0 231 | } 232 | return p.evict.getMaxCost() 233 | } 234 | 235 | func (p *defaultPolicy[V]) UpdateMaxCost(maxCost int64) { 236 | if p == nil || p.evict == nil { 237 | return 238 | } 239 | p.evict.updateMaxCost(maxCost) 240 | } 241 | 242 | // sampledLFU is an eviction helper storing key-cost pairs. 243 | type sampledLFU struct { 244 | // NOTE: align maxCost to 64-bit boundary for use with atomic. 245 | // As per https://golang.org/pkg/sync/atomic/: "On ARM, x86-32, 246 | // and 32-bit MIPS, it is the caller’s responsibility to arrange 247 | // for 64-bit alignment of 64-bit words accessed atomically. 248 | // The first word in a variable or in an allocated struct, array, 249 | // or slice can be relied upon to be 64-bit aligned." 
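	// Keeping maxCost as the first field of this struct is what provides that
	// 64-bit alignment guarantee on 32-bit platforms.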
250 | maxCost int64 251 | used int64 252 | metrics *Metrics 253 | keyCosts map[uint64]int64 254 | } 255 | 256 | func newSampledLFU(maxCost int64) *sampledLFU { 257 | return &sampledLFU{ 258 | keyCosts: make(map[uint64]int64), 259 | maxCost: maxCost, 260 | } 261 | } 262 | 263 | func (p *sampledLFU) getMaxCost() int64 { 264 | return atomic.LoadInt64(&p.maxCost) 265 | } 266 | 267 | func (p *sampledLFU) updateMaxCost(maxCost int64) { 268 | atomic.StoreInt64(&p.maxCost, maxCost) 269 | } 270 | 271 | func (p *sampledLFU) roomLeft(cost int64) int64 { 272 | return p.getMaxCost() - (p.used + cost) 273 | } 274 | 275 | func (p *sampledLFU) fillSample(in []*policyPair) []*policyPair { 276 | if len(in) >= lfuSample { 277 | return in 278 | } 279 | for key, cost := range p.keyCosts { 280 | in = append(in, &policyPair{key, cost}) 281 | if len(in) >= lfuSample { 282 | return in 283 | } 284 | } 285 | return in 286 | } 287 | 288 | func (p *sampledLFU) del(key uint64) { 289 | cost, ok := p.keyCosts[key] 290 | if !ok { 291 | return 292 | } 293 | p.used -= cost 294 | delete(p.keyCosts, key) 295 | p.metrics.add(costEvict, key, uint64(cost)) 296 | p.metrics.add(keyEvict, key, 1) 297 | } 298 | 299 | func (p *sampledLFU) add(key uint64, cost int64) { 300 | p.keyCosts[key] = cost 301 | p.used += cost 302 | } 303 | 304 | func (p *sampledLFU) updateIfHas(key uint64, cost int64) bool { 305 | if prev, found := p.keyCosts[key]; found { 306 | // Update the cost of an existing key, but don't worry about evicting. 307 | // Evictions will be handled the next time a new item is added. 308 | p.metrics.add(keyUpdate, key, 1) 309 | if prev > cost { 310 | diff := prev - cost 311 | p.metrics.add(costAdd, key, ^(uint64(diff) - 1)) 312 | } else if cost > prev { 313 | diff := cost - prev 314 | p.metrics.add(costAdd, key, uint64(diff)) 315 | } 316 | p.used += cost - prev 317 | p.keyCosts[key] = cost 318 | return true 319 | } 320 | return false 321 | } 322 | 323 | func (p *sampledLFU) clear() { 324 | p.used = 0 325 | p.keyCosts = make(map[uint64]int64) 326 | } 327 | 328 | // tinyLFU is an admission helper that keeps track of access frequency using 329 | // tiny (4-bit) counters in the form of a count-min sketch. 330 | // tinyLFU is NOT thread safe. 331 | type tinyLFU struct { 332 | freq *cmSketch 333 | door *z.Bloom 334 | incrs int64 335 | resetAt int64 336 | } 337 | 338 | func newTinyLFU(numCounters int64) *tinyLFU { 339 | return &tinyLFU{ 340 | freq: newCmSketch(numCounters), 341 | door: z.NewBloomFilter(float64(numCounters), 0.01), 342 | resetAt: numCounters, 343 | } 344 | } 345 | 346 | func (p *tinyLFU) Push(keys []uint64) { 347 | for _, key := range keys { 348 | p.Increment(key) 349 | } 350 | } 351 | 352 | func (p *tinyLFU) Estimate(key uint64) int64 { 353 | hits := p.freq.Estimate(key) 354 | if p.door.Has(key) { 355 | hits++ 356 | } 357 | return hits 358 | } 359 | 360 | func (p *tinyLFU) Increment(key uint64) { 361 | // Flip doorkeeper bit if not already done. 362 | if added := p.door.AddIfNotHas(key); !added { 363 | // Increment count-min counter if doorkeeper bit is already set. 364 | p.freq.Increment(key) 365 | } 366 | p.incrs++ 367 | if p.incrs >= p.resetAt { 368 | p.reset() 369 | } 370 | } 371 | 372 | func (p *tinyLFU) reset() { 373 | // Zero out incrs. 
374 | p.incrs = 0 375 | // clears doorkeeper bits 376 | p.door.Clear() 377 | // halves count-min counters 378 | p.freq.Reset() 379 | } 380 | 381 | func (p *tinyLFU) clear() { 382 | p.incrs = 0 383 | p.door.Clear() 384 | p.freq.Clear() 385 | } 386 | -------------------------------------------------------------------------------- /policy_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package ristretto 7 | 8 | import ( 9 | "testing" 10 | "time" 11 | 12 | "github.com/stretchr/testify/require" 13 | ) 14 | 15 | func TestPolicy(t *testing.T) { 16 | defer func() { 17 | require.Nil(t, recover()) 18 | }() 19 | newPolicy[int](100, 10) 20 | } 21 | 22 | func TestPolicyMetrics(t *testing.T) { 23 | p := newDefaultPolicy[int](100, 10) 24 | p.CollectMetrics(newMetrics()) 25 | require.NotNil(t, p.metrics) 26 | require.NotNil(t, p.evict.metrics) 27 | } 28 | 29 | func TestPolicyProcessItems(t *testing.T) { 30 | p := newDefaultPolicy[int](100, 10) 31 | p.itemsCh <- []uint64{1, 2, 2} 32 | time.Sleep(wait) 33 | p.Lock() 34 | require.Equal(t, int64(2), p.admit.Estimate(2)) 35 | require.Equal(t, int64(1), p.admit.Estimate(1)) 36 | p.Unlock() 37 | 38 | p.stop <- struct{}{} 39 | <-p.done 40 | p.itemsCh <- []uint64{3, 3, 3} 41 | time.Sleep(wait) 42 | p.Lock() 43 | require.Equal(t, int64(0), p.admit.Estimate(3)) 44 | p.Unlock() 45 | } 46 | 47 | func TestPolicyPush(t *testing.T) { 48 | p := newDefaultPolicy[int](100, 10) 49 | require.True(t, p.Push([]uint64{})) 50 | 51 | keepCount := 0 52 | for i := 0; i < 10; i++ { 53 | if p.Push([]uint64{1, 2, 3, 4, 5}) { 54 | keepCount++ 55 | } 56 | } 57 | require.NotEqual(t, 0, keepCount) 58 | } 59 | 60 | func TestPolicyAdd(t *testing.T) { 61 | p := newDefaultPolicy[int](1000, 100) 62 | if victims, added := p.Add(1, 101); victims != nil || added { 63 | t.Fatal("can't add an item bigger than entire cache") 64 | } 65 | p.Lock() 66 | p.evict.add(1, 1) 67 | p.admit.Increment(1) 68 | p.admit.Increment(2) 69 | p.admit.Increment(3) 70 | p.Unlock() 71 | 72 | victims, added := p.Add(1, 1) 73 | require.Nil(t, victims) 74 | require.False(t, added) 75 | 76 | victims, added = p.Add(2, 20) 77 | require.Nil(t, victims) 78 | require.True(t, added) 79 | 80 | victims, added = p.Add(3, 90) 81 | require.NotNil(t, victims) 82 | require.True(t, added) 83 | 84 | victims, added = p.Add(4, 20) 85 | require.NotNil(t, victims) 86 | require.False(t, added) 87 | } 88 | 89 | func TestPolicyHas(t *testing.T) { 90 | p := newDefaultPolicy[int](100, 10) 91 | p.Add(1, 1) 92 | require.True(t, p.Has(1)) 93 | require.False(t, p.Has(2)) 94 | } 95 | 96 | func TestPolicyDel(t *testing.T) { 97 | p := newDefaultPolicy[int](100, 10) 98 | p.Add(1, 1) 99 | p.Del(1) 100 | p.Del(2) 101 | require.False(t, p.Has(1)) 102 | require.False(t, p.Has(2)) 103 | } 104 | 105 | func TestPolicyCap(t *testing.T) { 106 | p := newDefaultPolicy[int](100, 10) 107 | p.Add(1, 1) 108 | require.Equal(t, int64(9), p.Cap()) 109 | } 110 | 111 | func TestPolicyUpdate(t *testing.T) { 112 | p := newDefaultPolicy[int](100, 10) 113 | p.Add(1, 1) 114 | p.Update(1, 2) 115 | p.Lock() 116 | require.Equal(t, int64(2), p.evict.keyCosts[1]) 117 | p.Unlock() 118 | } 119 | 120 | func TestPolicyCost(t *testing.T) { 121 | p := newDefaultPolicy[int](100, 10) 122 | p.Add(1, 2) 123 | require.Equal(t, int64(2), p.Cost(1)) 124 | require.Equal(t, int64(-1), p.Cost(2)) 125 | } 126 | 127 | func TestPolicyClear(t *testing.T) { 
128 | p := newDefaultPolicy[int](100, 10) 129 | p.Add(1, 1) 130 | p.Add(2, 2) 131 | p.Add(3, 3) 132 | p.Clear() 133 | require.Equal(t, int64(10), p.Cap()) 134 | require.False(t, p.Has(1)) 135 | require.False(t, p.Has(2)) 136 | require.False(t, p.Has(3)) 137 | } 138 | 139 | func TestPolicyClose(t *testing.T) { 140 | defer func() { 141 | require.NotNil(t, recover()) 142 | }() 143 | 144 | p := newDefaultPolicy[int](100, 10) 145 | p.Add(1, 1) 146 | p.Close() 147 | p.itemsCh <- []uint64{1} 148 | } 149 | 150 | func TestPushAfterClose(t *testing.T) { 151 | p := newDefaultPolicy[int](100, 10) 152 | p.Close() 153 | require.False(t, p.Push([]uint64{1, 2})) 154 | } 155 | 156 | func TestAddAfterClose(t *testing.T) { 157 | p := newDefaultPolicy[int](100, 10) 158 | p.Close() 159 | p.Add(1, 1) 160 | } 161 | 162 | func TestSampledLFUAdd(t *testing.T) { 163 | e := newSampledLFU(4) 164 | e.add(1, 1) 165 | e.add(2, 2) 166 | e.add(3, 1) 167 | require.Equal(t, int64(4), e.used) 168 | require.Equal(t, int64(2), e.keyCosts[2]) 169 | } 170 | 171 | func TestSampledLFUDel(t *testing.T) { 172 | e := newSampledLFU(4) 173 | e.add(1, 1) 174 | e.add(2, 2) 175 | e.del(2) 176 | require.Equal(t, int64(1), e.used) 177 | _, ok := e.keyCosts[2] 178 | require.False(t, ok) 179 | e.del(4) 180 | } 181 | 182 | func TestSampledLFUUpdate(t *testing.T) { 183 | e := newSampledLFU(4) 184 | e.add(1, 1) 185 | require.True(t, e.updateIfHas(1, 2)) 186 | require.Equal(t, int64(2), e.used) 187 | require.False(t, e.updateIfHas(2, 2)) 188 | } 189 | 190 | func TestSampledLFUClear(t *testing.T) { 191 | e := newSampledLFU(4) 192 | e.add(1, 1) 193 | e.add(2, 2) 194 | e.add(3, 1) 195 | e.clear() 196 | require.Equal(t, 0, len(e.keyCosts)) 197 | require.Equal(t, int64(0), e.used) 198 | } 199 | 200 | func TestSampledLFURoom(t *testing.T) { 201 | e := newSampledLFU(16) 202 | e.add(1, 1) 203 | e.add(2, 2) 204 | e.add(3, 3) 205 | require.Equal(t, int64(6), e.roomLeft(4)) 206 | } 207 | 208 | func TestSampledLFUSample(t *testing.T) { 209 | e := newSampledLFU(16) 210 | e.add(4, 4) 211 | e.add(5, 5) 212 | sample := e.fillSample([]*policyPair{ 213 | {1, 1}, 214 | {2, 2}, 215 | {3, 3}, 216 | }) 217 | k := sample[len(sample)-1].key 218 | require.Equal(t, 5, len(sample)) 219 | require.NotEqual(t, 1, k) 220 | require.NotEqual(t, 2, k) 221 | require.NotEqual(t, 3, k) 222 | require.Equal(t, len(sample), len(e.fillSample(sample))) 223 | e.del(5) 224 | sample = e.fillSample(sample[:len(sample)-2]) 225 | require.Equal(t, 4, len(sample)) 226 | } 227 | 228 | func TestTinyLFUIncrement(t *testing.T) { 229 | a := newTinyLFU(4) 230 | a.Increment(1) 231 | a.Increment(1) 232 | a.Increment(1) 233 | require.True(t, a.door.Has(1)) 234 | require.Equal(t, int64(2), a.freq.Estimate(1)) 235 | 236 | a.Increment(1) 237 | require.False(t, a.door.Has(1)) 238 | require.Equal(t, int64(1), a.freq.Estimate(1)) 239 | } 240 | 241 | func TestTinyLFUEstimate(t *testing.T) { 242 | a := newTinyLFU(8) 243 | a.Increment(1) 244 | a.Increment(1) 245 | a.Increment(1) 246 | require.Equal(t, int64(3), a.Estimate(1)) 247 | require.Equal(t, int64(0), a.Estimate(2)) 248 | } 249 | 250 | func TestTinyLFUPush(t *testing.T) { 251 | a := newTinyLFU(16) 252 | a.Push([]uint64{1, 2, 2, 3, 3, 3}) 253 | require.Equal(t, int64(1), a.Estimate(1)) 254 | require.Equal(t, int64(2), a.Estimate(2)) 255 | require.Equal(t, int64(3), a.Estimate(3)) 256 | require.Equal(t, int64(6), a.incrs) 257 | } 258 | 259 | func TestTinyLFUClear(t *testing.T) { 260 | a := newTinyLFU(16) 261 | a.Push([]uint64{1, 3, 3, 3}) 262 | a.clear() 263 
| require.Equal(t, int64(0), a.incrs) 264 | require.Equal(t, int64(0), a.Estimate(3)) 265 | } 266 | -------------------------------------------------------------------------------- /ring.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package ristretto 7 | 8 | import ( 9 | "sync" 10 | ) 11 | 12 | // ringConsumer is the user-defined object responsible for receiving and 13 | // processing items in batches when buffers are drained. 14 | type ringConsumer interface { 15 | Push([]uint64) bool 16 | } 17 | 18 | // ringStripe is a singular ring buffer that is not concurrent safe. 19 | type ringStripe struct { 20 | cons ringConsumer 21 | data []uint64 22 | capa int 23 | } 24 | 25 | func newRingStripe(cons ringConsumer, capa int64) *ringStripe { 26 | return &ringStripe{ 27 | cons: cons, 28 | data: make([]uint64, 0, capa), 29 | capa: int(capa), 30 | } 31 | } 32 | 33 | // Push appends an item in the ring buffer and drains (copies items and 34 | // sends to Consumer) if full. 35 | func (s *ringStripe) Push(item uint64) { 36 | s.data = append(s.data, item) 37 | // Decide if the ring buffer should be drained. 38 | if len(s.data) >= s.capa { 39 | // Send elements to consumer and create a new ring stripe. 40 | if s.cons.Push(s.data) { 41 | s.data = make([]uint64, 0, s.capa) 42 | } else { 43 | s.data = s.data[:0] 44 | } 45 | } 46 | } 47 | 48 | // ringBuffer stores multiple buffers (stripes) and distributes Pushed items 49 | // between them to lower contention. 50 | // 51 | // This implements the "batching" process described in the BP-Wrapper paper 52 | // (section III part A). 53 | type ringBuffer struct { 54 | pool *sync.Pool 55 | } 56 | 57 | // newRingBuffer returns a striped ring buffer. The Consumer in ringConfig will 58 | // be called when individual stripes are full and need to drain their elements. 59 | func newRingBuffer(cons ringConsumer, capa int64) *ringBuffer { 60 | // LOSSY buffers use a very simple sync.Pool for concurrently reusing 61 | // stripes. We do lose some stripes due to GC (unheld items in sync.Pool 62 | // are cleared), but the performance gains generally outweigh the small 63 | // percentage of elements lost. The performance primarily comes from 64 | // low-level runtime functions used in the standard library that aren't 65 | // available to us (such as runtime_procPin()). 66 | return &ringBuffer{ 67 | pool: &sync.Pool{ 68 | New: func() interface{} { return newRingStripe(cons, capa) }, 69 | }, 70 | } 71 | } 72 | 73 | // Push adds an element to one of the internal stripes and possibly drains if 74 | // the stripe becomes full. 75 | func (b *ringBuffer) Push(item uint64) { 76 | // Reuse or create a new stripe. 77 | stripe := b.pool.Get().(*ringStripe) 78 | stripe.Push(item) 79 | b.pool.Put(stripe) 80 | } 81 | -------------------------------------------------------------------------------- /ring_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 
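ring.go above batches hot-path accesses into per-stripe buffers and hands full batches to a ringConsumer, trading occasionally lost stripes (via sync.Pool) for lower contention, as in BP-Wrapper. The following is a minimal, package-internal sketch under that assumption; `countingConsumer` and `ringSketch` are invented names, not part of the package.

```go
package ristretto

import (
	"fmt"
	"sync"
)

// countingConsumer counts how many batches the ring buffer drains into it.
type countingConsumer struct {
	mu      sync.Mutex
	batches int
}

func (c *countingConsumer) Push(keys []uint64) bool {
	c.mu.Lock()
	c.batches++
	c.mu.Unlock()
	return true // take ownership of the slice; the stripe allocates a fresh one
}

func ringSketch() {
	cons := &countingConsumer{}
	buf := newRingBuffer(cons, 4) // each stripe drains after 4 items
	for i := uint64(0); i < 100; i++ {
		buf.Push(i)
	}
	// With a single goroutine this is roughly 100/4 = 25 batches; under
	// concurrency it is approximate, since stripes are pooled per-P and
	// idle stripes may be dropped by the GC (the "lossy" trade-off above).
	fmt.Println("batches drained:", cons.batches)
}
```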
3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package ristretto 7 | 8 | import ( 9 | "sync" 10 | "testing" 11 | 12 | "github.com/stretchr/testify/require" 13 | ) 14 | 15 | type testConsumer struct { 16 | push func([]uint64) 17 | save bool 18 | } 19 | 20 | func (c *testConsumer) Push(items []uint64) bool { 21 | if c.save { 22 | c.push(items) 23 | return true 24 | } 25 | return false 26 | } 27 | 28 | func TestRingDrain(t *testing.T) { 29 | drains := 0 30 | r := newRingBuffer(&testConsumer{ 31 | push: func(items []uint64) { 32 | drains++ 33 | }, 34 | save: true, 35 | }, 1) 36 | for i := 0; i < 100; i++ { 37 | r.Push(uint64(i)) 38 | } 39 | require.Equal(t, 100, drains, "buffers shouldn't be dropped with BufferItems == 1") 40 | } 41 | 42 | func TestRingReset(t *testing.T) { 43 | drains := 0 44 | r := newRingBuffer(&testConsumer{ 45 | push: func(items []uint64) { 46 | drains++ 47 | }, 48 | save: false, 49 | }, 4) 50 | for i := 0; i < 100; i++ { 51 | r.Push(uint64(i)) 52 | } 53 | require.Equal(t, 0, drains, "testConsumer shouldn't be draining") 54 | } 55 | 56 | func TestRingConsumer(t *testing.T) { 57 | mu := &sync.Mutex{} 58 | drainItems := make(map[uint64]struct{}) 59 | r := newRingBuffer(&testConsumer{ 60 | push: func(items []uint64) { 61 | mu.Lock() 62 | defer mu.Unlock() 63 | for i := range items { 64 | drainItems[items[i]] = struct{}{} 65 | } 66 | }, 67 | save: true, 68 | }, 4) 69 | for i := 0; i < 100; i++ { 70 | r.Push(uint64(i)) 71 | } 72 | l := len(drainItems) 73 | require.NotEqual(t, 0, l) 74 | require.True(t, l <= 100) 75 | } 76 | -------------------------------------------------------------------------------- /sim/gli.lirs.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hypermodeinc/ristretto/9bc07160ec1e5425f8ce5c7a62655896890ec53c/sim/gli.lirs.gz -------------------------------------------------------------------------------- /sim/sim.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package sim 7 | 8 | import ( 9 | "bufio" 10 | "errors" 11 | "fmt" 12 | "io" 13 | "math/rand" 14 | "strconv" 15 | "strings" 16 | "time" 17 | ) 18 | 19 | var ( 20 | // ErrDone is returned when the underlying file has ran out of lines. 21 | ErrDone = errors.New("no more values in the Simulator") 22 | // ErrBadLine is returned when the trace file line is unrecognizable to 23 | // the Parser. 24 | ErrBadLine = errors.New("bad line for trace format") 25 | ) 26 | 27 | // Simulator is the central type of the `sim` package. It is a function 28 | // returning a key from some source (composed from the other functions in this 29 | // package, either generated or parsed). You can use these Simulators to 30 | // approximate access distributions. 31 | type Simulator func() (uint64, error) 32 | 33 | // NewZipfian creates a Simulator returning numbers following a Zipfian [1] 34 | // distribution infinitely. Zipfian distributions are useful for simulating real 35 | // workloads. 36 | // 37 | // [1]: https://en.wikipedia.org/wiki/Zipf%27s_law 38 | func NewZipfian(s, v float64, n uint64) Simulator { 39 | z := rand.NewZipf(rand.New(rand.NewSource(time.Now().UnixNano())), s, v, n) 40 | return func() (uint64, error) { 41 | return z.Uint64(), nil 42 | } 43 | } 44 | 45 | // NewUniform creates a Simulator returning uniformly distributed [1] (random) 46 | // numbers [0, max) infinitely. 
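Outside the package, the sim simulators compose directly with a cache benchmark. Below is a minimal standalone sketch using the exported NewZipfian; the skew parameters mirror the ones used in the tests, and everything else is illustrative.

```go
package main

import (
	"fmt"

	"github.com/dgraph-io/ristretto/v2/sim"
)

func main() {
	// Zipfian keys in [0, 1000): a skew exponent just above 1 gives the
	// "hot head, long tail" shape typical of cache workloads.
	keys := sim.NewZipfian(1.0001, 1, 1000)

	counts := make(map[uint64]int)
	for i := 0; i < 10000; i++ {
		k, err := keys() // generated (non-file) Simulators return a nil error
		if err != nil {
			panic(err)
		}
		counts[k]++
	}
	fmt.Println("distinct keys touched:", len(counts))
}
```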
47 | // 48 | // [1]: https://en.wikipedia.org/wiki/Uniform_distribution_(continuous) 49 | func NewUniform(max uint64) Simulator { 50 | m := int64(max) 51 | r := rand.New(rand.NewSource(time.Now().UnixNano())) 52 | return func() (uint64, error) { 53 | return uint64(r.Int63n(m)), nil 54 | } 55 | } 56 | 57 | // Parser is used as a parameter to NewReader so we can create Simulators from 58 | // varying trace file formats easily. 59 | type Parser func(string, error) ([]uint64, error) 60 | 61 | // NewReader creates a Simulator from two components: the Parser, which is a 62 | // filetype specific function for parsing lines, and the file itself, which will 63 | // be read from. 64 | // 65 | // When every line in the file has been read, ErrDone will be returned. For some 66 | // trace formats (LIRS) there is one item per line. For others (ARC) there is a 67 | // range of items on each line. Thus, the true number of items in each file 68 | // is hard to determine, so it's up to the user to handle ErrDone accordingly. 69 | func NewReader(parser Parser, file io.Reader) Simulator { 70 | b := bufio.NewReader(file) 71 | s := make([]uint64, 0) 72 | i := -1 73 | var err error 74 | return func() (uint64, error) { 75 | // only parse a new line when we've run out of items 76 | if i++; i == len(s) { 77 | // parse sequence from line 78 | if s, err = parser(b.ReadString('\n')); err != nil { 79 | s = []uint64{0} 80 | } 81 | i = 0 82 | } 83 | return s[i], err 84 | } 85 | } 86 | 87 | // ParseLIRS takes a single line of input from a LIRS trace file as described in 88 | // multiple papers [1] and returns a slice containing one number. A nice 89 | // collection of LIRS trace files can be found in Ben Manes' repo [2]. 90 | // 91 | // [1]: https://en.wikipedia.org/wiki/LIRS_caching_algorithm 92 | // [2]: https://git.io/fj9gU 93 | func ParseLIRS(line string, err error) ([]uint64, error) { 94 | if line = strings.TrimSpace(line); line != "" { 95 | // example: "1\r\n" 96 | key, err := strconv.ParseUint(line, 10, 64) 97 | return []uint64{key}, err 98 | } 99 | return nil, ErrDone 100 | } 101 | 102 | // ParseARC takes a single line of input from an ARC trace file as described in 103 | // "ARC: a self-tuning, low overhead replacement cache" [1] by Nimrod Megiddo 104 | // and Dharmendra S. Modha [1] and returns a sequence of numbers generated from 105 | // the line and any error. For use with NewReader. 106 | // 107 | // [1]: https://scinapse.io/papers/1860107648 108 | func ParseARC(line string, err error) ([]uint64, error) { 109 | if line != "" { 110 | // example: "0 5 0 0\n" 111 | // 112 | // - first block: starting number in sequence 113 | // - second block: number of items in sequence 114 | // - third block: ignore 115 | // - fourth block: global line number (not used) 116 | cols := strings.Fields(line) 117 | if len(cols) != 4 { 118 | return nil, ErrBadLine 119 | } 120 | start, err := strconv.ParseUint(cols[0], 10, 64) 121 | if err != nil { 122 | return nil, err 123 | } 124 | count, err := strconv.ParseUint(cols[1], 10, 64) 125 | if err != nil { 126 | return nil, err 127 | } 128 | // populate sequence from start to start + count 129 | seq := make([]uint64, count) 130 | for i := range seq { 131 | seq[i] = start + uint64(i) 132 | } 133 | return seq, nil 134 | } 135 | return nil, ErrDone 136 | } 137 | 138 | // Collection evaluates the Simulator size times and saves each item to the 139 | // returned slice. 
140 | func Collection(simulator Simulator, size uint64) []uint64 { 141 | collection := make([]uint64, size) 142 | for i := range collection { 143 | collection[i], _ = simulator() 144 | } 145 | return collection 146 | } 147 | 148 | // StringCollection evaluates the Simulator size times and saves each item to 149 | // the returned slice, after converting it to a string. 150 | func StringCollection(simulator Simulator, size uint64) []string { 151 | collection := make([]string, size) 152 | for i := range collection { 153 | n, _ := simulator() 154 | collection[i] = fmt.Sprintf("%d", n) 155 | } 156 | return collection 157 | } 158 | -------------------------------------------------------------------------------- /sim/sim_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package sim 7 | 8 | import ( 9 | "bytes" 10 | "compress/gzip" 11 | "os" 12 | "testing" 13 | ) 14 | 15 | func TestZipfian(t *testing.T) { 16 | s := NewZipfian(1.5, 1, 100) 17 | m := make(map[uint64]uint64, 100) 18 | for i := 0; i < 100; i++ { 19 | k, err := s() 20 | if err != nil { 21 | t.Fatal(err) 22 | } 23 | m[k]++ 24 | } 25 | if len(m) == 0 || len(m) == 100 { 26 | t.Fatal("zipfian not skewed") 27 | } 28 | } 29 | 30 | func TestUniform(t *testing.T) { 31 | s := NewUniform(100) 32 | for i := 0; i < 100; i++ { 33 | if _, err := s(); err != nil { 34 | t.Fatal(err) 35 | } 36 | } 37 | } 38 | 39 | func TestParseLIRS(t *testing.T) { 40 | s := NewReader(ParseLIRS, bytes.NewReader([]byte{ 41 | '0', '\n', 42 | '1', '\r', '\n', 43 | '2', '\r', '\n', 44 | })) 45 | for i := uint64(0); i < 3; i++ { 46 | v, err := s() 47 | if err != nil { 48 | t.Fatal(err) 49 | } 50 | if v != i { 51 | t.Fatal("value mismatch") 52 | } 53 | } 54 | } 55 | 56 | func TestReadLIRS(t *testing.T) { 57 | f, err := os.Open("./gli.lirs.gz") 58 | if err != nil { 59 | t.Fatal(err) 60 | } 61 | r, err := gzip.NewReader(f) 62 | if err != nil { 63 | t.Fatal(err) 64 | } 65 | s := NewReader(ParseLIRS, r) 66 | for i := uint64(0); i < 100; i++ { 67 | if _, err = s(); err != nil { 68 | t.Fatal(err) 69 | } 70 | } 71 | } 72 | 73 | func TestParseARC(t *testing.T) { 74 | s := NewReader(ParseARC, bytes.NewReader([]byte{ 75 | '1', '2', '7', ' ', '6', '4', ' ', '0', ' ', '0', '\r', '\n', 76 | '1', '9', '1', ' ', '3', '6', ' ', '0', ' ', '0', '\r', '\n', 77 | })) 78 | for i := uint64(0); i < 100; i++ { 79 | v, err := s() 80 | if err != nil { 81 | t.Fatal(err) 82 | } 83 | if v != 127+i { 84 | t.Fatal("value mismatch") 85 | } 86 | } 87 | } 88 | 89 | func TestCollection(t *testing.T) { 90 | s := NewUniform(100) 91 | c := Collection(s, 100) 92 | if len(c) != 100 { 93 | t.Fatal("collection not full") 94 | } 95 | } 96 | 97 | func TestStringCollection(t *testing.T) { 98 | s := NewUniform(100) 99 | c := StringCollection(s, 100) 100 | if len(c) != 100 { 101 | t.Fatal("string collection not full") 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /sketch.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package ristretto 7 | 8 | import ( 9 | "fmt" 10 | "math/rand" 11 | "time" 12 | ) 13 | 14 | // cmSketch is a Count-Min sketch implementation with 4-bit counters, heavily 15 | // based on Damian Gryski's CM4 [1]. 
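Trace-driven simulators are built the same way: NewReader pairs an io.Reader with a format-specific Parser and yields keys until ErrDone. Here is a small standalone sketch with an inline ARC-format trace (the two trace lines are made up for illustration):

```go
package main

import (
	"errors"
	"fmt"
	"strings"

	"github.com/dgraph-io/ristretto/v2/sim"
)

func main() {
	// Two ARC-format lines: "start count _ _" expands into a run of keys.
	trace := "0 5 0 0\n100 3 0 0\n"
	s := sim.NewReader(sim.ParseARC, strings.NewReader(trace))

	var keys []uint64
	for {
		k, err := s()
		if errors.Is(err, sim.ErrDone) {
			break // the trace has been fully consumed
		}
		if err != nil {
			panic(err)
		}
		keys = append(keys, k)
	}
	fmt.Println(keys) // [0 1 2 3 4 100 101 102]
}
```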
16 | // 17 | // [1]: https://github.com/dgryski/go-tinylfu/blob/master/cm4.go 18 | type cmSketch struct { 19 | rows [cmDepth]cmRow 20 | seed [cmDepth]uint64 21 | mask uint64 22 | } 23 | 24 | const ( 25 | // cmDepth is the number of counter copies to store (think of it as rows). 26 | cmDepth = 4 27 | ) 28 | 29 | func newCmSketch(numCounters int64) *cmSketch { 30 | if numCounters == 0 { 31 | panic("cmSketch: bad numCounters") 32 | } 33 | // Get the next power of 2 for better cache performance. 34 | numCounters = next2Power(numCounters) 35 | sketch := &cmSketch{mask: uint64(numCounters - 1)} 36 | // Initialize rows of counters and seeds. 37 | // Cryptographic precision not needed 38 | source := rand.New(rand.NewSource(time.Now().UnixNano())) //nolint:gosec 39 | for i := 0; i < cmDepth; i++ { 40 | sketch.seed[i] = source.Uint64() 41 | sketch.rows[i] = newCmRow(numCounters) 42 | } 43 | return sketch 44 | } 45 | 46 | // Increment increments the count(ers) for the specified key. 47 | func (s *cmSketch) Increment(hashed uint64) { 48 | for i := range s.rows { 49 | s.rows[i].increment((hashed ^ s.seed[i]) & s.mask) 50 | } 51 | } 52 | 53 | // Estimate returns the value of the specified key. 54 | func (s *cmSketch) Estimate(hashed uint64) int64 { 55 | min := byte(255) 56 | for i := range s.rows { 57 | val := s.rows[i].get((hashed ^ s.seed[i]) & s.mask) 58 | if val < min { 59 | min = val 60 | } 61 | } 62 | return int64(min) 63 | } 64 | 65 | // Reset halves all counter values. 66 | func (s *cmSketch) Reset() { 67 | for _, r := range s.rows { 68 | r.reset() 69 | } 70 | } 71 | 72 | // Clear zeroes all counters. 73 | func (s *cmSketch) Clear() { 74 | for _, r := range s.rows { 75 | r.clear() 76 | } 77 | } 78 | 79 | // cmRow is a row of bytes, with each byte holding two counters. 80 | type cmRow []byte 81 | 82 | func newCmRow(numCounters int64) cmRow { 83 | return make(cmRow, numCounters/2) 84 | } 85 | 86 | func (r cmRow) get(n uint64) byte { 87 | return (r[n/2] >> ((n & 1) * 4)) & 0x0f 88 | } 89 | 90 | func (r cmRow) increment(n uint64) { 91 | // Index of the counter. 92 | i := n / 2 93 | // Shift distance (even 0, odd 4). 94 | s := (n & 1) * 4 95 | // Counter value. 96 | v := (r[i] >> s) & 0x0f 97 | // Only increment if not max value (overflow wrap is bad for LFU). 98 | if v < 15 { 99 | r[i] += 1 << s 100 | } 101 | } 102 | 103 | func (r cmRow) reset() { 104 | // Halve each counter. 105 | for i := range r { 106 | r[i] = (r[i] >> 1) & 0x77 107 | } 108 | } 109 | 110 | func (r cmRow) clear() { 111 | // Zero each counter. 112 | for i := range r { 113 | r[i] = 0 114 | } 115 | } 116 | 117 | func (r cmRow) string() string { 118 | s := "" 119 | for i := uint64(0); i < uint64(len(r)*2); i++ { 120 | s += fmt.Sprintf("%02d ", (r[(i/2)]>>((i&1)*4))&0x0f) 121 | } 122 | s = s[:len(s)-1] 123 | return s 124 | } 125 | 126 | // next2Power rounds x up to the next power of 2, if it's not already one. 127 | func next2Power(x int64) int64 { 128 | x-- 129 | x |= x >> 1 130 | x |= x >> 2 131 | x |= x >> 4 132 | x |= x >> 8 133 | x |= x >> 16 134 | x |= x >> 32 135 | x++ 136 | return x 137 | } 138 | -------------------------------------------------------------------------------- /sketch_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 
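Because each counter is only 4 bits wide, increments saturate at 15 and Reset halves rather than zeroes, which is what gives the TinyLFU admission filter its aging behaviour. A package-internal sketch of that lifecycle (the helper name is hypothetical, and the key 42 is arbitrary):

```go
package ristretto

import "fmt"

// cmSketchSketch is a hypothetical helper (not part of sketch.go) showing
// the saturate-then-halve behaviour of the 4-bit counters above.
func cmSketchSketch() {
	s := newCmSketch(16) // rounded up to a power of two internally

	for i := 0; i < 20; i++ {
		s.Increment(42)
	}
	fmt.Println(s.Estimate(42)) // 15: each 4-bit counter saturates at 15

	s.Reset()
	fmt.Println(s.Estimate(42)) // 7: Reset halves every counter (15 >> 1)

	s.Clear()
	fmt.Println(s.Estimate(42)) // 0: Clear zeroes the counters
}
```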
3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package ristretto 7 | 8 | import ( 9 | "testing" 10 | 11 | "github.com/stretchr/testify/require" 12 | ) 13 | 14 | func TestSketch(t *testing.T) { 15 | defer func() { 16 | require.NotNil(t, recover()) 17 | }() 18 | 19 | s := newCmSketch(5) 20 | require.Equal(t, uint64(7), s.mask) 21 | newCmSketch(0) 22 | } 23 | 24 | func TestSketchIncrement(t *testing.T) { 25 | s := newCmSketch(16) 26 | s.Increment(1) 27 | s.Increment(5) 28 | s.Increment(9) 29 | for i := 0; i < cmDepth; i++ { 30 | if s.rows[i].string() != s.rows[0].string() { 31 | break 32 | } 33 | require.False(t, i == cmDepth-1, "identical rows, bad seeding") 34 | } 35 | } 36 | 37 | func TestSketchEstimate(t *testing.T) { 38 | s := newCmSketch(16) 39 | s.Increment(1) 40 | s.Increment(1) 41 | require.Equal(t, int64(2), s.Estimate(1)) 42 | require.Equal(t, int64(0), s.Estimate(0)) 43 | } 44 | 45 | func TestSketchReset(t *testing.T) { 46 | s := newCmSketch(16) 47 | s.Increment(1) 48 | s.Increment(1) 49 | s.Increment(1) 50 | s.Increment(1) 51 | s.Reset() 52 | require.Equal(t, int64(2), s.Estimate(1)) 53 | } 54 | 55 | func TestSketchClear(t *testing.T) { 56 | s := newCmSketch(16) 57 | for i := 0; i < 16; i++ { 58 | s.Increment(uint64(i)) 59 | } 60 | s.Clear() 61 | for i := 0; i < 16; i++ { 62 | require.Equal(t, int64(0), s.Estimate(uint64(i))) 63 | } 64 | } 65 | 66 | func TestNext2Power(t *testing.T) { 67 | sz := 12 << 30 68 | szf := float64(sz) * 0.01 69 | val := int64(szf) 70 | t.Logf("szf = %.2f val = %d\n", szf, val) 71 | pow := next2Power(val) 72 | t.Logf("pow = %d. mult 4 = %d\n", pow, pow*4) 73 | } 74 | 75 | func BenchmarkSketchIncrement(b *testing.B) { 76 | s := newCmSketch(16) 77 | b.SetBytes(1) 78 | for n := 0; n < b.N; n++ { 79 | s.Increment(1) 80 | } 81 | } 82 | 83 | func BenchmarkSketchEstimate(b *testing.B) { 84 | s := newCmSketch(16) 85 | s.Increment(1) 86 | b.SetBytes(1) 87 | for n := 0; n < b.N; n++ { 88 | s.Estimate(1) 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /store.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package ristretto 7 | 8 | import ( 9 | "sync" 10 | "time" 11 | ) 12 | 13 | type updateFn[V any] func(cur, prev V) bool 14 | 15 | // TODO: Do we need this to be a separate struct from Item? 16 | type storeItem[V any] struct { 17 | key uint64 18 | conflict uint64 19 | value V 20 | expiration time.Time 21 | } 22 | 23 | // store is the interface fulfilled by all hash map implementations in this 24 | // file. Some hash map implementations are better suited for certain data 25 | // distributions than others, so this allows us to abstract that out for use 26 | // in Ristretto. 27 | // 28 | // Every store is safe for concurrent usage. 29 | type store[V any] interface { 30 | // Get returns the value associated with the key parameter. 31 | Get(uint64, uint64) (V, bool) 32 | // Expiration returns the expiration time for this key. 33 | Expiration(uint64) time.Time 34 | // Set adds the key-value pair to the Map or updates the value if it's 35 | // already present. The key-value pair is passed as a pointer to an 36 | // item object. 37 | Set(*Item[V]) 38 | // Del deletes the key-value pair from the Map. 39 | Del(uint64, uint64) (uint64, V) 40 | // Update attempts to update the key with a new value and returns true if 41 | // successful. 
42 | Update(*Item[V]) (V, bool) 43 | // Cleanup removes items that have an expired TTL. 44 | Cleanup(policy *defaultPolicy[V], onEvict func(item *Item[V])) 45 | // Clear clears all contents of the store. 46 | Clear(onEvict func(item *Item[V])) 47 | SetShouldUpdateFn(f updateFn[V]) 48 | } 49 | 50 | // newStore returns the default store implementation. 51 | func newStore[V any]() store[V] { 52 | return newShardedMap[V]() 53 | } 54 | 55 | const numShards uint64 = 256 56 | 57 | type shardedMap[V any] struct { 58 | shards []*lockedMap[V] 59 | expiryMap *expirationMap[V] 60 | } 61 | 62 | func newShardedMap[V any]() *shardedMap[V] { 63 | sm := &shardedMap[V]{ 64 | shards: make([]*lockedMap[V], int(numShards)), 65 | expiryMap: newExpirationMap[V](), 66 | } 67 | for i := range sm.shards { 68 | sm.shards[i] = newLockedMap[V](sm.expiryMap) 69 | } 70 | return sm 71 | } 72 | 73 | func (m *shardedMap[V]) SetShouldUpdateFn(f updateFn[V]) { 74 | for i := range m.shards { 75 | m.shards[i].setShouldUpdateFn(f) 76 | } 77 | } 78 | 79 | func (sm *shardedMap[V]) Get(key, conflict uint64) (V, bool) { 80 | return sm.shards[key%numShards].get(key, conflict) 81 | } 82 | 83 | func (sm *shardedMap[V]) Expiration(key uint64) time.Time { 84 | return sm.shards[key%numShards].Expiration(key) 85 | } 86 | 87 | func (sm *shardedMap[V]) Set(i *Item[V]) { 88 | if i == nil { 89 | // If item is nil make this Set a no-op. 90 | return 91 | } 92 | 93 | sm.shards[i.Key%numShards].Set(i) 94 | } 95 | 96 | func (sm *shardedMap[V]) Del(key, conflict uint64) (uint64, V) { 97 | return sm.shards[key%numShards].Del(key, conflict) 98 | } 99 | 100 | func (sm *shardedMap[V]) Update(newItem *Item[V]) (V, bool) { 101 | return sm.shards[newItem.Key%numShards].Update(newItem) 102 | } 103 | 104 | func (sm *shardedMap[V]) Cleanup(policy *defaultPolicy[V], onEvict func(item *Item[V])) { 105 | sm.expiryMap.cleanup(sm, policy, onEvict) 106 | } 107 | 108 | func (sm *shardedMap[V]) Clear(onEvict func(item *Item[V])) { 109 | for i := uint64(0); i < numShards; i++ { 110 | sm.shards[i].Clear(onEvict) 111 | } 112 | sm.expiryMap.clear() 113 | } 114 | 115 | type lockedMap[V any] struct { 116 | sync.RWMutex 117 | data map[uint64]storeItem[V] 118 | em *expirationMap[V] 119 | shouldUpdate updateFn[V] 120 | } 121 | 122 | func newLockedMap[V any](em *expirationMap[V]) *lockedMap[V] { 123 | return &lockedMap[V]{ 124 | data: make(map[uint64]storeItem[V]), 125 | em: em, 126 | shouldUpdate: func(cur, prev V) bool { 127 | return true 128 | }, 129 | } 130 | } 131 | 132 | func (m *lockedMap[V]) setShouldUpdateFn(f updateFn[V]) { 133 | m.shouldUpdate = f 134 | } 135 | 136 | func (m *lockedMap[V]) get(key, conflict uint64) (V, bool) { 137 | m.RLock() 138 | item, ok := m.data[key] 139 | m.RUnlock() 140 | if !ok { 141 | return zeroValue[V](), false 142 | } 143 | if conflict != 0 && (conflict != item.conflict) { 144 | return zeroValue[V](), false 145 | } 146 | 147 | // Handle expired items. 148 | if !item.expiration.IsZero() && time.Now().After(item.expiration) { 149 | return zeroValue[V](), false 150 | } 151 | return item.value, true 152 | } 153 | 154 | func (m *lockedMap[V]) Expiration(key uint64) time.Time { 155 | m.RLock() 156 | defer m.RUnlock() 157 | return m.data[key].expiration 158 | } 159 | 160 | func (m *lockedMap[V]) Set(i *Item[V]) { 161 | if i == nil { 162 | // If the item is nil make this Set a no-op. 163 | return 164 | } 165 | 166 | m.Lock() 167 | defer m.Unlock() 168 | item, ok := m.data[i.Key] 169 | 170 | if ok { 171 | // The item existed already. 
We need to check the conflict key and reject the 172 | // update if they do not match. Only after that the expiration map is updated. 173 | if i.Conflict != 0 && (i.Conflict != item.conflict) { 174 | return 175 | } 176 | if m.shouldUpdate != nil && !m.shouldUpdate(i.Value, item.value) { 177 | return 178 | } 179 | m.em.update(i.Key, i.Conflict, item.expiration, i.Expiration) 180 | } else { 181 | // The value is not in the map already. There's no need to return anything. 182 | // Simply add the expiration map. 183 | m.em.add(i.Key, i.Conflict, i.Expiration) 184 | } 185 | 186 | m.data[i.Key] = storeItem[V]{ 187 | key: i.Key, 188 | conflict: i.Conflict, 189 | value: i.Value, 190 | expiration: i.Expiration, 191 | } 192 | } 193 | 194 | func (m *lockedMap[V]) Del(key, conflict uint64) (uint64, V) { 195 | m.Lock() 196 | defer m.Unlock() 197 | item, ok := m.data[key] 198 | if !ok { 199 | return 0, zeroValue[V]() 200 | } 201 | if conflict != 0 && (conflict != item.conflict) { 202 | return 0, zeroValue[V]() 203 | } 204 | 205 | if !item.expiration.IsZero() { 206 | m.em.del(key, item.expiration) 207 | } 208 | 209 | delete(m.data, key) 210 | return item.conflict, item.value 211 | } 212 | 213 | func (m *lockedMap[V]) Update(newItem *Item[V]) (V, bool) { 214 | m.Lock() 215 | defer m.Unlock() 216 | item, ok := m.data[newItem.Key] 217 | if !ok { 218 | return zeroValue[V](), false 219 | } 220 | if newItem.Conflict != 0 && (newItem.Conflict != item.conflict) { 221 | return zeroValue[V](), false 222 | } 223 | if m.shouldUpdate != nil && !m.shouldUpdate(newItem.Value, item.value) { 224 | return item.value, false 225 | } 226 | 227 | m.em.update(newItem.Key, newItem.Conflict, item.expiration, newItem.Expiration) 228 | m.data[newItem.Key] = storeItem[V]{ 229 | key: newItem.Key, 230 | conflict: newItem.Conflict, 231 | value: newItem.Value, 232 | expiration: newItem.Expiration, 233 | } 234 | 235 | return item.value, true 236 | } 237 | 238 | func (m *lockedMap[V]) Clear(onEvict func(item *Item[V])) { 239 | m.Lock() 240 | defer m.Unlock() 241 | i := &Item[V]{} 242 | if onEvict != nil { 243 | for _, si := range m.data { 244 | i.Key = si.key 245 | i.Conflict = si.conflict 246 | i.Value = si.value 247 | onEvict(i) 248 | } 249 | } 250 | m.data = make(map[uint64]storeItem[V]) 251 | } 252 | -------------------------------------------------------------------------------- /store_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 
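The conflict-key check in Set and Update above is what protects against 64-bit hash collisions: a write whose conflict hash does not match the stored one is silently dropped. A package-internal sketch of that behaviour (the helper name and the string key are invented for illustration):

```go
package ristretto

import (
	"fmt"

	"github.com/dgraph-io/ristretto/v2/z"
)

// storeConflictSketch is a hypothetical helper (not part of store.go)
// showing the conflict-key rejection in lockedMap.Set above.
func storeConflictSketch() {
	s := newStore[int]()
	key, conflict := z.KeyToHash("user:42")

	s.Set(&Item[int]{Key: key, Conflict: conflict, Value: 1})

	// Same primary hash, wrong conflict hash: the write is ignored.
	s.Set(&Item[int]{Key: key, Conflict: conflict + 1, Value: 99})

	val, _ := s.Get(key, conflict)
	fmt.Println(val) // still 1
}
```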
3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package ristretto 7 | 8 | import ( 9 | "testing" 10 | "time" 11 | 12 | "github.com/dgraph-io/ristretto/v2/z" 13 | "github.com/stretchr/testify/require" 14 | ) 15 | 16 | func TestStoreSetGet(t *testing.T) { 17 | s := newStore[int]() 18 | key, conflict := z.KeyToHash(1) 19 | i := Item[int]{ 20 | Key: key, 21 | Conflict: conflict, 22 | Value: 2, 23 | } 24 | s.Set(&i) 25 | val, ok := s.Get(key, conflict) 26 | require.True(t, ok) 27 | require.Equal(t, 2, val) 28 | 29 | i.Value = 3 30 | s.Set(&i) 31 | val, ok = s.Get(key, conflict) 32 | require.True(t, ok) 33 | require.Equal(t, 3, val) 34 | 35 | key, conflict = z.KeyToHash(2) 36 | i = Item[int]{ 37 | Key: key, 38 | Conflict: conflict, 39 | Value: 2, 40 | } 41 | s.Set(&i) 42 | val, ok = s.Get(key, conflict) 43 | require.True(t, ok) 44 | require.Equal(t, 2, val) 45 | } 46 | 47 | func TestStoreDel(t *testing.T) { 48 | s := newStore[int]() 49 | key, conflict := z.KeyToHash(1) 50 | i := Item[int]{ 51 | Key: key, 52 | Conflict: conflict, 53 | Value: 1, 54 | } 55 | s.Set(&i) 56 | s.Del(key, conflict) 57 | val, ok := s.Get(key, conflict) 58 | require.False(t, ok) 59 | require.Empty(t, val) 60 | 61 | s.Del(2, 0) 62 | } 63 | 64 | func TestStoreClear(t *testing.T) { 65 | s := newStore[uint64]() 66 | for i := uint64(0); i < 1000; i++ { 67 | key, conflict := z.KeyToHash(i) 68 | it := Item[uint64]{ 69 | Key: key, 70 | Conflict: conflict, 71 | Value: i, 72 | } 73 | s.Set(&it) 74 | } 75 | s.Clear(nil) 76 | for i := uint64(0); i < 1000; i++ { 77 | key, conflict := z.KeyToHash(i) 78 | val, ok := s.Get(key, conflict) 79 | require.False(t, ok) 80 | require.Empty(t, val) 81 | } 82 | } 83 | 84 | func TestShouldUpdate(t *testing.T) { 85 | // Create a should update function where the value only increases. 
86 | s := newStore[int]() 87 | s.SetShouldUpdateFn(func(cur, prev int) bool { 88 | return cur > prev 89 | }) 90 | 91 | key, conflict := z.KeyToHash(1) 92 | i := Item[int]{ 93 | Key: key, 94 | Conflict: conflict, 95 | Value: 2, 96 | } 97 | s.Set(&i) 98 | i.Value = 1 99 | _, ok := s.Update(&i) 100 | require.False(t, ok) 101 | 102 | i.Value = 3 103 | _, ok = s.Update(&i) 104 | require.True(t, ok) 105 | } 106 | 107 | func TestStoreUpdate(t *testing.T) { 108 | s := newStore[int]() 109 | key, conflict := z.KeyToHash(1) 110 | i := Item[int]{ 111 | Key: key, 112 | Conflict: conflict, 113 | Value: 1, 114 | } 115 | s.Set(&i) 116 | i.Value = 2 117 | _, ok := s.Update(&i) 118 | require.True(t, ok) 119 | 120 | val, ok := s.Get(key, conflict) 121 | require.True(t, ok) 122 | require.NotNil(t, val) 123 | 124 | val, ok = s.Get(key, conflict) 125 | require.True(t, ok) 126 | require.Equal(t, 2, val) 127 | 128 | i.Value = 3 129 | _, ok = s.Update(&i) 130 | require.True(t, ok) 131 | 132 | val, ok = s.Get(key, conflict) 133 | require.True(t, ok) 134 | require.Equal(t, 3, val) 135 | 136 | key, conflict = z.KeyToHash(2) 137 | i = Item[int]{ 138 | Key: key, 139 | Conflict: conflict, 140 | Value: 2, 141 | } 142 | _, ok = s.Update(&i) 143 | require.False(t, ok) 144 | val, ok = s.Get(key, conflict) 145 | require.False(t, ok) 146 | require.Empty(t, val) 147 | } 148 | 149 | func TestStoreCollision(t *testing.T) { 150 | s := newShardedMap[int]() 151 | s.shards[1].Lock() 152 | s.shards[1].data[1] = storeItem[int]{ 153 | key: 1, 154 | conflict: 0, 155 | value: 1, 156 | } 157 | s.shards[1].Unlock() 158 | val, ok := s.Get(1, 1) 159 | require.False(t, ok) 160 | require.Empty(t, val) 161 | 162 | i := Item[int]{ 163 | Key: 1, 164 | Conflict: 1, 165 | Value: 2, 166 | } 167 | s.Set(&i) 168 | val, ok = s.Get(1, 0) 169 | require.True(t, ok) 170 | require.NotEqual(t, 2, val) 171 | 172 | _, ok = s.Update(&i) 173 | require.False(t, ok) 174 | val, ok = s.Get(1, 0) 175 | require.True(t, ok) 176 | require.NotEqual(t, 2, val) 177 | 178 | s.Del(1, 1) 179 | val, ok = s.Get(1, 0) 180 | require.True(t, ok) 181 | require.NotEmpty(t, val) 182 | } 183 | 184 | func TestStoreExpiration(t *testing.T) { 185 | s := newStore[int]() 186 | key, conflict := z.KeyToHash(1) 187 | expiration := time.Now().Add(time.Second) 188 | i := Item[int]{ 189 | Key: key, 190 | Conflict: conflict, 191 | Value: 1, 192 | Expiration: expiration, 193 | } 194 | s.Set(&i) 195 | val, ok := s.Get(key, conflict) 196 | require.True(t, ok) 197 | require.Equal(t, 1, val) 198 | 199 | ttl := s.Expiration(key) 200 | require.Equal(t, expiration, ttl) 201 | 202 | s.Del(key, conflict) 203 | 204 | _, ok = s.Get(key, conflict) 205 | require.False(t, ok) 206 | require.True(t, s.Expiration(key).IsZero()) 207 | 208 | // missing item 209 | key, _ = z.KeyToHash(4340958203495) 210 | ttl = s.Expiration(key) 211 | require.True(t, ttl.IsZero()) 212 | } 213 | 214 | func BenchmarkStoreGet(b *testing.B) { 215 | s := newStore[int]() 216 | key, conflict := z.KeyToHash(1) 217 | i := Item[int]{ 218 | Key: key, 219 | Conflict: conflict, 220 | Value: 1, 221 | } 222 | s.Set(&i) 223 | b.SetBytes(1) 224 | b.RunParallel(func(pb *testing.PB) { 225 | for pb.Next() { 226 | s.Get(key, conflict) 227 | } 228 | }) 229 | } 230 | 231 | func BenchmarkStoreSet(b *testing.B) { 232 | s := newStore[int]() 233 | key, conflict := z.KeyToHash(1) 234 | b.SetBytes(1) 235 | b.RunParallel(func(pb *testing.PB) { 236 | for pb.Next() { 237 | i := Item[int]{ 238 | Key: key, 239 | Conflict: conflict, 240 | Value: 1, 241 | } 242 | 
s.Set(&i) 243 | } 244 | }) 245 | } 246 | 247 | func BenchmarkStoreUpdate(b *testing.B) { 248 | s := newStore[int]() 249 | key, conflict := z.KeyToHash(1) 250 | i := Item[int]{ 251 | Key: key, 252 | Conflict: conflict, 253 | Value: 1, 254 | } 255 | s.Set(&i) 256 | b.SetBytes(1) 257 | b.RunParallel(func(pb *testing.PB) { 258 | for pb.Next() { 259 | s.Update(&Item[int]{ 260 | Key: key, 261 | Conflict: conflict, 262 | Value: 2, 263 | }) 264 | } 265 | }) 266 | } 267 | -------------------------------------------------------------------------------- /stress_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package ristretto 7 | 8 | import ( 9 | "container/heap" 10 | "fmt" 11 | "math/rand" 12 | "runtime" 13 | "sync" 14 | "testing" 15 | "time" 16 | 17 | "github.com/dgraph-io/ristretto/v2/sim" 18 | "github.com/stretchr/testify/require" 19 | ) 20 | 21 | func TestStressSetGet(t *testing.T) { 22 | c, err := NewCache(&Config[int, int]{ 23 | NumCounters: 1000, 24 | MaxCost: 100, 25 | IgnoreInternalCost: true, 26 | BufferItems: 64, 27 | Metrics: true, 28 | }) 29 | require.NoError(t, err) 30 | 31 | for i := 0; i < 100; i++ { 32 | c.Set(i, i, 1) 33 | } 34 | time.Sleep(wait) 35 | wg := &sync.WaitGroup{} 36 | for i := 0; i < runtime.GOMAXPROCS(0); i++ { 37 | wg.Add(1) 38 | go func() { 39 | r := rand.New(rand.NewSource(time.Now().UnixNano())) 40 | for a := 0; a < 1000; a++ { 41 | k := r.Int() % 10 42 | if val, ok := c.Get(k); !ok { 43 | err = fmt.Errorf("expected %d but got nil", k) 44 | break 45 | } else if val != 0 && val != k { 46 | err = fmt.Errorf("expected %d but got %d", k, val) 47 | break 48 | } 49 | } 50 | wg.Done() 51 | }() 52 | } 53 | wg.Wait() 54 | require.NoError(t, err) 55 | require.Equal(t, 1.0, c.Metrics.Ratio()) 56 | } 57 | 58 | func TestStressHitRatio(t *testing.T) { 59 | key := sim.NewZipfian(1.0001, 1, 1000) 60 | c, err := NewCache(&Config[uint64, uint64]{ 61 | NumCounters: 1000, 62 | MaxCost: 100, 63 | BufferItems: 64, 64 | Metrics: true, 65 | }) 66 | require.NoError(t, err) 67 | 68 | o := NewClairvoyant(100) 69 | for i := 0; i < 10000; i++ { 70 | k, err := key() 71 | require.NoError(t, err) 72 | 73 | if _, ok := o.Get(k); !ok { 74 | o.Set(k, k, 1) 75 | } 76 | if _, ok := c.Get(k); !ok { 77 | c.Set(k, k, 1) 78 | } 79 | } 80 | t.Logf("actual: %.2f, optimal: %.2f", c.Metrics.Ratio(), o.Metrics().Ratio()) 81 | } 82 | 83 | // Clairvoyant is a mock cache providing us with optimal hit ratios to compare 84 | // with Ristretto's. It looks ahead and evicts the absolute least valuable item, 85 | // which we try to approximate in a real cache. 86 | type Clairvoyant struct { 87 | capacity uint64 88 | hits map[uint64]uint64 89 | access []uint64 90 | } 91 | 92 | func NewClairvoyant(capacity uint64) *Clairvoyant { 93 | return &Clairvoyant{ 94 | capacity: capacity, 95 | hits: make(map[uint64]uint64), 96 | access: make([]uint64, 0), 97 | } 98 | } 99 | 100 | // Get just records the cache access so that we can later take this event into 101 | // consideration when calculating the absolute least valuable item to evict. 102 | func (c *Clairvoyant) Get(key interface{}) (interface{}, bool) { 103 | c.hits[key.(uint64)]++ 104 | c.access = append(c.access, key.(uint64)) 105 | return nil, false 106 | } 107 | 108 | // Set isn't important because it is only called after a Get (in the case of our 109 | // hit ratio benchmarks, at least). 
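The same kind of check can be run outside the test suite with the public API alone. Below is a minimal standalone harness in the spirit of TestStressSetGet above; the counter, cost, and buffer numbers are copied from that test, and the sleep stands in for the tests' `wait` constant, since Sets are buffered and applied asynchronously.

```go
package main

import (
	"fmt"
	"time"

	"github.com/dgraph-io/ristretto/v2"
)

func main() {
	cache, err := ristretto.NewCache(&ristretto.Config[int, int]{
		NumCounters:        1000, // 10x the number of items we expect to hold
		MaxCost:            100,
		BufferItems:        64,
		Metrics:            true,
		IgnoreInternalCost: true, // so every entry costs exactly 1
	})
	if err != nil {
		panic(err)
	}

	for i := 0; i < 100; i++ {
		cache.Set(i, i, 1)
	}
	// Give the async Set buffer time to drain before reading.
	time.Sleep(10 * time.Millisecond)

	hits := 0
	for i := 0; i < 100; i++ {
		if _, ok := cache.Get(i); ok {
			hits++
		}
	}
	fmt.Printf("resident: %d/100, hit ratio so far: %.2f\n", hits, cache.Metrics.Ratio())
}
```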
110 | func (c *Clairvoyant) Set(key, value interface{}, cost int64) bool { 111 | return false 112 | } 113 | 114 | func (c *Clairvoyant) Metrics() *Metrics { 115 | stat := newMetrics() 116 | look := make(map[uint64]struct{}, c.capacity) 117 | data := &clairvoyantHeap{} 118 | heap.Init(data) 119 | for _, key := range c.access { 120 | if _, has := look[key]; has { 121 | stat.add(hit, 0, 1) 122 | continue 123 | } 124 | if uint64(data.Len()) >= c.capacity { 125 | victim := heap.Pop(data) 126 | delete(look, victim.(*clairvoyantItem).key) 127 | } 128 | stat.add(miss, 0, 1) 129 | look[key] = struct{}{} 130 | heap.Push(data, &clairvoyantItem{key, c.hits[key]}) 131 | } 132 | return stat 133 | } 134 | 135 | type clairvoyantItem struct { 136 | key uint64 137 | hits uint64 138 | } 139 | 140 | type clairvoyantHeap []*clairvoyantItem 141 | 142 | func (h clairvoyantHeap) Len() int { return len(h) } 143 | func (h clairvoyantHeap) Less(i, j int) bool { return h[i].hits < h[j].hits } 144 | func (h clairvoyantHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] } 145 | 146 | func (h *clairvoyantHeap) Push(x interface{}) { 147 | *h = append(*h, x.(*clairvoyantItem)) 148 | } 149 | 150 | func (h *clairvoyantHeap) Pop() interface{} { 151 | old := *h 152 | n := len(old) 153 | x := old[n-1] 154 | *h = old[0 : n-1] 155 | return x 156 | } 157 | -------------------------------------------------------------------------------- /ttl.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package ristretto 7 | 8 | import ( 9 | "sync" 10 | "time" 11 | ) 12 | 13 | var ( 14 | // TODO: find the optimal value or make it configurable. 15 | bucketDurationSecs = int64(5) 16 | ) 17 | 18 | func storageBucket(t time.Time) int64 { 19 | return (t.Unix() / bucketDurationSecs) + 1 20 | } 21 | 22 | func cleanupBucket(t time.Time) int64 { 23 | // The bucket to cleanup is always behind the storage bucket by one so that 24 | // no elements in that bucket (which might not have expired yet) are deleted. 25 | return storageBucket(t) - 1 26 | } 27 | 28 | // bucket type is a map of key to conflict. 29 | type bucket map[uint64]uint64 30 | 31 | // expirationMap is a map of bucket number to the corresponding bucket. 32 | type expirationMap[V any] struct { 33 | sync.RWMutex 34 | buckets map[int64]bucket 35 | lastCleanedBucketNum int64 36 | } 37 | 38 | func newExpirationMap[V any]() *expirationMap[V] { 39 | return &expirationMap[V]{ 40 | buckets: make(map[int64]bucket), 41 | lastCleanedBucketNum: cleanupBucket(time.Now()), 42 | } 43 | } 44 | 45 | func (m *expirationMap[_]) add(key, conflict uint64, expiration time.Time) { 46 | if m == nil { 47 | return 48 | } 49 | 50 | // Items that don't expire don't need to be in the expiration map. 51 | if expiration.IsZero() { 52 | return 53 | } 54 | 55 | bucketNum := storageBucket(expiration) 56 | m.Lock() 57 | defer m.Unlock() 58 | 59 | b, ok := m.buckets[bucketNum] 60 | if !ok { 61 | b = make(bucket) 62 | m.buckets[bucketNum] = b 63 | } 64 | b[key] = conflict 65 | } 66 | 67 | func (m *expirationMap[_]) update(key, conflict uint64, oldExpTime, newExpTime time.Time) { 68 | if m == nil { 69 | return 70 | } 71 | 72 | m.Lock() 73 | defer m.Unlock() 74 | 75 | oldBucketNum := storageBucket(oldExpTime) 76 | oldBucket, ok := m.buckets[oldBucketNum] 77 | if ok { 78 | delete(oldBucket, key) 79 | } 80 | 81 | // Items that don't expire don't need to be in the expiration map. 
82 | if newExpTime.IsZero() { 83 | return 84 | } 85 | 86 | newBucketNum := storageBucket(newExpTime) 87 | newBucket, ok := m.buckets[newBucketNum] 88 | if !ok { 89 | newBucket = make(bucket) 90 | m.buckets[newBucketNum] = newBucket 91 | } 92 | newBucket[key] = conflict 93 | } 94 | 95 | func (m *expirationMap[_]) del(key uint64, expiration time.Time) { 96 | if m == nil { 97 | return 98 | } 99 | 100 | bucketNum := storageBucket(expiration) 101 | m.Lock() 102 | defer m.Unlock() 103 | _, ok := m.buckets[bucketNum] 104 | if !ok { 105 | return 106 | } 107 | delete(m.buckets[bucketNum], key) 108 | } 109 | 110 | // cleanup removes all the items in the bucket that was just completed. It deletes 111 | // those items from the store, and calls the onEvict function on those items. 112 | // This function is meant to be called periodically. 113 | func (m *expirationMap[V]) cleanup(store store[V], policy *defaultPolicy[V], onEvict func(item *Item[V])) int { 114 | if m == nil { 115 | return 0 116 | } 117 | 118 | m.Lock() 119 | now := time.Now() 120 | currentBucketNum := cleanupBucket(now) 121 | // Clean up all buckets up to and including currentBucketNum, starting from 122 | // (but not including) the last one that was cleaned up 123 | var buckets []bucket 124 | for bucketNum := m.lastCleanedBucketNum + 1; bucketNum <= currentBucketNum; bucketNum++ { 125 | // With an empty bucket, we don't need to add it to the Clean list 126 | if b := m.buckets[bucketNum]; b != nil { 127 | buckets = append(buckets, b) 128 | } 129 | delete(m.buckets, bucketNum) 130 | } 131 | m.lastCleanedBucketNum = currentBucketNum 132 | m.Unlock() 133 | 134 | for _, keys := range buckets { 135 | for key, conflict := range keys { 136 | expr := store.Expiration(key) 137 | // Sanity check. Verify that the store agrees that this key is expired. 138 | if expr.After(now) { 139 | continue 140 | } 141 | 142 | cost := policy.Cost(key) 143 | policy.Del(key) 144 | _, value := store.Del(key, conflict) 145 | 146 | if onEvict != nil { 147 | onEvict(&Item[V]{Key: key, 148 | Conflict: conflict, 149 | Value: value, 150 | Cost: cost, 151 | Expiration: expr, 152 | }) 153 | } 154 | } 155 | } 156 | 157 | cleanedBucketsCount := len(buckets) 158 | 159 | return cleanedBucketsCount 160 | } 161 | 162 | // clear clears the expirationMap, the caller is responsible for properly 163 | // evicting the referenced items 164 | func (m *expirationMap[V]) clear() { 165 | if m == nil { 166 | return 167 | } 168 | 169 | m.Lock() 170 | m.buckets = make(map[int64]bucket) 171 | m.lastCleanedBucketNum = cleanupBucket(time.Now()) 172 | m.Unlock() 173 | } 174 | -------------------------------------------------------------------------------- /ttl_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package ristretto 7 | 8 | import ( 9 | "testing" 10 | "time" 11 | 12 | "github.com/stretchr/testify/require" 13 | ) 14 | 15 | // TestExpirationMapCleanup tests the cleanup functionality of the expiration map. 16 | // It verifies that expired items are correctly evicted from the store and that 17 | // non-expired items remain in the store. 
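The bucket arithmetic in ttl.go is easiest to see with concrete numbers. The helpers below re-derive storageBucket and cleanupBucket locally (same 5-second bucket width) rather than calling the unexported package functions:

```go
package main

import (
	"fmt"
	"time"
)

// Local re-derivations of ttl.go's arithmetic with bucketDurationSecs = 5.
func storageBucket(t time.Time) int64 { return t.Unix()/5 + 1 }
func cleanupBucket(t time.Time) int64 { return storageBucket(t) - 1 }

func main() {
	base := time.Unix(1_000_000, 0)

	// An item expiring at unix second 1_000_003 is stored in bucket 200001.
	exp := base.Add(3 * time.Second)
	fmt.Println(storageBucket(exp)) // 200001

	// A cleanup pass at that same moment only touches bucket 200000, one
	// behind the current storage bucket, so entries that may not have
	// expired yet (like the one above) are left alone.
	fmt.Println(cleanupBucket(exp)) // 200000

	// Five seconds later the item's bucket becomes eligible for cleanup.
	fmt.Println(cleanupBucket(exp.Add(5 * time.Second))) // 200001
}
```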
18 | func TestExpirationMapCleanup(t *testing.T) { 19 | // Create a new expiration map 20 | em := newExpirationMap[int]() 21 | // Create a new store 22 | s := newShardedMap[int]() 23 | // Create a new policy 24 | p := newDefaultPolicy[int](100, 10) 25 | 26 | // Add items to the store and expiration map 27 | now := time.Now() 28 | i1 := &Item[int]{Key: 1, Conflict: 1, Value: 100, Expiration: now.Add(1 * time.Second)} 29 | s.Set(i1) 30 | em.add(i1.Key, i1.Conflict, i1.Expiration) 31 | 32 | i2 := &Item[int]{Key: 2, Conflict: 2, Value: 200, Expiration: now.Add(3 * time.Second)} 33 | s.Set(i2) 34 | em.add(i2.Key, i2.Conflict, i2.Expiration) 35 | 36 | // Create a map to store evicted items 37 | evictedItems := make(map[uint64]int) 38 | evictedItemsOnEvictFunc := func(item *Item[int]) { 39 | evictedItems[item.Key] = item.Value 40 | } 41 | 42 | // Wait for the first item to expire 43 | time.Sleep(2 * time.Second) 44 | 45 | // Cleanup the expiration map 46 | cleanedBucketsCount := em.cleanup(s, p, evictedItemsOnEvictFunc) 47 | require.Equal(t, 1, cleanedBucketsCount, "cleanedBucketsCount should be 1 after first cleanup") 48 | 49 | // Check that the first item was evicted 50 | require.Equal(t, 1, len(evictedItems), "evictedItems should have 1 item") 51 | require.Equal(t, 100, evictedItems[1], "evictedItems should have the first item") 52 | _, ok := s.Get(i1.Key, i1.Conflict) 53 | require.False(t, ok, "i1 should have been evicted") 54 | 55 | // Check that the second item is still in the store 56 | _, ok = s.Get(i2.Key, i2.Conflict) 57 | require.True(t, ok, "i2 should still be in the store") 58 | 59 | // Wait for the second item to expire 60 | time.Sleep(2 * time.Second) 61 | 62 | // Cleanup the expiration map 63 | cleanedBucketsCount = em.cleanup(s, p, evictedItemsOnEvictFunc) 64 | require.Equal(t, 1, cleanedBucketsCount, "cleanedBucketsCount should be 1 after second cleanup") 65 | 66 | // Check that the second item was evicted 67 | require.Equal(t, 2, len(evictedItems), "evictedItems should have 2 items") 68 | require.Equal(t, 200, evictedItems[2], "evictedItems should have the second item") 69 | _, ok = s.Get(i2.Key, i2.Conflict) 70 | require.False(t, ok, "i2 should have been evicted") 71 | 72 | t.Run("Miscalculation of buckets does not cause memory leaks", func(t *testing.T) { 73 | // Break lastCleanedBucketNum, this can happen if the system time is changed. 
74 | em.lastCleanedBucketNum = storageBucket(now.AddDate(-1, 0, 0)) 75 | 76 | cleanedBucketsCount = em.cleanup(s, p, evictedItemsOnEvictFunc) 77 | require.Equal(t, 78 | 0, cleanedBucketsCount, 79 | "cleanedBucketsCount should be 0 after cleanup with lastCleanedBucketNum change", 80 | ) 81 | }) 82 | } 83 | -------------------------------------------------------------------------------- /z/LICENSE: -------------------------------------------------------------------------------- 1 | bbloom.go 2 | 3 | // The MIT License (MIT) 4 | // Copyright (c) 2014 Andreas Briese, eduToolbox@Bri-C GmbH, Sarstedt 5 | 6 | // Permission is hereby granted, free of charge, to any person obtaining a copy of 7 | // this software and associated documentation files (the "Software"), to deal in 8 | // the Software without restriction, including without limitation the rights to 9 | // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 10 | // the Software, and to permit persons to whom the Software is furnished to do so, 11 | // subject to the following conditions: 12 | 13 | // The above copyright notice and this permission notice shall be included in all 14 | // copies or substantial portions of the Software. 15 | 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 18 | // FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 19 | // COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 20 | // IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21 | // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 22 | 23 | rtutil.go 24 | 25 | // MIT License 26 | 27 | // Copyright (c) 2019 Ewan Chou 28 | 29 | // Permission is hereby granted, free of charge, to any person obtaining a copy 30 | // of this software and associated documentation files (the "Software"), to deal 31 | // in the Software without restriction, including without limitation the rights 32 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 33 | // copies of the Software, and to permit persons to whom the Software is 34 | // furnished to do so, subject to the following conditions: 35 | 36 | // The above copyright notice and this permission notice shall be included in all 37 | // copies or substantial portions of the Software. 38 | 39 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 40 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 41 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 42 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 43 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 44 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 45 | // SOFTWARE. 46 | 47 | Modifications: 48 | 49 | /* 50 | * SPDX-FileCopyrightText: © Hypermode Inc. 51 | * SPDX-License-Identifier: Apache-2.0 52 | */ 53 | -------------------------------------------------------------------------------- /z/README.md: -------------------------------------------------------------------------------- 1 | ## bbloom: a bitset Bloom filter for go/golang 2 | 3 | === 4 | 5 | package implements a fast bloom filter with real 'bitset' and JSONMarshal/JSONUnmarshal to 6 | store/reload the Bloom filter. 
7 | 8 | NOTE: the package uses unsafe.Pointer to set and read the bits from the bitset. If you're 9 | uncomfortable with using the unsafe package, please consider using my bloom filter package at 10 | github.com/AndreasBriese/bloom 11 | 12 | === 13 | 14 | changelog 11/2015: new thread safe methods AddTS(), HasTS(), AddIfNotHasTS() following a suggestion 15 | from Srdjan Marinovic (github @a-little-srdjan), who used this to code a bloomfilter cache. 16 | 17 | This bloom filter was developed to strengthen a website-log database and was tested and optimized 18 | for this log-entry mask: "2014/%02i/%02i %02i:%02i:%02i /info.html". Nonetheless bbloom should work 19 | with any other form of entries. 20 | 21 | ~~Hash function is a modified Berkeley DB sdbm hash (to optimize for smaller strings). sdbm 22 | http://www.cse.yorku.ca/~oz/hash.html~~ 23 | 24 | Found sipHash (SipHash-2-4, a fast short-input PRF created by Jean-Philippe Aumasson and Daniel J. 25 | Bernstein.) to be about as fast. sipHash had been ported by Dimtry Chestnyk to Go 26 | (github.com/dchest/siphash ) 27 | 28 | Minimum hashset size is: 512 ([4]uint64; will be set automatically). 29 | 30 | ### install 31 | 32 | ```sh 33 | go get github.com/AndreasBriese/bbloom 34 | ``` 35 | 36 | ### test 37 | 38 | - change to folder ../bbloom 39 | - create wordlist in file "words.txt" (you might use `python permut.py`) 40 | - run 'go test -bench=.' within the folder 41 | 42 | ```go 43 | go test -bench=. 44 | ``` 45 | 46 | ~~If you've installed the GOCONVEY TDD-framework http://goconvey.co/ you can run the tests 47 | automatically.~~ 48 | 49 | using go's testing framework now (have in mind that the op timing is related to 65536 operations of 50 | Add, Has, AddIfNotHas respectively) 51 | 52 | ### usage 53 | 54 | after installation add 55 | 56 | ```go 57 | import ( 58 | ... 59 | "github.com/AndreasBriese/bbloom" 60 | ... 61 | ) 62 | ``` 63 | 64 | at your header. 
In the program use 65 | 66 | ```go 67 | // create a bloom filter for 65536 items and 1 % wrong-positive ratio 68 | bf := bbloom.New(float64(1<<16), float64(0.01)) 69 | 70 | // or 71 | // create a bloom filter with 650000 for 65536 items and 7 locs per hash explicitly 72 | // bf = bbloom.New(float64(650000), float64(7)) 73 | // or 74 | bf = bbloom.New(650000.0, 7.0) 75 | 76 | // add one item 77 | bf.Add([]byte("butter")) 78 | 79 | // Number of elements added is exposed now 80 | // Note: ElemNum will not be included in JSON export (for compatability to older version) 81 | nOfElementsInFilter := bf.ElemNum 82 | 83 | // check if item is in the filter 84 | isIn := bf.Has([]byte("butter")) // should be true 85 | isNotIn := bf.Has([]byte("Butter")) // should be false 86 | 87 | // 'add only if item is new' to the bloomfilter 88 | added := bf.AddIfNotHas([]byte("butter")) // should be false because 'butter' is already in the set 89 | added = bf.AddIfNotHas([]byte("buTTer")) // should be true because 'buTTer' is new 90 | 91 | // thread safe versions for concurrent use: AddTS, HasTS, AddIfNotHasTS 92 | // add one item 93 | bf.AddTS([]byte("peanutbutter")) 94 | // check if item is in the filter 95 | isIn = bf.HasTS([]byte("peanutbutter")) // should be true 96 | isNotIn = bf.HasTS([]byte("peanutButter")) // should be false 97 | // 'add only if item is new' to the bloomfilter 98 | added = bf.AddIfNotHasTS([]byte("butter")) // should be false because 'peanutbutter' is already in the set 99 | added = bf.AddIfNotHasTS([]byte("peanutbuTTer")) // should be true because 'penutbuTTer' is new 100 | 101 | // convert to JSON ([]byte) 102 | Json := bf.JSONMarshal() 103 | 104 | // bloomfilters Mutex is exposed for external un-/locking 105 | // i.e. mutex lock while doing JSON conversion 106 | bf.Mtx.Lock() 107 | Json = bf.JSONMarshal() 108 | bf.Mtx.Unlock() 109 | 110 | // restore a bloom filter from storage 111 | bfNew := bbloom.JSONUnmarshal(Json) 112 | 113 | isInNew := bfNew.Has([]byte("butter")) // should be true 114 | isNotInNew := bfNew.Has([]byte("Butter")) // should be false 115 | 116 | ``` 117 | 118 | to work with the bloom filter. 119 | 120 | ### why 'fast'? 121 | 122 | It's about 3 times faster than William Fitzgeralds bitset bloom filter 123 | https://github.com/willf/bloom . 
And it is about so fast as my []bool set variant for Boom filters 124 | (see https://github.com/AndreasBriese/bloom ) but having a 8times smaller memory footprint: 125 | 126 | ```sh 127 | Bloom filter (filter size 524288, 7 hashlocs) 128 | github.com/AndreasBriese/bbloom 'Add' 65536 items (10 repetitions): 6595800 ns (100 ns/op) 129 | github.com/AndreasBriese/bbloom 'Has' 65536 items (10 repetitions): 5986600 ns (91 ns/op) 130 | github.com/AndreasBriese/bloom 'Add' 65536 items (10 repetitions): 6304684 ns (96 ns/op) 131 | github.com/AndreasBriese/bloom 'Has' 65536 items (10 repetitions): 6568663 ns (100 ns/op) 132 | 133 | github.com/willf/bloom 'Add' 65536 items (10 repetitions): 24367224 ns (371 ns/op) 134 | github.com/willf/bloom 'Test' 65536 items (10 repetitions): 21881142 ns (333 ns/op) 135 | github.com/dataence/bloom/standard 'Add' 65536 items (10 repetitions): 23041644 ns (351 ns/op) 136 | github.com/dataence/bloom/standard 'Check' 65536 items (10 repetitions): 19153133 ns (292 ns/op) 137 | github.com/cabello/bloom 'Add' 65536 items (10 repetitions): 131921507 ns (2012 ns/op) 138 | github.com/cabello/bloom 'Contains' 65536 items (10 repetitions): 131108962 ns (2000 ns/op) 139 | ``` 140 | 141 | (on MBPro15 OSX10.8.5 i7 4Core 2.4Ghz) 142 | 143 | With 32bit bloom filters (bloom32) using modified sdbm, bloom32 does hashing with only 2 bit shifts, 144 | one xor and one substraction per byte. smdb is about as fast as fnv64a but gives less collisions 145 | with the dataset (see mask above). bloom.New(float64(10 \* 1<<16),float64(7)) populated with 1<<16 146 | random items from the dataset (see above) and tested against the rest results in less than 0.05% 147 | collisions. 148 | -------------------------------------------------------------------------------- /z/allocator.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package z 7 | 8 | import ( 9 | "bytes" 10 | "fmt" 11 | "math" 12 | "math/bits" 13 | "math/rand" 14 | "strings" 15 | "sync" 16 | "sync/atomic" 17 | "time" 18 | "unsafe" 19 | 20 | "github.com/dustin/go-humanize" 21 | ) 22 | 23 | // Allocator amortizes the cost of small allocations by allocating memory in 24 | // bigger chunks. Internally it uses z.Calloc to allocate memory. Once 25 | // allocated, the memory is not moved, so it is safe to use the allocated bytes 26 | // to unsafe cast them to Go struct pointers. Maintaining a freelist is slow. 27 | // Instead, Allocator only allocates memory, with the idea that finally we 28 | // would just release the entire Allocator. 29 | type Allocator struct { 30 | sync.Mutex 31 | compIdx uint64 // Stores bufIdx in 32 MSBs and posIdx in 32 LSBs. 32 | buffers [][]byte 33 | Ref uint64 34 | Tag string 35 | } 36 | 37 | // allocs keeps references to all Allocators, so we can safely discard them later. 38 | var allocsMu *sync.Mutex 39 | var allocRef uint64 40 | var allocs map[uint64]*Allocator 41 | var calculatedLog2 []int 42 | 43 | func init() { 44 | allocsMu = new(sync.Mutex) 45 | allocs = make(map[uint64]*Allocator) 46 | 47 | // Set up a unique Ref per process. 48 | allocRef = uint64(rand.Int63n(1<<16)) << 48 49 | calculatedLog2 = make([]int, 1025) 50 | for i := 1; i <= 1024; i++ { 51 | calculatedLog2[i] = int(math.Log2(float64(i))) 52 | } 53 | } 54 | 55 | // NewAllocator creates an allocator starting with the given size. 
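The compIdx field above packs two cursors into one word so that Allocate can advance the position with a single atomic add and only take the mutex when a buffer runs out. A standalone sketch of that packing; pack and parse here are local re-derivations for illustration, not the package's own functions:

```go
package main

import "fmt"

func main() {
	// bufIdx lives in the top 32 bits, the byte position within that
	// buffer in the bottom 32 bits, so both advance with one atomic add.
	pack := func(bufIdx, posIdx uint64) uint64 { return bufIdx<<32 | posIdx }
	parse := func(pos uint64) (bufIdx, posIdx int) {
		return int(pos >> 32), int(pos & 0xFFFFFFFF)
	}

	pos := pack(3, 1024) // currently in buffer #3, 1024 bytes handed out
	pos += 64            // an Allocate(64) just bumps the low 32 bits

	b, p := parse(pos)
	fmt.Println(b, p) // 3 1088
}
```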
56 | func NewAllocator(sz int, tag string) *Allocator { 57 | ref := atomic.AddUint64(&allocRef, 1) 58 | // We should not allow a zero sized page because addBufferWithMinSize 59 | // will run into an infinite loop trying to double the pagesize. 60 | if sz < 512 { 61 | sz = 512 62 | } 63 | a := &Allocator{ 64 | Ref: ref, 65 | buffers: make([][]byte, 64), 66 | Tag: tag, 67 | } 68 | l2 := uint64(log2(sz)) 69 | if bits.OnesCount64(uint64(sz)) > 1 { 70 | l2 += 1 71 | } 72 | a.buffers[0] = Calloc(1<> 32), int(pos & 0xFFFFFFFF) 129 | } 130 | 131 | // Size returns the size of the allocations so far. 132 | func (a *Allocator) Size() int { 133 | pos := atomic.LoadUint64(&a.compIdx) 134 | bi, pi := parse(pos) 135 | var sz int 136 | for i, b := range a.buffers { 137 | if i < bi { 138 | sz += len(b) 139 | continue 140 | } 141 | sz += pi 142 | return sz 143 | } 144 | panic("Size should not reach here") 145 | } 146 | 147 | func log2(sz int) int { 148 | if sz < len(calculatedLog2) { 149 | return calculatedLog2[sz] 150 | } 151 | pow := 10 152 | sz >>= 10 153 | for sz > 1 { 154 | sz >>= 1 155 | pow++ 156 | } 157 | return pow 158 | } 159 | 160 | func (a *Allocator) Allocated() uint64 { 161 | var alloc int 162 | for _, b := range a.buffers { 163 | alloc += cap(b) 164 | } 165 | return uint64(alloc) 166 | } 167 | 168 | func (a *Allocator) TrimTo(max int) { 169 | var alloc int 170 | for i, b := range a.buffers { 171 | if len(b) == 0 { 172 | break 173 | } 174 | alloc += len(b) 175 | if alloc < max { 176 | continue 177 | } 178 | Free(b) 179 | a.buffers[i] = nil 180 | } 181 | } 182 | 183 | // Release would release the memory back. Remember to make this call to avoid memory leaks. 184 | func (a *Allocator) Release() { 185 | if a == nil { 186 | return 187 | } 188 | 189 | var alloc int 190 | for _, b := range a.buffers { 191 | if len(b) == 0 { 192 | break 193 | } 194 | alloc += len(b) 195 | Free(b) 196 | } 197 | 198 | allocsMu.Lock() 199 | delete(allocs, a.Ref) 200 | allocsMu.Unlock() 201 | } 202 | 203 | const maxAlloc = 1 << 30 204 | 205 | func (a *Allocator) MaxAlloc() int { 206 | return maxAlloc 207 | } 208 | 209 | const nodeAlign = unsafe.Sizeof(uint64(0)) - 1 210 | 211 | func (a *Allocator) AllocateAligned(sz int) []byte { 212 | tsz := sz + int(nodeAlign) 213 | out := a.Allocate(tsz) 214 | // We are reusing allocators. In that case, it's important to zero out the memory allocated 215 | // here. We don't always zero it out (in Allocate), because other functions would be immediately 216 | // overwriting the allocated slices anyway (see Copy). 217 | ZeroOut(out, 0, len(out)) 218 | 219 | addr := uintptr(unsafe.Pointer(&out[0])) 220 | aligned := (addr + nodeAlign) & ^nodeAlign 221 | start := int(aligned - addr) 222 | 223 | return out[start : start+sz] 224 | } 225 | 226 | func (a *Allocator) Copy(buf []byte) []byte { 227 | if a == nil { 228 | return append([]byte{}, buf...) 229 | } 230 | out := a.Allocate(len(buf)) 231 | copy(out, buf) 232 | return out 233 | } 234 | 235 | func (a *Allocator) addBufferAt(bufIdx, minSz int) { 236 | for { 237 | if bufIdx >= len(a.buffers) { 238 | panic(fmt.Sprintf("Allocator can not allocate more than %d buffers", len(a.buffers))) 239 | } 240 | if len(a.buffers[bufIdx]) == 0 { 241 | break 242 | } 243 | if minSz <= len(a.buffers[bufIdx]) { 244 | // No need to do anything. We already have a buffer which can satisfy minSz. 245 | return 246 | } 247 | bufIdx++ 248 | } 249 | assert(bufIdx > 0) 250 | // We need to allocate a new buffer. 251 | // Make pageSize double of the last allocation. 
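// (Illustrative note, not in the original: with a hypothetical previous 4 KB
// buffer, the new page starts at 8 KB and keeps doubling until it covers
// minSz, capped at maxAlloc below.)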
252 | pageSize := 2 * len(a.buffers[bufIdx-1]) 253 | // Ensure pageSize is bigger than sz. 254 | for pageSize < minSz { 255 | pageSize *= 2 256 | } 257 | // If bigger than maxAlloc, trim to maxAlloc. 258 | if pageSize > maxAlloc { 259 | pageSize = maxAlloc 260 | } 261 | 262 | buf := Calloc(pageSize, a.Tag) 263 | assert(len(a.buffers[bufIdx]) == 0) 264 | a.buffers[bufIdx] = buf 265 | } 266 | 267 | func (a *Allocator) Allocate(sz int) []byte { 268 | if a == nil { 269 | return make([]byte, sz) 270 | } 271 | if sz > maxAlloc { 272 | panic(fmt.Sprintf("Unable to allocate more than %d\n", maxAlloc)) 273 | } 274 | if sz == 0 { 275 | return nil 276 | } 277 | for { 278 | pos := atomic.AddUint64(&a.compIdx, uint64(sz)) 279 | bufIdx, posIdx := parse(pos) 280 | buf := a.buffers[bufIdx] 281 | if posIdx > len(buf) { 282 | a.Lock() 283 | newPos := atomic.LoadUint64(&a.compIdx) 284 | newBufIdx, _ := parse(newPos) 285 | if newBufIdx != bufIdx { 286 | a.Unlock() 287 | continue 288 | } 289 | a.addBufferAt(bufIdx+1, sz) 290 | atomic.StoreUint64(&a.compIdx, uint64((bufIdx+1)<<32)) 291 | a.Unlock() 292 | // We added a new buffer. Let's acquire slice the right way by going back to the top. 293 | continue 294 | } 295 | data := buf[posIdx-sz : posIdx] 296 | return data 297 | } 298 | } 299 | 300 | type AllocatorPool struct { 301 | numGets int64 302 | allocCh chan *Allocator 303 | closer *Closer 304 | } 305 | 306 | func NewAllocatorPool(sz int) *AllocatorPool { 307 | a := &AllocatorPool{ 308 | allocCh: make(chan *Allocator, sz), 309 | closer: NewCloser(1), 310 | } 311 | go a.freeupAllocators() 312 | return a 313 | } 314 | 315 | func (p *AllocatorPool) Get(sz int, tag string) *Allocator { 316 | if p == nil { 317 | return NewAllocator(sz, tag) 318 | } 319 | atomic.AddInt64(&p.numGets, 1) 320 | select { 321 | case alloc := <-p.allocCh: 322 | alloc.Reset() 323 | alloc.Tag = tag 324 | return alloc 325 | default: 326 | return NewAllocator(sz, tag) 327 | } 328 | } 329 | func (p *AllocatorPool) Return(a *Allocator) { 330 | if a == nil { 331 | return 332 | } 333 | if p == nil { 334 | a.Release() 335 | return 336 | } 337 | a.TrimTo(400 << 20) 338 | 339 | select { 340 | case p.allocCh <- a: 341 | return 342 | default: 343 | a.Release() 344 | } 345 | } 346 | 347 | func (p *AllocatorPool) Release() { 348 | if p == nil { 349 | return 350 | } 351 | p.closer.SignalAndWait() 352 | } 353 | 354 | func (p *AllocatorPool) freeupAllocators() { 355 | defer p.closer.Done() 356 | 357 | ticker := time.NewTicker(2 * time.Second) 358 | defer ticker.Stop() 359 | 360 | releaseOne := func() bool { 361 | select { 362 | case alloc := <-p.allocCh: 363 | alloc.Release() 364 | return true 365 | default: 366 | return false 367 | } 368 | } 369 | 370 | var last int64 371 | for { 372 | select { 373 | case <-p.closer.HasBeenClosed(): 374 | close(p.allocCh) 375 | for alloc := range p.allocCh { 376 | alloc.Release() 377 | } 378 | return 379 | 380 | case <-ticker.C: 381 | gets := atomic.LoadInt64(&p.numGets) 382 | if gets != last { 383 | // Some retrievals were made since the last time. So, let's avoid doing a release. 384 | last = gets 385 | continue 386 | } 387 | releaseOne() 388 | } 389 | } 390 | } 391 | -------------------------------------------------------------------------------- /z/allocator_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 
3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package z 7 | 8 | import ( 9 | "math/rand" 10 | "sort" 11 | "sync" 12 | "testing" 13 | "unsafe" 14 | 15 | "github.com/stretchr/testify/require" 16 | ) 17 | 18 | func TestAllocate(t *testing.T) { 19 | a := NewAllocator(1024, "test") 20 | defer a.Release() 21 | 22 | check := func() { 23 | t.Logf("Running checks\n") 24 | require.Equal(t, 0, len(a.Allocate(0))) 25 | require.Equal(t, 1, len(a.Allocate(1))) 26 | require.Equal(t, 1<<20+1, len(a.Allocate(1<<20+1))) 27 | require.Equal(t, 256<<20, len(a.Allocate(256<<20))) 28 | require.Panics(t, func() { a.Allocate(maxAlloc + 1) }) 29 | } 30 | 31 | check() 32 | t.Logf("%s", a) 33 | prev := a.Allocated() 34 | t.Logf("Resetting\n") 35 | a.Reset() 36 | check() 37 | t.Logf("%s", a) 38 | require.Equal(t, int(prev), int(a.Allocated())) 39 | t.Logf("Allocated: %d\n", prev) 40 | } 41 | 42 | func TestAllocateSize(t *testing.T) { 43 | a := NewAllocator(1024, "test") 44 | require.Equal(t, 1024, len(a.buffers[0])) 45 | a.Release() 46 | 47 | b := NewAllocator(1025, "test") 48 | require.Equal(t, 2048, len(b.buffers[0])) 49 | b.Release() 50 | } 51 | 52 | func TestAllocateReset(t *testing.T) { 53 | a := NewAllocator(16, "test") 54 | defer a.Release() 55 | 56 | buf := make([]byte, 128) 57 | rand.Read(buf) 58 | for i := 0; i < 1000; i++ { 59 | a.Copy(buf) 60 | } 61 | 62 | prev := a.Allocated() 63 | a.Reset() 64 | for i := 0; i < 100; i++ { 65 | a.Copy(buf) 66 | } 67 | t.Logf("%s", a) 68 | require.Equal(t, prev, a.Allocated()) 69 | } 70 | 71 | func TestAllocateTrim(t *testing.T) { 72 | a := NewAllocator(16, "test") 73 | defer a.Release() 74 | 75 | buf := make([]byte, 128) 76 | rand.Read(buf) 77 | for i := 0; i < 1000; i++ { 78 | a.Copy(buf) 79 | } 80 | 81 | N := 2048 82 | a.TrimTo(N) 83 | require.LessOrEqual(t, int(a.Allocated()), N) 84 | } 85 | 86 | func TestPowTwo(t *testing.T) { 87 | require.Equal(t, 2, log2(4)) 88 | require.Equal(t, 2, log2(7)) 89 | require.Equal(t, 3, log2(8)) 90 | require.Equal(t, 3, log2(15)) 91 | require.Equal(t, 4, log2(16)) 92 | require.Equal(t, 4, log2(31)) 93 | require.Equal(t, 10, log2(1024)) 94 | require.Equal(t, 10, log2(1025)) 95 | require.Equal(t, 10, log2(2047)) 96 | require.Equal(t, 11, log2(2048)) 97 | } 98 | 99 | func TestAllocateAligned(t *testing.T) { 100 | a := NewAllocator(1024, "test") 101 | defer a.Release() 102 | 103 | a.Allocate(1) 104 | out := a.Allocate(1) 105 | ptr := uintptr(unsafe.Pointer(&out[0])) 106 | require.True(t, ptr%8 == 1) 107 | 108 | out = a.AllocateAligned(5) 109 | ptr = uintptr(unsafe.Pointer(&out[0])) 110 | require.True(t, ptr%8 == 0) 111 | 112 | out = a.AllocateAligned(3) 113 | ptr = uintptr(unsafe.Pointer(&out[0])) 114 | require.True(t, ptr%8 == 0) 115 | } 116 | 117 | func TestAllocateConcurrent(t *testing.T) { 118 | a := NewAllocator(63, "test") 119 | defer a.Release() 120 | 121 | N := 10240 122 | M := 16 123 | var wg sync.WaitGroup 124 | 125 | m := make(map[uintptr]struct{}) 126 | mu := new(sync.Mutex) 127 | for i := 0; i < M; i++ { 128 | wg.Add(1) 129 | go func() { 130 | defer wg.Done() 131 | var bufs []uintptr 132 | for j := 0; j < N; j++ { 133 | buf := a.Allocate(16) 134 | require.Equal(t, 16, len(buf)) 135 | bufs = append(bufs, uintptr(unsafe.Pointer(&buf[0]))) 136 | } 137 | 138 | mu.Lock() 139 | for _, b := range bufs { 140 | if _, ok := m[b]; ok { 141 | panic("did not expect to see the same ptr") 142 | } 143 | m[b] = struct{}{} 144 | } 145 | mu.Unlock() 146 | }() 147 | } 148 | wg.Wait() 149 | t.Logf("Size of allocator: %v. 
Allocator: %s\n", a.Size(), a) 150 | 151 | require.Equal(t, N*M, len(m)) 152 | var sorted []uintptr 153 | for ptr := range m { 154 | sorted = append(sorted, ptr) 155 | } 156 | 157 | sort.Slice(sorted, func(i, j int) bool { 158 | return sorted[i] < sorted[j] 159 | }) 160 | 161 | var last uintptr 162 | for _, ptr := range sorted { 163 | if ptr-last < 16 { 164 | t.Fatalf("Should not have less than 16: %v %v\n", ptr, last) 165 | } 166 | // fmt.Printf("ptr [%d]: %x %d\n", i, ptr, ptr-last) 167 | last = ptr 168 | } 169 | } 170 | 171 | func BenchmarkAllocate(b *testing.B) { 172 | a := NewAllocator(15, "test") 173 | b.RunParallel(func(pb *testing.PB) { 174 | for pb.Next() { 175 | buf := a.Allocate(1) 176 | if len(buf) != 1 { 177 | b.FailNow() 178 | } 179 | } 180 | }) 181 | b.StopTimer() 182 | b.Logf("%s", a) 183 | } 184 | -------------------------------------------------------------------------------- /z/bbloom.go: -------------------------------------------------------------------------------- 1 | // The MIT License (MIT) 2 | // Copyright (c) 2014 Andreas Briese, eduToolbox@Bri-C GmbH, Sarstedt 3 | 4 | // Permission is hereby granted, free of charge, to any person obtaining a copy of 5 | // this software and associated documentation files (the "Software"), to deal in 6 | // the Software without restriction, including without limitation the rights to 7 | // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 8 | // the Software, and to permit persons to whom the Software is furnished to do so, 9 | // subject to the following conditions: 10 | 11 | // The above copyright notice and this permission notice shall be included in all 12 | // copies or substantial portions of the Software. 13 | 14 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | // FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 17 | // COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | // IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 19 | // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 20 | 21 | package z 22 | 23 | import ( 24 | "bytes" 25 | "encoding/json" 26 | "log" 27 | "math" 28 | "unsafe" 29 | ) 30 | 31 | // helper 32 | var mask = []uint8{1, 2, 4, 8, 16, 32, 64, 128} 33 | 34 | func getSize(ui64 uint64) (size uint64, exponent uint64) { 35 | if ui64 < uint64(512) { 36 | ui64 = uint64(512) 37 | } 38 | size = uint64(1) 39 | for size < ui64 { 40 | size <<= 1 41 | exponent++ 42 | } 43 | return size, exponent 44 | } 45 | 46 | func calcSizeByWrongPositives(numEntries, wrongs float64) (uint64, uint64) { 47 | size := -1 * numEntries * math.Log(wrongs) / math.Pow(float64(0.69314718056), 2) 48 | locs := math.Ceil(float64(0.69314718056) * size / numEntries) 49 | return uint64(size), uint64(locs) 50 | } 51 | 52 | // NewBloomFilter returns a new bloomfilter. 53 | func NewBloomFilter(params ...float64) (bloomfilter *Bloom) { 54 | var entries, locs uint64 55 | if len(params) == 2 { 56 | if params[1] < 1 { 57 | entries, locs = calcSizeByWrongPositives(params[0], params[1]) 58 | } else { 59 | entries, locs = uint64(params[0]), uint64(params[1]) 60 | } 61 | } else { 62 | log.Fatal("usage: New(float64(number_of_entries), float64(number_of_hashlocations))" + 63 | " i.e. New(float64(1000), float64(3)) or New(float64(number_of_entries)," + 64 | " float64(number_of_hashlocations)) i.e. 
New(float64(1000), float64(0.03))") 65 | } 66 | size, exponent := getSize(entries) 67 | bloomfilter = &Bloom{ 68 | sizeExp: exponent, 69 | size: size - 1, 70 | setLocs: locs, 71 | shift: 64 - exponent, 72 | } 73 | bloomfilter.Size(size) 74 | return bloomfilter 75 | } 76 | 77 | // Bloom filter 78 | type Bloom struct { 79 | bitset []uint64 80 | ElemNum uint64 81 | sizeExp uint64 82 | size uint64 83 | setLocs uint64 84 | shift uint64 85 | } 86 | 87 | // <--- http://www.cse.yorku.ca/~oz/hash.html 88 | // modified Berkeley DB Hash (32bit) 89 | // hash is casted to l, h = 16bit fragments 90 | // func (bl Bloom) absdbm(b *[]byte) (l, h uint64) { 91 | // hash := uint64(len(*b)) 92 | // for _, c := range *b { 93 | // hash = uint64(c) + (hash << 6) + (hash << bl.sizeExp) - hash 94 | // } 95 | // h = hash >> bl.shift 96 | // l = hash << bl.shift >> bl.shift 97 | // return l, h 98 | // } 99 | 100 | // Add adds hash of a key to the bloomfilter. 101 | func (bl *Bloom) Add(hash uint64) { 102 | h := hash >> bl.shift 103 | l := hash << bl.shift >> bl.shift 104 | for i := uint64(0); i < bl.setLocs; i++ { 105 | bl.Set((h + i*l) & bl.size) 106 | bl.ElemNum++ 107 | } 108 | } 109 | 110 | // Has checks if bit(s) for entry hash is/are set, 111 | // returns true if the hash was added to the Bloom Filter. 112 | func (bl Bloom) Has(hash uint64) bool { 113 | h := hash >> bl.shift 114 | l := hash << bl.shift >> bl.shift 115 | for i := uint64(0); i < bl.setLocs; i++ { 116 | if !bl.IsSet((h + i*l) & bl.size) { 117 | return false 118 | } 119 | } 120 | return true 121 | } 122 | 123 | // AddIfNotHas only Adds hash, if it's not present in the bloomfilter. 124 | // Returns true if hash was added. 125 | // Returns false if hash was already registered in the bloomfilter. 126 | func (bl *Bloom) AddIfNotHas(hash uint64) bool { 127 | if bl.Has(hash) { 128 | return false 129 | } 130 | bl.Add(hash) 131 | return true 132 | } 133 | 134 | // TotalSize returns the total size of the bloom filter. 135 | func (bl *Bloom) TotalSize() int { 136 | // The bl struct has 5 members and each one is 8 byte. The bitset is a 137 | // uint64 byte slice. 138 | return len(bl.bitset)*8 + 5*8 139 | } 140 | 141 | // Size makes Bloom filter with as bitset of size sz. 142 | func (bl *Bloom) Size(sz uint64) { 143 | bl.bitset = make([]uint64, sz>>6) 144 | } 145 | 146 | // Clear resets the Bloom filter. 147 | func (bl *Bloom) Clear() { 148 | for i := range bl.bitset { 149 | bl.bitset[i] = 0 150 | } 151 | } 152 | 153 | // Set sets the bit[idx] of bitset. 154 | func (bl *Bloom) Set(idx uint64) { 155 | ptr := unsafe.Pointer(uintptr(unsafe.Pointer(&bl.bitset[idx>>6])) + uintptr((idx%64)>>3)) 156 | *(*uint8)(ptr) |= mask[idx%8] 157 | } 158 | 159 | // IsSet checks if bit[idx] of bitset is set, returns true/false. 160 | func (bl *Bloom) IsSet(idx uint64) bool { 161 | ptr := unsafe.Pointer(uintptr(unsafe.Pointer(&bl.bitset[idx>>6])) + uintptr((idx%64)>>3)) 162 | r := ((*(*uint8)(ptr)) >> (idx % 8)) & 1 163 | return r == 1 164 | } 165 | 166 | // bloomJSONImExport 167 | // Im/Export structure used by JSONMarshal / JSONUnmarshal 168 | type bloomJSONImExport struct { 169 | FilterSet []byte 170 | SetLocs uint64 171 | } 172 | 173 | // NewWithBoolset takes a []byte slice and number of locs per entry, 174 | // returns the bloomfilter with a bitset populated according to the input []byte. 
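// (Added note: this is the restore path used by JSONUnmarshal below; the bytes
// exported by JSONMarshal are copied back into a freshly sized bitset.)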
175 | func newWithBoolset(bs *[]byte, locs uint64) *Bloom { 176 | bloomfilter := NewBloomFilter(float64(len(*bs)<<3), float64(locs)) 177 | for i, b := range *bs { 178 | *(*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(&bloomfilter.bitset[0])) + uintptr(i))) = b 179 | } 180 | return bloomfilter 181 | } 182 | 183 | // JSONUnmarshal takes JSON-Object (type bloomJSONImExport) as []bytes 184 | // returns bloom32 / bloom64 object. 185 | func JSONUnmarshal(dbData []byte) (*Bloom, error) { 186 | bloomImEx := bloomJSONImExport{} 187 | if err := json.Unmarshal(dbData, &bloomImEx); err != nil { 188 | return nil, err 189 | } 190 | buf := bytes.NewBuffer(bloomImEx.FilterSet) 191 | bs := buf.Bytes() 192 | bf := newWithBoolset(&bs, bloomImEx.SetLocs) 193 | return bf, nil 194 | } 195 | 196 | // JSONMarshal returns JSON-object (type bloomJSONImExport) as []byte. 197 | func (bl Bloom) JSONMarshal() []byte { 198 | bloomImEx := bloomJSONImExport{} 199 | bloomImEx.SetLocs = bl.setLocs 200 | bloomImEx.FilterSet = make([]byte, len(bl.bitset)<<3) 201 | for i := range bloomImEx.FilterSet { 202 | bloomImEx.FilterSet[i] = *(*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(&bl.bitset[0])) + 203 | uintptr(i))) 204 | } 205 | data, err := json.Marshal(bloomImEx) 206 | if err != nil { 207 | log.Fatal("json.Marshal failed: ", err) 208 | } 209 | return data 210 | } 211 | -------------------------------------------------------------------------------- /z/bbloom_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package z 7 | 8 | import ( 9 | "crypto/rand" 10 | "fmt" 11 | "testing" 12 | 13 | "github.com/stretchr/testify/require" 14 | ) 15 | 16 | var ( 17 | wordlist1 [][]byte 18 | n = 1 << 16 19 | bf *Bloom 20 | ) 21 | 22 | func TestMain(m *testing.M) { 23 | wordlist1 = make([][]byte, n) 24 | for i := range wordlist1 { 25 | b := make([]byte, 32) 26 | _, _ = rand.Read(b) 27 | wordlist1[i] = b 28 | } 29 | fmt.Println("\n###############\nbbloom_test.go") 30 | fmt.Print("Benchmarks relate to 2**16 OP. 
--> output/65536 op/ns\n###############\n\n") 31 | 32 | m.Run() 33 | } 34 | 35 | func TestM_NumberOfWrongs(t *testing.T) { 36 | bf = NewBloomFilter(float64(n*10), float64(7)) 37 | 38 | cnt := 0 39 | for i := range wordlist1 { 40 | hash := MemHash(wordlist1[i]) 41 | if !bf.AddIfNotHas(hash) { 42 | cnt++ 43 | } 44 | } 45 | //nolint:lll 46 | fmt.Printf("Bloomfilter New(7* 2**16, 7) (-> size=%v bit): \n Check for 'false positives': %v wrong positive 'Has' results on 2**16 entries => %v %%\n", len(bf.bitset)<<6, cnt, float64(cnt)/float64(n)) 47 | 48 | } 49 | 50 | func TestM_JSON(t *testing.T) { 51 | const shallBe = int(1 << 16) 52 | 53 | bf = NewBloomFilter(float64(n*10), float64(7)) 54 | 55 | cnt := 0 56 | for i := range wordlist1 { 57 | hash := MemHash(wordlist1[i]) 58 | if !bf.AddIfNotHas(hash) { 59 | cnt++ 60 | } 61 | } 62 | 63 | Json := bf.JSONMarshal() 64 | 65 | // create new bloomfilter from bloomfilter's JSON representation 66 | bf2, err := JSONUnmarshal(Json) 67 | require.NoError(t, err) 68 | 69 | cnt2 := 0 70 | for i := range wordlist1 { 71 | hash := MemHash(wordlist1[i]) 72 | if !bf2.AddIfNotHas(hash) { 73 | cnt2++ 74 | } 75 | } 76 | require.Equal(t, shallBe, cnt2) 77 | } 78 | 79 | func BenchmarkM_New(b *testing.B) { 80 | for r := 0; r < b.N; r++ { 81 | _ = NewBloomFilter(float64(n*10), float64(7)) 82 | } 83 | } 84 | 85 | func BenchmarkM_Clear(b *testing.B) { 86 | bf = NewBloomFilter(float64(n*10), float64(7)) 87 | for i := range wordlist1 { 88 | hash := MemHash(wordlist1[i]) 89 | bf.Add(hash) 90 | } 91 | b.ResetTimer() 92 | for r := 0; r < b.N; r++ { 93 | bf.Clear() 94 | } 95 | } 96 | 97 | func BenchmarkM_Add(b *testing.B) { 98 | bf = NewBloomFilter(float64(n*10), float64(7)) 99 | b.ResetTimer() 100 | for r := 0; r < b.N; r++ { 101 | for i := range wordlist1 { 102 | hash := MemHash(wordlist1[i]) 103 | bf.Add(hash) 104 | } 105 | } 106 | 107 | } 108 | 109 | func BenchmarkM_Has(b *testing.B) { 110 | b.ResetTimer() 111 | for r := 0; r < b.N; r++ { 112 | for i := range wordlist1 { 113 | hash := MemHash(wordlist1[i]) 114 | bf.Has(hash) 115 | } 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /z/buffer_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package z 7 | 8 | import ( 9 | "bytes" 10 | "encoding/binary" 11 | "encoding/hex" 12 | "fmt" 13 | "math/rand" 14 | "sort" 15 | "testing" 16 | 17 | "github.com/stretchr/testify/require" 18 | ) 19 | 20 | func TestBuffer(t *testing.T) { 21 | const capacity = 512 22 | buffers := newTestBuffers(t, capacity) 23 | 24 | for _, buf := range buffers { 25 | name := fmt.Sprintf("Using buffer type: %s", buf.bufType) 26 | t.Run(name, func(t *testing.T) { 27 | // This is just for verifying result 28 | var bytesBuf bytes.Buffer 29 | bytesBuf.Grow(capacity) 30 | 31 | // Writer small []byte 32 | var smallData [256]byte 33 | rand.Read(smallData[:]) 34 | var bigData [1024]byte 35 | rand.Read(bigData[:]) 36 | 37 | _, err := buf.Write(smallData[:]) 38 | require.NoError(t, err, "unable to write data to page buffer") 39 | _, err = buf.Write(bigData[:]) 40 | require.NoError(t, err, "unable to write data to page buffer") 41 | 42 | // Write data to bytesBuffer also, just to match result. 
43 | bytesBuf.Write(smallData[:]) 44 | bytesBuf.Write(bigData[:]) 45 | require.Equal(t, buf.Bytes(), bytesBuf.Bytes()) 46 | }) 47 | } 48 | } 49 | 50 | func TestBufferWrite(t *testing.T) { 51 | const capacity = 32 52 | buffers := newTestBuffers(t, capacity) 53 | 54 | for _, buf := range buffers { 55 | name := fmt.Sprintf("Using buffer type: %s", buf.bufType) 56 | t.Run(name, func(t *testing.T) { 57 | var data [128]byte 58 | rand.Read(data[:]) 59 | bytesBuf := new(bytes.Buffer) 60 | 61 | end := 32 62 | for i := 0; i < 3; i++ { 63 | n, err := buf.Write(data[:end]) 64 | require.NoError(t, err, "unable to write bytes to buffer") 65 | require.Equal(t, n, end, "length of buffer and length written should be equal") 66 | 67 | // append to bb also for testing. 68 | bytesBuf.Write(data[:end]) 69 | 70 | require.Equal(t, buf.Bytes(), bytesBuf.Bytes()) 71 | end = end * 2 72 | } 73 | 74 | }) 75 | } 76 | } 77 | 78 | func TestBufferAutoMmap(t *testing.T) { 79 | buf := NewBuffer(1<<20, "test").WithAutoMmap(64<<20, "") 80 | defer func() { require.NoError(t, buf.Release()) }() 81 | 82 | N := 128 << 10 83 | var wb [1024]byte 84 | for i := 0; i < N; i++ { 85 | rand.Read(wb[:]) 86 | b := buf.SliceAllocate(len(wb)) 87 | copy(b, wb[:]) 88 | } 89 | t.Logf("Buffer size: %d\n", buf.LenWithPadding()) 90 | 91 | buf.SortSlice(func(l, r []byte) bool { 92 | return bytes.Compare(l, r) < 0 93 | }) 94 | t.Logf("sort done\n") 95 | 96 | var count int 97 | var last []byte 98 | require.NoError(t, buf.SliceIterate(func(slice []byte) error { 99 | require.True(t, bytes.Compare(slice, last) >= 0) 100 | last = append(last[:0], slice...) 101 | count++ 102 | return nil 103 | })) 104 | require.Equal(t, N, count) 105 | } 106 | 107 | func TestBufferSimpleSort(t *testing.T) { 108 | bufs := newTestBuffers(t, 1<<20) 109 | for _, buf := range bufs { 110 | name := fmt.Sprintf("Using buffer type: %s", buf.bufType) 111 | t.Run(name, func(t *testing.T) { 112 | for i := 0; i < 25600; i++ { 113 | b := buf.SliceAllocate(4) 114 | binary.BigEndian.PutUint32(b, uint32(rand.Int31n(256000))) 115 | } 116 | buf.SortSlice(func(ls, rs []byte) bool { 117 | left := binary.BigEndian.Uint32(ls) 118 | right := binary.BigEndian.Uint32(rs) 119 | return left < right 120 | }) 121 | var last uint32 122 | var i int 123 | require.NoError(t, buf.SliceIterate(func(slice []byte) error { 124 | num := binary.BigEndian.Uint32(slice) 125 | if num < last { 126 | fmt.Printf("num: %d idx: %d last: %d\n", num, i, last) 127 | } 128 | i++ 129 | require.GreaterOrEqual(t, num, last) 130 | last = num 131 | // fmt.Printf("Got number: %d\n", num) 132 | return nil 133 | })) 134 | }) 135 | } 136 | } 137 | 138 | func TestBufferSlice(t *testing.T) { 139 | const capacity = 32 140 | buffers := newTestBuffers(t, capacity) 141 | 142 | for _, buf := range buffers { 143 | name := fmt.Sprintf("Using buffer type: %s", buf.bufType) 144 | t.Run(name, func(t *testing.T) { 145 | count := 10000 146 | exp := make([][]byte, 0, count) 147 | 148 | // Create "count" number of slices. 149 | for i := 0; i < count; i++ { 150 | sz := 1 + rand.Intn(8) 151 | testBuf := make([]byte, sz) 152 | rand.Read(testBuf) 153 | 154 | newSlice := buf.SliceAllocate(sz) 155 | require.Equal(t, sz, copy(newSlice, testBuf)) 156 | 157 | // Save testBuf for verification. 158 | exp = append(exp, testBuf) 159 | } 160 | 161 | compare := func() { 162 | i := 0 163 | require.NoError(t, buf.SliceIterate(func(slice []byte) error { 164 | // All the slices returned by the buffer should be equal to what we 165 | // inserted earlier. 
166 | if !bytes.Equal(exp[i], slice) { 167 | fmt.Printf("exp: %s got: %s\n", hex.Dump(exp[i]), hex.Dump(slice)) 168 | t.Fail() 169 | } 170 | require.Equal(t, exp[i], slice) 171 | i++ 172 | return nil 173 | })) 174 | require.Equal(t, len(exp), i) 175 | } 176 | compare() // same order as inserted. 177 | 178 | t.Logf("Sorting using sort.Slice\n") 179 | sort.Slice(exp, func(i, j int) bool { 180 | return bytes.Compare(exp[i], exp[j]) < 0 181 | }) 182 | t.Logf("Sorting using buf.SortSlice\n") 183 | buf.SortSlice(func(a, b []byte) bool { 184 | return bytes.Compare(a, b) < 0 185 | }) 186 | t.Logf("Done sorting\n") 187 | compare() // same order after sort. 188 | }) 189 | } 190 | } 191 | 192 | func TestBufferSort(t *testing.T) { 193 | const capacity = 32 194 | bufs := newTestBuffers(t, capacity) 195 | 196 | for _, buf := range bufs { 197 | name := fmt.Sprintf("Using buffer type: %s", buf.bufType) 198 | t.Run(name, func(t *testing.T) { 199 | const N = 10000 200 | 201 | for i := 0; i < N; i++ { 202 | newSlice := buf.SliceAllocate(8) 203 | uid := uint64(rand.Int63()) 204 | binary.BigEndian.PutUint64(newSlice, uid) 205 | } 206 | 207 | test := func(start, end int) { 208 | start = buf.StartOffset() + 16*start 209 | end = buf.StartOffset() + 16*end 210 | buf.SortSliceBetween(start, end, func(ls, rs []byte) bool { 211 | lhs := binary.BigEndian.Uint64(ls) 212 | rhs := binary.BigEndian.Uint64(rs) 213 | return lhs < rhs 214 | }) 215 | 216 | next := start 217 | var slice []byte 218 | var last uint64 219 | var count int 220 | for next >= 0 && next < end { 221 | slice, next = buf.Slice(next) 222 | uid := binary.BigEndian.Uint64(slice) 223 | require.GreaterOrEqual(t, uid, last) 224 | last = uid 225 | count++ 226 | } 227 | require.Equal(t, (end-start)/16, count) 228 | } 229 | for i := 10; i <= N; i += 10 { 230 | test(i-10, i) 231 | } 232 | test(0, N) 233 | }) 234 | } 235 | } 236 | 237 | // Test that the APIs returns the expected offsets. 238 | func TestBufferPadding(t *testing.T) { 239 | bufs := newTestBuffers(t, 1<<10) 240 | for _, buf := range bufs { 241 | name := fmt.Sprintf("Using buffer type: %s", buf.bufType) 242 | t.Run(name, func(t *testing.T) { 243 | sz := rand.Int31n(100) 244 | 245 | writeOffset := buf.AllocateOffset(int(sz)) 246 | require.Equal(t, buf.StartOffset(), writeOffset) 247 | 248 | b := make([]byte, sz) 249 | rand.Read(b) 250 | 251 | copy(buf.Bytes(), b) 252 | data := buf.Data(buf.StartOffset()) 253 | require.Equal(t, b, data[:sz]) 254 | }) 255 | } 256 | } 257 | 258 | func newTestBuffers(t *testing.T, capacity int) []*Buffer { 259 | var bufs []*Buffer 260 | 261 | buf := NewBuffer(capacity, "test") 262 | bufs = append(bufs, buf) 263 | 264 | buf, err := NewBufferTmp("", capacity) 265 | require.NoError(t, err) 266 | bufs = append(bufs, buf) 267 | 268 | t.Cleanup(func() { 269 | for _, buf := range bufs { 270 | require.NoError(t, buf.Release()) 271 | } 272 | }) 273 | 274 | return bufs 275 | } 276 | 277 | func TestSmallBuffer(t *testing.T) { 278 | buf := NewBuffer(5, "test") 279 | t.Cleanup(func() { 280 | require.NoError(t, buf.Release()) 281 | }) 282 | // Write something to buffer so sort actually happens. 283 | buf.WriteSlice([]byte("abc")) 284 | // This test fails if the buffer has offset > currSz. 
285 | require.NotPanics(t, func() { 286 | buf.SortSlice(func(left, right []byte) bool { 287 | return true 288 | }) 289 | }) 290 | } 291 | -------------------------------------------------------------------------------- /z/calloc.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package z 7 | 8 | import "sync/atomic" 9 | 10 | var numBytes int64 11 | 12 | // NumAllocBytes returns the number of bytes allocated using calls to z.Calloc. The allocations 13 | // could be happening via either Go or jemalloc, depending upon the build flags. 14 | func NumAllocBytes() int64 { 15 | return atomic.LoadInt64(&numBytes) 16 | } 17 | 18 | // MemStats is used to fetch JE Malloc Stats. The stats are fetched from 19 | // the mallctl namespace http://jemalloc.net/jemalloc.3.html#mallctl_namespace. 20 | type MemStats struct { 21 | // Total number of bytes allocated by the application. 22 | // http://jemalloc.net/jemalloc.3.html#stats.allocated 23 | Allocated uint64 24 | // Total number of bytes in active pages allocated by the application. This 25 | // is a multiple of the page size, and greater than or equal to 26 | // Allocated. 27 | // http://jemalloc.net/jemalloc.3.html#stats.active 28 | Active uint64 29 | // Maximum number of bytes in physically resident data pages mapped by the 30 | // allocator, comprising all pages dedicated to allocator metadata, pages 31 | // backing active allocations, and unused dirty pages. This is a maximum 32 | // rather than precise because pages may not actually be physically 33 | // resident if they correspond to demand-zeroed virtual memory that has not 34 | // yet been touched. This is a multiple of the page size, and is larger 35 | // than stats.active. 36 | // http://jemalloc.net/jemalloc.3.html#stats.resident 37 | Resident uint64 38 | // Total number of bytes in virtual memory mappings that were retained 39 | // rather than being returned to the operating system via e.g. munmap(2) or 40 | // similar. Retained virtual memory is typically untouched, decommitted, or 41 | // purged, so it has no strongly associated physical memory (see extent 42 | // hooks http://jemalloc.net/jemalloc.3.html#arena.i.extent_hooks for 43 | // details). Retained memory is excluded from mapped memory statistics, 44 | // e.g. stats.mapped (http://jemalloc.net/jemalloc.3.html#stats.mapped). 45 | // http://jemalloc.net/jemalloc.3.html#stats.retained 46 | Retained uint64 47 | } 48 | -------------------------------------------------------------------------------- /z/calloc_32bit.go: -------------------------------------------------------------------------------- 1 | // Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 | // of this source code is governed by a BSD-style license that can be found in 3 | // the LICENSE file. 4 | 5 | //go:build 386 || amd64p32 || arm || armbe || mips || mipsle || mips64p32 || mips64p32le || ppc || sparc 6 | // +build 386 amd64p32 arm armbe mips mipsle mips64p32 mips64p32le ppc sparc 7 | 8 | package z 9 | 10 | const ( 11 | // MaxArrayLen is a safe maximum length for slices on this architecture. 12 | MaxArrayLen = 1<<31 - 1 13 | // MaxBufferSize is the size of virtually unlimited buffer on this architecture. 
14 | MaxBufferSize = 1 << 30 15 | ) 16 | -------------------------------------------------------------------------------- /z/calloc_64bit.go: -------------------------------------------------------------------------------- 1 | // Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 | // of this source code is governed by a BSD-style license that can be found in 3 | // the LICENSE file. 4 | 5 | //go:build amd64 || arm64 || arm64be || ppc64 || ppc64le || mips64 || mips64le || riscv64 || s390x || sparc64 6 | // +build amd64 arm64 arm64be ppc64 ppc64le mips64 mips64le riscv64 s390x sparc64 7 | 8 | package z 9 | 10 | const ( 11 | // MaxArrayLen is a safe maximum length for slices on this architecture. 12 | MaxArrayLen = 1<<50 - 1 13 | // MaxBufferSize is the size of virtually unlimited buffer on this architecture. 14 | MaxBufferSize = 256 << 30 15 | ) 16 | -------------------------------------------------------------------------------- /z/calloc_jemalloc.go: -------------------------------------------------------------------------------- 1 | // Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 | // of this source code is governed by a BSD-style license that can be found in 3 | // the LICENSE file. 4 | 5 | //go:build jemalloc 6 | // +build jemalloc 7 | 8 | package z 9 | 10 | /* 11 | #cgo LDFLAGS: /usr/local/lib/libjemalloc.a -L/usr/local/lib -Wl,-rpath,/usr/local/lib -ljemalloc -lm -lstdc++ -pthread -ldl 12 | #include 13 | #include 14 | */ 15 | import "C" 16 | import ( 17 | "bytes" 18 | "fmt" 19 | "sync" 20 | "sync/atomic" 21 | "unsafe" 22 | 23 | "github.com/dustin/go-humanize" 24 | ) 25 | 26 | // The go:linkname directives provides backdoor access to private functions in 27 | // the runtime. Below we're accessing the throw function. 28 | 29 | //go:linkname throw runtime.throw 30 | func throw(s string) 31 | 32 | // New allocates a slice of size n. The returned slice is from manually managed 33 | // memory and MUST be released by calling Free. Failure to do so will result in 34 | // a memory leak. 35 | // 36 | // Compile jemalloc with ./configure --with-jemalloc-prefix="je_" 37 | // https://android.googlesource.com/platform/external/jemalloc_new/+/6840b22e8e11cb68b493297a5cd757d6eaa0b406/TUNING.md 38 | // These two config options seems useful for frequent allocations and deallocations in 39 | // multi-threaded programs (like we have). 40 | // JE_MALLOC_CONF="background_thread:true,metadata_thp:auto" 41 | // 42 | // Compile Go program with `go build -tags=jemalloc` to enable this. 43 | 44 | type dalloc struct { 45 | t string 46 | sz int 47 | } 48 | 49 | var dallocsMu sync.Mutex 50 | var dallocs map[unsafe.Pointer]*dalloc 51 | 52 | func init() { 53 | // By initializing dallocs, we can start tracking allocations and deallocations via z.Calloc. 54 | dallocs = make(map[unsafe.Pointer]*dalloc) 55 | } 56 | 57 | func Calloc(n int, tag string) []byte { 58 | if n == 0 { 59 | return make([]byte, 0) 60 | } 61 | // We need to be conscious of the Cgo pointer passing rules: 62 | // 63 | // https://golang.org/cmd/cgo/#hdr-Passing_pointers 64 | // 65 | // ... 66 | // Note: the current implementation has a bug. While Go code is permitted 67 | // to write nil or a C pointer (but not a Go pointer) to C memory, the 68 | // current implementation may sometimes cause a runtime error if the 69 | // contents of the C memory appear to be a Go pointer. 
Therefore, avoid 70 | // passing uninitialized C memory to Go code if the Go code is going to 71 | // store pointer values in it. Zero out the memory in C before passing it 72 | // to Go. 73 | 74 | ptr := C.je_calloc(C.size_t(n), 1) 75 | if ptr == nil { 76 | // NB: throw is like panic, except it guarantees the process will be 77 | // terminated. The call below is exactly what the Go runtime invokes when 78 | // it cannot allocate memory. 79 | throw("out of memory") 80 | } 81 | 82 | uptr := unsafe.Pointer(ptr) 83 | dallocsMu.Lock() 84 | dallocs[uptr] = &dalloc{ 85 | t: tag, 86 | sz: n, 87 | } 88 | dallocsMu.Unlock() 89 | atomic.AddInt64(&numBytes, int64(n)) 90 | // Interpret the C pointer as a pointer to a Go array, then slice. 91 | return (*[MaxArrayLen]byte)(uptr)[:n:n] 92 | } 93 | 94 | // CallocNoRef does the exact same thing as Calloc with jemalloc enabled. 95 | func CallocNoRef(n int, tag string) []byte { 96 | return Calloc(n, tag) 97 | } 98 | 99 | // Free frees the specified slice. 100 | func Free(b []byte) { 101 | if sz := cap(b); sz != 0 { 102 | b = b[:cap(b)] 103 | ptr := unsafe.Pointer(&b[0]) 104 | C.je_free(ptr) 105 | atomic.AddInt64(&numBytes, -int64(sz)) 106 | dallocsMu.Lock() 107 | delete(dallocs, ptr) 108 | dallocsMu.Unlock() 109 | } 110 | } 111 | 112 | func Leaks() string { 113 | if dallocs == nil { 114 | return "Leak detection disabled. Enable with 'leak' build flag." 115 | } 116 | dallocsMu.Lock() 117 | defer dallocsMu.Unlock() 118 | if len(dallocs) == 0 { 119 | return "NO leaks found." 120 | } 121 | m := make(map[string]int) 122 | for _, da := range dallocs { 123 | m[da.t] += da.sz 124 | } 125 | var buf bytes.Buffer 126 | fmt.Fprintf(&buf, "Allocations:\n") 127 | for f, sz := range m { 128 | fmt.Fprintf(&buf, "%s at file: %s\n", humanize.IBytes(uint64(sz)), f) 129 | } 130 | return buf.String() 131 | } 132 | 133 | // ReadMemStats populates stats with JE Malloc statistics. 134 | func ReadMemStats(stats *MemStats) { 135 | if stats == nil { 136 | return 137 | } 138 | // Call an epoch mallclt to refresh the stats data as mentioned in the docs. 139 | // http://jemalloc.net/jemalloc.3.html#epoch 140 | // Note: This epoch mallctl is as expensive as a malloc call. It takes up the 141 | // malloc_mutex_lock. 142 | epoch := 1 143 | sz := unsafe.Sizeof(&epoch) 144 | C.je_mallctl( 145 | (C.CString)("epoch"), 146 | unsafe.Pointer(&epoch), 147 | (*C.size_t)(unsafe.Pointer(&sz)), 148 | unsafe.Pointer(&epoch), 149 | (C.size_t)(unsafe.Sizeof(epoch))) 150 | stats.Allocated = fetchStat("stats.allocated") 151 | stats.Active = fetchStat("stats.active") 152 | stats.Resident = fetchStat("stats.resident") 153 | stats.Retained = fetchStat("stats.retained") 154 | } 155 | 156 | // fetchStat is used to read a specific attribute from je malloc stats using mallctl. 157 | func fetchStat(s string) uint64 { 158 | var out uint64 159 | sz := unsafe.Sizeof(&out) 160 | C.je_mallctl( 161 | (C.CString)(s), // Query: eg: stats.allocated, stats.resident, etc. 162 | unsafe.Pointer(&out), // Variable to store the output. 163 | (*C.size_t)(unsafe.Pointer(&sz)), // Size of the output variable. 164 | nil, // Input variable used to set a value. 165 | 0) // Size of the input variable. 
166 | return out 167 | } 168 | 169 | func StatsPrint() { 170 | opts := C.CString("mdablxe") 171 | C.je_malloc_stats_print(nil, nil, opts) 172 | C.free(unsafe.Pointer(opts)) 173 | } 174 | -------------------------------------------------------------------------------- /z/calloc_nojemalloc.go: -------------------------------------------------------------------------------- 1 | // Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 | // of this source code is governed by a BSD-style license that can be found in 3 | // the LICENSE file. 4 | 5 | //go:build !jemalloc || !cgo 6 | // +build !jemalloc !cgo 7 | 8 | package z 9 | 10 | import ( 11 | "fmt" 12 | ) 13 | 14 | // Provides versions of Calloc, CallocNoRef, etc when jemalloc is not available 15 | // (eg: build without jemalloc tag). 16 | 17 | // Calloc allocates a slice of size n. 18 | func Calloc(n int, tag string) []byte { 19 | return make([]byte, n) 20 | } 21 | 22 | // CallocNoRef will not give you memory back without jemalloc. 23 | func CallocNoRef(n int, tag string) []byte { 24 | // We do the add here just to stay compatible with a corresponding Free call. 25 | return nil 26 | } 27 | 28 | // Free does not do anything in this mode. 29 | func Free(b []byte) {} 30 | 31 | func Leaks() string { return "Leaks: Using Go memory" } 32 | func StatsPrint() { 33 | fmt.Println("Using Go memory") 34 | } 35 | 36 | // ReadMemStats doesn't do anything since all the memory is being managed 37 | // by the Go runtime. 38 | func ReadMemStats(_ *MemStats) {} 39 | -------------------------------------------------------------------------------- /z/calloc_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package z 7 | 8 | import ( 9 | "fmt" 10 | "sync" 11 | "testing" 12 | "time" 13 | 14 | "math/rand" 15 | 16 | "github.com/stretchr/testify/require" 17 | ) 18 | 19 | // $ go test -failfast -run xxx -bench . -benchmem -count 10 > out.txt 20 | // $ benchstat out.txt 21 | // name time/op 22 | // Allocation/Pool-8 200µs ± 5% 23 | // Allocation/Calloc-8 100µs ±11% 24 | // 25 | // name alloc/op 26 | // Allocation/Pool-8 477B ±29% 27 | // Allocation/Calloc-8 4.00B ± 0% 28 | // 29 | // name allocs/op 30 | // Allocation/Pool-8 1.00 ± 0% 31 | // Allocation/Calloc-8 0.00 32 | func BenchmarkAllocation(b *testing.B) { 33 | b.Run("Pool", func(b *testing.B) { 34 | pool := sync.Pool{ 35 | New: func() interface{} { 36 | return make([]byte, 4<<10) 37 | }, 38 | } 39 | b.RunParallel(func(pb *testing.PB) { 40 | source := rand.NewSource(time.Now().UnixNano()) 41 | r := rand.New(source) 42 | for pb.Next() { 43 | x := pool.Get().([]byte) 44 | sz := r.Intn(100) << 10 45 | if len(x) < sz { 46 | x = make([]byte, sz) 47 | } 48 | r.Read(x) 49 | //nolint:staticcheck 50 | pool.Put(x) 51 | } 52 | }) 53 | }) 54 | 55 | b.Run("Calloc", func(b *testing.B) { 56 | b.RunParallel(func(pb *testing.PB) { 57 | source := rand.NewSource(time.Now().UnixNano()) 58 | r := rand.New(source) 59 | for pb.Next() { 60 | sz := r.Intn(100) << 10 61 | x := Calloc(sz, "test") 62 | r.Read(x) 63 | Free(x) 64 | } 65 | }) 66 | }) 67 | } 68 | 69 | func TestCalloc(t *testing.T) { 70 | // Check if we're using jemalloc. 71 | // JE_MALLOC_CONF="abort:true,tcache:false" 72 | 73 | StatsPrint() 74 | buf := CallocNoRef(1, "test") 75 | if len(buf) == 0 { 76 | t.Skipf("Not using jemalloc. 
Skipping test.") 77 | } 78 | Free(buf) 79 | require.Equal(t, int64(0), NumAllocBytes()) 80 | 81 | buf1 := Calloc(128, "test") 82 | require.Equal(t, int64(128), NumAllocBytes()) 83 | buf2 := Calloc(128, "test") 84 | require.Equal(t, int64(256), NumAllocBytes()) 85 | 86 | Free(buf1) 87 | require.Equal(t, int64(128), NumAllocBytes()) 88 | 89 | // _ = buf2 90 | Free(buf2) 91 | require.Equal(t, int64(0), NumAllocBytes()) 92 | fmt.Println(Leaks()) 93 | 94 | // Double free would panic when debug mode is enabled in jemalloc. 95 | // Free(buf2) 96 | // require.Equal(t, int64(0), NumAllocBytes()) 97 | } 98 | -------------------------------------------------------------------------------- /z/file.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package z 7 | 8 | import ( 9 | "encoding/binary" 10 | "errors" 11 | "fmt" 12 | "io" 13 | "os" 14 | "path/filepath" 15 | ) 16 | 17 | // MmapFile represents an mmapd file and includes both the buffer to the data 18 | // and the file descriptor. 19 | type MmapFile struct { 20 | Data []byte 21 | Fd *os.File 22 | } 23 | 24 | var NewFile = errors.New("Create a new file") 25 | 26 | func OpenMmapFileUsing(fd *os.File, sz int, writable bool) (*MmapFile, error) { 27 | filename := fd.Name() 28 | fi, err := fd.Stat() 29 | if err != nil { 30 | return nil, errors.Join(err, fmt.Errorf("cannot stat file: %s", filename)) 31 | } 32 | 33 | var rerr error 34 | fileSize := fi.Size() 35 | if sz > 0 && fileSize == 0 { 36 | // If file is empty, truncate it to sz. 37 | if err := fd.Truncate(int64(sz)); err != nil { 38 | return nil, errors.Join(err, errors.New("error while truncation")) 39 | } 40 | fileSize = int64(sz) 41 | rerr = NewFile 42 | } 43 | 44 | // fmt.Printf("Mmaping file: %s with writable: %v filesize: %d\n", fd.Name(), writable, fileSize) 45 | buf, err := Mmap(fd, writable, fileSize) // Mmap up to file size. 46 | if err != nil { 47 | return nil, errors.Join(err, fmt.Errorf("while mmapping %s with size: %d", fd.Name(), fileSize)) 48 | } 49 | 50 | if fileSize == 0 { 51 | dir, _ := filepath.Split(filename) 52 | if err := SyncDir(dir); err != nil { 53 | return nil, err 54 | } 55 | } 56 | return &MmapFile{ 57 | Data: buf, 58 | Fd: fd, 59 | }, rerr 60 | } 61 | 62 | // OpenMmapFile opens an existing file or creates a new file. If the file is 63 | // created, it would truncate the file to maxSz. In both cases, it would mmap 64 | // the file to maxSz and returned it. In case the file is created, z.NewFile is 65 | // returned. 
66 | func OpenMmapFile(filename string, flag int, maxSz int) (*MmapFile, error) { 67 | // fmt.Printf("opening file %s with flag: %v\n", filename, flag) 68 | fd, err := os.OpenFile(filename, flag, 0666) 69 | if err != nil { 70 | return nil, errors.Join(err, fmt.Errorf("unable to open: %s", filename)) 71 | } 72 | writable := true 73 | if flag == os.O_RDONLY { 74 | writable = false 75 | } 76 | return OpenMmapFileUsing(fd, maxSz, writable) 77 | } 78 | 79 | type mmapReader struct { 80 | Data []byte 81 | offset int 82 | } 83 | 84 | func (mr *mmapReader) Read(buf []byte) (int, error) { 85 | if mr.offset > len(mr.Data) { 86 | return 0, io.EOF 87 | } 88 | n := copy(buf, mr.Data[mr.offset:]) 89 | mr.offset += n 90 | if n < len(buf) { 91 | return n, io.EOF 92 | } 93 | return n, nil 94 | } 95 | 96 | func (m *MmapFile) NewReader(offset int) io.Reader { 97 | return &mmapReader{ 98 | Data: m.Data, 99 | offset: offset, 100 | } 101 | } 102 | 103 | // Bytes returns data starting from offset off of size sz. If there's not enough data, it would 104 | // return nil slice and io.EOF. 105 | func (m *MmapFile) Bytes(off, sz int) ([]byte, error) { 106 | if len(m.Data[off:]) < sz { 107 | return nil, io.EOF 108 | } 109 | return m.Data[off : off+sz], nil 110 | } 111 | 112 | // Slice returns the slice at the given offset. 113 | func (m *MmapFile) Slice(offset int) []byte { 114 | sz := binary.BigEndian.Uint32(m.Data[offset:]) 115 | start := offset + 4 116 | next := start + int(sz) 117 | if next > len(m.Data) { 118 | return []byte{} 119 | } 120 | res := m.Data[start:next] 121 | return res 122 | } 123 | 124 | // AllocateSlice allocates a slice of the given size at the given offset. 125 | func (m *MmapFile) AllocateSlice(sz, offset int) ([]byte, int, error) { 126 | start := offset + 4 127 | 128 | // If the file is too small, double its size or increase it by 1GB, whichever is smaller. 129 | if start+sz > len(m.Data) { 130 | const oneGB = 1 << 30 131 | growBy := len(m.Data) 132 | if growBy > oneGB { 133 | growBy = oneGB 134 | } 135 | if growBy < sz+4 { 136 | growBy = sz + 4 137 | } 138 | if err := m.Truncate(int64(len(m.Data) + growBy)); err != nil { 139 | return nil, 0, err 140 | } 141 | } 142 | 143 | binary.BigEndian.PutUint32(m.Data[offset:], uint32(sz)) 144 | return m.Data[start : start+sz], start + sz, nil 145 | } 146 | 147 | func (m *MmapFile) Sync() error { 148 | if m == nil { 149 | return nil 150 | } 151 | return Msync(m.Data) 152 | } 153 | 154 | func (m *MmapFile) Delete() error { 155 | // Badger can set the m.Data directly, without setting any Fd. In that case, this should be a 156 | // NOOP. 157 | if m.Fd == nil { 158 | return nil 159 | } 160 | 161 | if err := Munmap(m.Data); err != nil { 162 | return fmt.Errorf("while munmap file: %s, error: %v\n", m.Fd.Name(), err) 163 | } 164 | m.Data = nil 165 | if err := m.Fd.Truncate(0); err != nil { 166 | return fmt.Errorf("while truncate file: %s, error: %v\n", m.Fd.Name(), err) 167 | } 168 | if err := m.Fd.Close(); err != nil { 169 | return fmt.Errorf("while close file: %s, error: %v\n", m.Fd.Name(), err) 170 | } 171 | return os.Remove(m.Fd.Name()) 172 | } 173 | 174 | // Close would close the file. It would also truncate the file if maxSz >= 0. 175 | func (m *MmapFile) Close(maxSz int64) error { 176 | // Badger can set the m.Data directly, without setting any Fd. In that case, this should be a 177 | // NOOP. 
178 | if m.Fd == nil { 179 | return nil 180 | } 181 | if err := m.Sync(); err != nil { 182 | return fmt.Errorf("while sync file: %s, error: %v\n", m.Fd.Name(), err) 183 | } 184 | if err := Munmap(m.Data); err != nil { 185 | return fmt.Errorf("while munmap file: %s, error: %v\n", m.Fd.Name(), err) 186 | } 187 | if maxSz >= 0 { 188 | if err := m.Fd.Truncate(maxSz); err != nil { 189 | return fmt.Errorf("while truncate file: %s, error: %v\n", m.Fd.Name(), err) 190 | } 191 | } 192 | return m.Fd.Close() 193 | } 194 | 195 | func SyncDir(dir string) error { 196 | df, err := os.Open(dir) 197 | if err != nil { 198 | return errors.Join(err, fmt.Errorf("while opening %s", dir)) 199 | } 200 | if err := df.Sync(); err != nil { 201 | return errors.Join(err, fmt.Errorf("while syncing %s", dir)) 202 | } 203 | if err := df.Close(); err != nil { 204 | return errors.Join(err, fmt.Errorf("while closing %s", dir)) 205 | } 206 | return nil 207 | } 208 | -------------------------------------------------------------------------------- /z/file_default.go: -------------------------------------------------------------------------------- 1 | //go:build !linux 2 | // +build !linux 3 | 4 | /* 5 | * SPDX-FileCopyrightText: © Hypermode Inc. 6 | * SPDX-License-Identifier: Apache-2.0 7 | */ 8 | 9 | package z 10 | 11 | import "fmt" 12 | 13 | // Truncate would truncate the mmapped file to the given size. On Linux, we truncate 14 | // the underlying file and then call mremap, but on other systems, we unmap first, 15 | // then truncate, then re-map. 16 | func (m *MmapFile) Truncate(maxSz int64) error { 17 | if err := m.Sync(); err != nil { 18 | return fmt.Errorf("while sync file: %s, error: %v\n", m.Fd.Name(), err) 19 | } 20 | if err := Munmap(m.Data); err != nil { 21 | return fmt.Errorf("while munmap file: %s, error: %v\n", m.Fd.Name(), err) 22 | } 23 | if err := m.Fd.Truncate(maxSz); err != nil { 24 | return fmt.Errorf("while truncate file: %s, error: %v\n", m.Fd.Name(), err) 25 | } 26 | var err error 27 | m.Data, err = Mmap(m.Fd, true, maxSz) // Mmap up to max size. 28 | return err 29 | } 30 | -------------------------------------------------------------------------------- /z/file_linux.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package z 7 | 8 | import ( 9 | "fmt" 10 | ) 11 | 12 | // Truncate would truncate the mmapped file to the given size. On Linux, we truncate 13 | // the underlying file and then call mremap, but on other systems, we unmap first, 14 | // then truncate, then re-map. 15 | func (m *MmapFile) Truncate(maxSz int64) error { 16 | if err := m.Sync(); err != nil { 17 | return fmt.Errorf("while sync file: %s, error: %v\n", m.Fd.Name(), err) 18 | } 19 | if err := m.Fd.Truncate(maxSz); err != nil { 20 | return fmt.Errorf("while truncate file: %s, error: %v\n", m.Fd.Name(), err) 21 | } 22 | 23 | var err error 24 | m.Data, err = mremap(m.Data, int(maxSz)) // Mmap up to max size. 25 | return err 26 | } 27 | -------------------------------------------------------------------------------- /z/flags.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 
3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package z 7 | 8 | import ( 9 | "errors" 10 | "fmt" 11 | "log" 12 | "os" 13 | "os/user" 14 | "path/filepath" 15 | "sort" 16 | "strconv" 17 | "strings" 18 | "time" 19 | ) 20 | 21 | // SuperFlagHelp makes it really easy to generate command line `--help` output for a SuperFlag. For 22 | // example: 23 | // 24 | // const flagDefaults = `enabled=true; path=some/path;` 25 | // 26 | // var help string = z.NewSuperFlagHelp(flagDefaults). 27 | // Flag("enabled", "Turns on ."). 28 | // Flag("path", "The path to ."). 29 | // Flag("another", "Not present in defaults, but still included."). 30 | // String() 31 | // 32 | // The `help` string would then contain: 33 | // 34 | // enabled=true; Turns on . 35 | // path=some/path; The path to . 36 | // another=; Not present in defaults, but still included. 37 | // 38 | // All flags are sorted alphabetically for consistent `--help` output. Flags with default values are 39 | // placed at the top, and everything else goes under. 40 | type SuperFlagHelp struct { 41 | head string 42 | defaults *SuperFlag 43 | flags map[string]string 44 | } 45 | 46 | func NewSuperFlagHelp(defaults string) *SuperFlagHelp { 47 | return &SuperFlagHelp{ 48 | defaults: NewSuperFlag(defaults), 49 | flags: make(map[string]string, 0), 50 | } 51 | } 52 | 53 | func (h *SuperFlagHelp) Head(head string) *SuperFlagHelp { 54 | h.head = head 55 | return h 56 | } 57 | 58 | func (h *SuperFlagHelp) Flag(name, description string) *SuperFlagHelp { 59 | h.flags[name] = description 60 | return h 61 | } 62 | 63 | func (h *SuperFlagHelp) String() string { 64 | defaultLines := make([]string, 0) 65 | otherLines := make([]string, 0) 66 | for name, help := range h.flags { 67 | val, found := h.defaults.m[name] 68 | line := fmt.Sprintf(" %s=%s; %s\n", name, val, help) 69 | if found { 70 | defaultLines = append(defaultLines, line) 71 | } else { 72 | otherLines = append(otherLines, line) 73 | } 74 | } 75 | sort.Strings(defaultLines) 76 | sort.Strings(otherLines) 77 | dls := strings.Join(defaultLines, "") 78 | ols := strings.Join(otherLines, "") 79 | if len(h.defaults.m) == 0 && len(ols) == 0 { 80 | // remove last newline 81 | dls = dls[:len(dls)-1] 82 | } 83 | // remove last newline 84 | if len(h.defaults.m) == 0 && len(ols) > 1 { 85 | ols = ols[:len(ols)-1] 86 | } 87 | return h.head + "\n" + dls + ols 88 | } 89 | 90 | func parseFlag(flag string) (map[string]string, error) { 91 | kvm := make(map[string]string) 92 | for _, kv := range strings.Split(flag, ";") { 93 | if strings.TrimSpace(kv) == "" { 94 | continue 95 | } 96 | // For a non-empty separator, 0 < len(splits) ≤ 2. 
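// e.g. "compression=zstd:1" splits into ["compression", "zstd:1"], while a bare
// "compression" yields a single element and is reported as missing a value below.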
97 | splits := strings.SplitN(kv, "=", 2) 98 | k := strings.TrimSpace(splits[0]) 99 | if len(splits) < 2 { 100 | return nil, fmt.Errorf("superflag: missing value for '%s' in flag: %s", k, flag) 101 | } 102 | k = strings.ToLower(k) 103 | k = strings.ReplaceAll(k, "_", "-") 104 | kvm[k] = strings.TrimSpace(splits[1]) 105 | } 106 | return kvm, nil 107 | } 108 | 109 | type SuperFlag struct { 110 | m map[string]string 111 | } 112 | 113 | func NewSuperFlag(flag string) *SuperFlag { 114 | sf, err := newSuperFlagImpl(flag) 115 | if err != nil { 116 | log.Fatal(err) 117 | } 118 | return sf 119 | } 120 | 121 | func newSuperFlagImpl(flag string) (*SuperFlag, error) { 122 | m, err := parseFlag(flag) 123 | if err != nil { 124 | return nil, err 125 | } 126 | return &SuperFlag{m}, nil 127 | } 128 | 129 | func (sf *SuperFlag) String() string { 130 | if sf == nil { 131 | return "" 132 | } 133 | kvs := make([]string, 0, len(sf.m)) 134 | for k, v := range sf.m { 135 | kvs = append(kvs, fmt.Sprintf("%s=%s", k, v)) 136 | } 137 | return strings.Join(kvs, "; ") 138 | } 139 | 140 | func (sf *SuperFlag) MergeAndCheckDefault(flag string) *SuperFlag { 141 | sf, err := sf.mergeAndCheckDefaultImpl(flag) 142 | if err != nil { 143 | log.Fatal(err) 144 | } 145 | return sf 146 | } 147 | 148 | func (sf *SuperFlag) mergeAndCheckDefaultImpl(flag string) (*SuperFlag, error) { 149 | if sf == nil { 150 | m, err := parseFlag(flag) 151 | if err != nil { 152 | return nil, err 153 | } 154 | return &SuperFlag{m}, nil 155 | } 156 | 157 | src, err := parseFlag(flag) 158 | if err != nil { 159 | return nil, err 160 | } 161 | 162 | numKeys := len(sf.m) 163 | for k := range src { 164 | if _, ok := sf.m[k]; ok { 165 | numKeys-- 166 | } 167 | } 168 | if numKeys != 0 { 169 | return nil, fmt.Errorf("superflag: found invalid options: %s.\nvalid options: %v", sf, flag) 170 | } 171 | for k, v := range src { 172 | if _, ok := sf.m[k]; !ok { 173 | sf.m[k] = v 174 | } 175 | } 176 | return sf, nil 177 | } 178 | 179 | func (sf *SuperFlag) Has(opt string) bool { 180 | val := sf.GetString(opt) 181 | return val != "" 182 | } 183 | 184 | func (sf *SuperFlag) GetDuration(opt string) time.Duration { 185 | val := sf.GetString(opt) 186 | if val == "" { 187 | return time.Duration(0) 188 | } 189 | if strings.Contains(val, "d") { 190 | val = strings.Replace(val, "d", "", 1) 191 | days, err := strconv.ParseInt(val, 0, 64) 192 | if err != nil { 193 | return time.Duration(0) 194 | } 195 | return time.Hour * 24 * time.Duration(days) 196 | } 197 | d, err := time.ParseDuration(val) 198 | if err != nil { 199 | return time.Duration(0) 200 | } 201 | return d 202 | } 203 | 204 | func (sf *SuperFlag) GetBool(opt string) bool { 205 | val := sf.GetString(opt) 206 | if val == "" { 207 | return false 208 | } 209 | b, err := strconv.ParseBool(val) 210 | if err != nil { 211 | err = errors.Join(err, 212 | fmt.Errorf("Unable to parse %s as bool for key: %s. Options: %s\n", val, opt, sf)) 213 | log.Fatalf("%+v", err) 214 | } 215 | return b 216 | } 217 | 218 | func (sf *SuperFlag) GetFloat64(opt string) float64 { 219 | val := sf.GetString(opt) 220 | if val == "" { 221 | return 0 222 | } 223 | f, err := strconv.ParseFloat(val, 64) 224 | if err != nil { 225 | err = errors.Join(err, 226 | fmt.Errorf("Unable to parse %s as float64 for key: %s. 
Options: %s\n", val, opt, sf)) 227 | log.Fatalf("%+v", err) 228 | } 229 | return f 230 | } 231 | 232 | func (sf *SuperFlag) GetInt64(opt string) int64 { 233 | val := sf.GetString(opt) 234 | if val == "" { 235 | return 0 236 | } 237 | i, err := strconv.ParseInt(val, 0, 64) 238 | if err != nil { 239 | err = errors.Join(err, 240 | fmt.Errorf("Unable to parse %s as int64 for key: %s. Options: %s\n", val, opt, sf)) 241 | log.Fatalf("%+v", err) 242 | } 243 | return i 244 | } 245 | 246 | func (sf *SuperFlag) GetUint64(opt string) uint64 { 247 | val := sf.GetString(opt) 248 | if val == "" { 249 | return 0 250 | } 251 | u, err := strconv.ParseUint(val, 0, 64) 252 | if err != nil { 253 | err = errors.Join(err, 254 | fmt.Errorf("Unable to parse %s as uint64 for key: %s. Options: %s\n", val, opt, sf)) 255 | log.Fatalf("%+v", err) 256 | } 257 | return u 258 | } 259 | 260 | func (sf *SuperFlag) GetUint32(opt string) uint32 { 261 | val := sf.GetString(opt) 262 | if val == "" { 263 | return 0 264 | } 265 | u, err := strconv.ParseUint(val, 0, 32) 266 | if err != nil { 267 | err = errors.Join(err, 268 | fmt.Errorf("Unable to parse %s as uint32 for key: %s. Options: %s\n", val, opt, sf)) 269 | log.Fatalf("%+v", err) 270 | } 271 | return uint32(u) 272 | } 273 | 274 | func (sf *SuperFlag) GetString(opt string) string { 275 | if sf == nil { 276 | return "" 277 | } 278 | return sf.m[opt] 279 | } 280 | 281 | func (sf *SuperFlag) GetPath(opt string) string { 282 | p := sf.GetString(opt) 283 | path, err := expandPath(p) 284 | if err != nil { 285 | log.Fatalf("Failed to get path: %+v", err) 286 | } 287 | return path 288 | } 289 | 290 | // expandPath expands the paths containing ~ to /home/user. It also computes the absolute path 291 | // from the relative paths. For example: ~/abc/../cef will be transformed to /home/user/cef. 292 | func expandPath(path string) (string, error) { 293 | if len(path) == 0 { 294 | return "", nil 295 | } 296 | if path[0] == '~' && (len(path) == 1 || os.IsPathSeparator(path[1])) { 297 | usr, err := user.Current() 298 | if err != nil { 299 | return "", errors.Join(err, errors.New("Failed to get the home directory of the user")) 300 | } 301 | path = filepath.Join(usr.HomeDir, path[1:]) 302 | } 303 | 304 | var err error 305 | path, err = filepath.Abs(path) 306 | if err != nil { 307 | return "", errors.Join(err, errors.New("Failed to generate absolute path")) 308 | } 309 | return path, nil 310 | } 311 | -------------------------------------------------------------------------------- /z/flags_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package z 7 | 8 | import ( 9 | "fmt" 10 | "os" 11 | "os/user" 12 | "path/filepath" 13 | "testing" 14 | "time" 15 | 16 | "github.com/stretchr/testify/require" 17 | ) 18 | 19 | func TestFlag(t *testing.T) { 20 | const opt = `bool_key=true; int-key=5; float-key=0.05; string_key=value; ;` 21 | const def = `bool_key=false; int-key=0; float-key=1.0; string-key=; other-key=5; 22 | duration-minutes=15m; duration-hours=12h; duration-days=30d;` 23 | 24 | _, err := NewSuperFlag("boolo-key=true").mergeAndCheckDefaultImpl(def) 25 | require.Error(t, err) 26 | _, err = newSuperFlagImpl("key-without-value") 27 | require.Error(t, err) 28 | 29 | // bool-key and int-key should not be overwritten. Only other-key should be set. 
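// (The underscore forms in `opt` normalize to bool-key and string-key, which is why the
// dashed lookups below succeed; MergeAndCheckDefault only fills in keys that are still
// missing, here other-key and the duration-* entries.)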
30 | sf := NewSuperFlag(opt) 31 | sf.MergeAndCheckDefault(def) 32 | 33 | require.Equal(t, true, sf.GetBool("bool-key")) 34 | require.Equal(t, uint64(5), sf.GetUint64("int-key")) 35 | require.Equal(t, "value", sf.GetString("string-key")) 36 | require.Equal(t, uint64(5), sf.GetUint64("other-key")) 37 | 38 | require.Equal(t, time.Minute*15, sf.GetDuration("duration-minutes")) 39 | require.Equal(t, time.Hour*12, sf.GetDuration("duration-hours")) 40 | require.Equal(t, time.Hour*24*30, sf.GetDuration("duration-days")) 41 | }
42 | 43 | func TestFlagDefault(t *testing.T) { 44 | def := `one=false; two=; three=;` 45 | f := NewSuperFlag(`one=true; two=4;`).MergeAndCheckDefault(def) 46 | require.Equal(t, true, f.GetBool("one")) 47 | require.Equal(t, int64(4), f.GetInt64("two")) 48 | }
49 | 50 | func TestGetPath(t *testing.T) { 51 | usr, err := user.Current() 52 | require.NoError(t, err) 53 | homeDir := usr.HomeDir 54 | cwd, err := os.Getwd() 55 | require.NoError(t, err) 56 | 57 | tests := []struct { 58 | path string 59 | expected string 60 | }{ 61 | { 62 | "/home/user/file.txt", 63 | "/home/user/file.txt", 64 | }, 65 | { 66 | "~/file.txt", 67 | filepath.Join(homeDir, "file.txt"), 68 | }, 69 | { 70 | "~/abc/../file.txt", 71 | filepath.Join(homeDir, "file.txt"), 72 | }, 73 | { 74 | "~/", 75 | homeDir, 76 | }, 77 | { 78 | "~filename", 79 | filepath.Join(cwd, "~filename"), 80 | }, 81 | { 82 | "./filename", 83 | filepath.Join(cwd, "filename"), 84 | }, 85 | { 86 | "", 87 | "", 88 | }, 89 | { 90 | "./", 91 | cwd, 92 | }, 93 | } 94 | 95 | get := func(p string) string { 96 | opt := fmt.Sprintf("file=%s", p) 97 | sf := NewSuperFlag(opt) 98 | return sf.GetPath("file") 99 | } 100 | 101 | for _, tc := range tests { 102 | actual := get(tc.path) 103 | require.Equalf(t, tc.expected, actual, "Failed on testcase: %s", tc.path) 104 | } 105 | }
106 | -------------------------------------------------------------------------------- /z/histogram.go: --------------------------------------------------------------------------------
1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package z 7 | 8 | import ( 9 | "fmt" 10 | "math" 11 | "strings" 12 | 13 | "github.com/dustin/go-humanize" 14 | ) 15 | 16 | // Creates bounds for an histogram. The bounds are powers of two of the form 17 | // [2^min_exponent, ..., 2^max_exponent]. 18 | func HistogramBounds(minExponent, maxExponent uint32) []float64 { 19 | var bounds []float64 20 | for i := minExponent; i <= maxExponent; i++ { 21 | bounds = append(bounds, float64(int(1)<<i)) 22 | } 23 | return bounds 24 | } 25 | 26 | func Fibonacci(num int) []float64 { 27 | assert(num > 4) 28 | bounds := make([]float64, num) 29 | bounds[0] = 1 30 | bounds[1] = 2 31 | for i := 2; i < num; i++ { 32 | bounds[i] = bounds[i-1] + bounds[i-2] 33 | } 34 | return bounds 35 | }
36 | 37 | // HistogramData stores the information needed to represent the sizes of the keys and values 38 | // as a histogram. 39 | type HistogramData struct { 40 | Bounds []float64 41 | Count int64 42 | CountPerBucket []int64 43 | Min int64 44 | Max int64 45 | Sum int64 46 | } 47 | 48 | // NewHistogramData returns a new instance of HistogramData with properly initialized fields.
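// A typical pairing, sketched here purely for illustration, builds bounds with one of the
// helpers above and then records observations:
//
//	h := NewHistogramData(HistogramBounds(4, 10)) // buckets bounded at 16, 32, ..., 1024
//	h.Update(100)
//	fmt.Println(h.String(), h.Percentile(0.5))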
49 | func NewHistogramData(bounds []float64) *HistogramData { 50 | return &HistogramData{ 51 | Bounds: bounds, 52 | CountPerBucket: make([]int64, len(bounds)+1), 53 | Max: 0, 54 | Min: math.MaxInt64, 55 | } 56 | } 57 | 58 | func (histogram *HistogramData) Copy() *HistogramData { 59 | if histogram == nil { 60 | return nil 61 | } 62 | return &HistogramData{ 63 | Bounds: append([]float64{}, histogram.Bounds...), 64 | CountPerBucket: append([]int64{}, histogram.CountPerBucket...), 65 | Count: histogram.Count, 66 | Min: histogram.Min, 67 | Max: histogram.Max, 68 | Sum: histogram.Sum, 69 | } 70 | } 71 | 72 | // Update changes the Min and Max fields if value is less than or greater than the current values. 73 | func (histogram *HistogramData) Update(value int64) { 74 | if histogram == nil { 75 | return 76 | } 77 | if value > histogram.Max { 78 | histogram.Max = value 79 | } 80 | if value < histogram.Min { 81 | histogram.Min = value 82 | } 83 | 84 | histogram.Sum += value 85 | histogram.Count++ 86 | 87 | for index := 0; index <= len(histogram.Bounds); index++ { 88 | // Allocate value in the last buckets if we reached the end of the Bounds array. 89 | if index == len(histogram.Bounds) { 90 | histogram.CountPerBucket[index]++ 91 | break 92 | } 93 | 94 | if value < int64(histogram.Bounds[index]) { 95 | histogram.CountPerBucket[index]++ 96 | break 97 | } 98 | } 99 | } 100 | 101 | // Mean returns the mean value for the histogram. 102 | func (histogram *HistogramData) Mean() float64 { 103 | if histogram.Count == 0 { 104 | return 0 105 | } 106 | return float64(histogram.Sum) / float64(histogram.Count) 107 | } 108 | 109 | // String converts the histogram data into human-readable string. 110 | func (histogram *HistogramData) String() string { 111 | if histogram == nil { 112 | return "" 113 | } 114 | var b strings.Builder 115 | 116 | b.WriteString("\n -- Histogram: \n") 117 | b.WriteString(fmt.Sprintf("Min value: %d \n", histogram.Min)) 118 | b.WriteString(fmt.Sprintf("Max value: %d \n", histogram.Max)) 119 | b.WriteString(fmt.Sprintf("Count: %d \n", histogram.Count)) 120 | b.WriteString(fmt.Sprintf("50p: %.2f \n", histogram.Percentile(0.5))) 121 | b.WriteString(fmt.Sprintf("75p: %.2f \n", histogram.Percentile(0.75))) 122 | b.WriteString(fmt.Sprintf("90p: %.2f \n", histogram.Percentile(0.90))) 123 | 124 | numBounds := len(histogram.Bounds) 125 | var cum float64 126 | for index, count := range histogram.CountPerBucket { 127 | if count == 0 { 128 | continue 129 | } 130 | 131 | // The last bucket represents the bucket that contains the range from 132 | // the last bound up to infinity so it's processed differently than the 133 | // other buckets. 134 | if index == len(histogram.CountPerBucket)-1 { 135 | lowerBound := uint64(histogram.Bounds[numBounds-1]) 136 | page := float64(count*100) / float64(histogram.Count) 137 | cum += page 138 | b.WriteString(fmt.Sprintf("[%s, %s) %d %.2f%% %.2f%%\n", 139 | humanize.IBytes(lowerBound), "infinity", count, page, cum)) 140 | continue 141 | } 142 | 143 | upperBound := uint64(histogram.Bounds[index]) 144 | lowerBound := uint64(0) 145 | if index > 0 { 146 | lowerBound = uint64(histogram.Bounds[index-1]) 147 | } 148 | 149 | page := float64(count*100) / float64(histogram.Count) 150 | cum += page 151 | b.WriteString(fmt.Sprintf("[%d, %d) %d %.2f%% %.2f%%\n", 152 | lowerBound, upperBound, count, page, cum)) 153 | } 154 | b.WriteString(" --\n") 155 | return b.String() 156 | } 157 | 158 | // Percentile returns the percentile value for the histogram. 
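// It walks the cumulative bucket counts and returns the upper bound of the bucket in which
// the requested quantile falls, or the last bound when it lands in the overflow bucket.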
159 | // value of p should be between [0.0-1.0] 160 | func (histogram *HistogramData) Percentile(p float64) float64 { 161 | if histogram == nil { 162 | return 0 163 | } 164 | 165 | if histogram.Count == 0 { 166 | // if no data return the minimum range 167 | return histogram.Bounds[0] 168 | } 169 | pval := int64(float64(histogram.Count) * p) 170 | for i, v := range histogram.CountPerBucket { 171 | pval = pval - v 172 | if pval <= 0 { 173 | if i == len(histogram.Bounds) { 174 | break 175 | } 176 | return histogram.Bounds[i] 177 | } 178 | } 179 | // default return should be the max range 180 | return histogram.Bounds[len(histogram.Bounds)-1] 181 | } 182 | 183 | // Clear reset the histogram. Helpful in situations where we need to reset the metrics 184 | func (histogram *HistogramData) Clear() { 185 | if histogram == nil { 186 | return 187 | } 188 | 189 | histogram.Count = 0 190 | histogram.CountPerBucket = make([]int64, len(histogram.Bounds)+1) 191 | histogram.Sum = 0 192 | histogram.Max = 0 193 | histogram.Min = math.MaxInt64 194 | } 195 | -------------------------------------------------------------------------------- /z/histogram_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package z 7 | 8 | import ( 9 | "math" 10 | "testing" 11 | 12 | "github.com/stretchr/testify/require" 13 | ) 14 | 15 | func TestPercentile00(t *testing.T) { 16 | size := int(math.Ceil((float64(514) - float64(32)) / float64(4))) 17 | bounds := make([]float64, size+1) 18 | for i := range bounds { 19 | if i == 0 { 20 | bounds[0] = 32 21 | continue 22 | } 23 | if i == size { 24 | bounds[i] = 514 25 | break 26 | } 27 | bounds[i] = bounds[i-1] + 4 28 | } 29 | 30 | h := NewHistogramData(bounds) 31 | for v := 16; v <= 1024; v = v + 4 { 32 | for i := 0; i < 1000; i++ { 33 | h.Update(int64(v)) 34 | } 35 | } 36 | 37 | require.Equal(t, h.Percentile(0.0), 32.0) 38 | } 39 | 40 | func TestPercentile99(t *testing.T) { 41 | size := int(math.Ceil((float64(514) - float64(32)) / float64(4))) 42 | bounds := make([]float64, size+1) 43 | for i := range bounds { 44 | if i == 0 { 45 | bounds[0] = 32 46 | continue 47 | } 48 | if i == size { 49 | bounds[i] = 514 50 | break 51 | } 52 | bounds[i] = bounds[i-1] + 4 53 | } 54 | h := NewHistogramData(bounds) 55 | for v := 16; v <= 512; v = v + 4 { 56 | for i := 0; i < 1000; i++ { 57 | h.Update(int64(v)) 58 | } 59 | } 60 | 61 | require.Equal(t, h.Percentile(0.99), 512.0) 62 | } 63 | 64 | func TestPercentile100(t *testing.T) { 65 | size := int(math.Ceil((float64(514) - float64(32)) / float64(4))) 66 | bounds := make([]float64, size+1) 67 | for i := range bounds { 68 | if i == 0 { 69 | bounds[0] = 32 70 | continue 71 | } 72 | if i == size { 73 | bounds[i] = 514 74 | break 75 | } 76 | bounds[i] = bounds[i-1] + 4 77 | } 78 | h := NewHistogramData(bounds) 79 | for v := 16; v <= 1024; v = v + 4 { 80 | for i := 0; i < 1000; i++ { 81 | h.Update(int64(v)) 82 | } 83 | } 84 | require.Equal(t, h.Percentile(1.0), 514.0) 85 | } 86 | -------------------------------------------------------------------------------- /z/mmap.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package z 7 | 8 | import ( 9 | "os" 10 | ) 11 | 12 | // Mmap uses the mmap system call to memory-map a file. 
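// The returned slice has length size and should eventually be released with Munmap.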
If writable is true, 13 | // memory protection of the pages is set so that they may be written to as well. 14 | func Mmap(fd *os.File, writable bool, size int64) ([]byte, error) { 15 | return mmap(fd, writable, size) 16 | } 17 | 18 | // Munmap unmaps a previously mapped slice. 19 | func Munmap(b []byte) error { 20 | return munmap(b) 21 | } 22 | 23 | // Madvise uses the madvise system call to give advise about the use of memory 24 | // when using a slice that is memory-mapped to a file. Set the readahead flag to 25 | // false if page references are expected in random order. 26 | func Madvise(b []byte, readahead bool) error { 27 | return madvise(b, readahead) 28 | } 29 | 30 | // Msync would call sync on the mmapped data. 31 | func Msync(b []byte) error { 32 | return msync(b) 33 | } 34 | -------------------------------------------------------------------------------- /z/mmap_darwin.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package z 7 | 8 | import ( 9 | "os" 10 | "syscall" 11 | "unsafe" 12 | 13 | "golang.org/x/sys/unix" 14 | ) 15 | 16 | // Mmap uses the mmap system call to memory-map a file. If writable is true, 17 | // memory protection of the pages is set so that they may be written to as well. 18 | func mmap(fd *os.File, writable bool, size int64) ([]byte, error) { 19 | mtype := unix.PROT_READ 20 | if writable { 21 | mtype |= unix.PROT_WRITE 22 | } 23 | return unix.Mmap(int(fd.Fd()), 0, int(size), mtype, unix.MAP_SHARED) 24 | } 25 | 26 | // Munmap unmaps a previously mapped slice. 27 | func munmap(b []byte) error { 28 | return unix.Munmap(b) 29 | } 30 | 31 | // This is required because the unix package does not support the madvise system call on OS X. 32 | func madvise(b []byte, readahead bool) error { 33 | advice := unix.MADV_NORMAL 34 | if !readahead { 35 | advice = unix.MADV_RANDOM 36 | } 37 | 38 | _, _, e1 := syscall.Syscall(syscall.SYS_MADVISE, uintptr(unsafe.Pointer(&b[0])), 39 | uintptr(len(b)), uintptr(advice)) 40 | if e1 != 0 { 41 | return e1 42 | } 43 | return nil 44 | } 45 | 46 | func msync(b []byte) error { 47 | return unix.Msync(b, unix.MS_SYNC) 48 | } 49 | -------------------------------------------------------------------------------- /z/mmap_js.go: -------------------------------------------------------------------------------- 1 | //go:build js 2 | 3 | /* 4 | * SPDX-FileCopyrightText: © Hypermode Inc. 5 | * SPDX-License-Identifier: Apache-2.0 6 | */ 7 | 8 | package z 9 | 10 | import ( 11 | "os" 12 | "syscall" 13 | ) 14 | 15 | func mmap(fd *os.File, writeable bool, size int64) ([]byte, error) { 16 | return nil, syscall.ENOSYS 17 | } 18 | 19 | func munmap(b []byte) error { 20 | return syscall.ENOSYS 21 | } 22 | 23 | func madvise(b []byte, readahead bool) error { 24 | return syscall.ENOSYS 25 | } 26 | 27 | func msync(b []byte) error { 28 | return syscall.ENOSYS 29 | } 30 | -------------------------------------------------------------------------------- /z/mmap_linux.go: -------------------------------------------------------------------------------- 1 | //go:build !js 2 | // +build !js 3 | 4 | /* 5 | * SPDX-FileCopyrightText: © Hypermode Inc. 6 | * SPDX-License-Identifier: Apache-2.0 7 | */ 8 | 9 | package z 10 | 11 | import ( 12 | "os" 13 | "unsafe" 14 | 15 | "golang.org/x/sys/unix" 16 | ) 17 | 18 | // mmap uses the mmap system call to memory-map a file. 
If writable is true, 19 | // memory protection of the pages is set so that they may be written to as well. 20 | func mmap(fd *os.File, writable bool, size int64) ([]byte, error) { 21 | mtype := unix.PROT_READ 22 | if writable { 23 | mtype |= unix.PROT_WRITE 24 | } 25 | return unix.Mmap(int(fd.Fd()), 0, int(size), mtype, unix.MAP_SHARED) 26 | } 27 | 28 | // munmap unmaps a previously mapped slice. 29 | // 30 | // unix.Munmap maintains an internal list of mmapped addresses, and only calls munmap 31 | // if the address is present in that list. If we use mremap, this list is not updated. 32 | // To bypass this, we call munmap ourselves. 33 | func munmap(data []byte) error { 34 | if len(data) == 0 || len(data) != cap(data) { 35 | return unix.EINVAL 36 | } 37 | _, _, errno := unix.Syscall( 38 | unix.SYS_MUNMAP, 39 | uintptr(unsafe.Pointer(&data[0])), 40 | uintptr(len(data)), 41 | 0, 42 | ) 43 | if errno != 0 { 44 | return errno 45 | } 46 | return nil 47 | } 48 | 49 | // madvise uses the madvise system call to give advise about the use of memory 50 | // when using a slice that is memory-mapped to a file. Set the readahead flag to 51 | // false if page references are expected in random order. 52 | func madvise(b []byte, readahead bool) error { 53 | flags := unix.MADV_NORMAL 54 | if !readahead { 55 | flags = unix.MADV_RANDOM 56 | } 57 | return unix.Madvise(b, flags) 58 | } 59 | 60 | // msync writes any modified data to persistent storage. 61 | func msync(b []byte) error { 62 | return unix.Msync(b, unix.MS_SYNC) 63 | } 64 | -------------------------------------------------------------------------------- /z/mmap_plan9.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package z 7 | 8 | import ( 9 | "os" 10 | "syscall" 11 | ) 12 | 13 | // Mmap uses the mmap system call to memory-map a file. If writable is true, 14 | // memory protection of the pages is set so that they may be written to as well. 15 | func mmap(fd *os.File, writable bool, size int64) ([]byte, error) { 16 | return nil, syscall.EPLAN9 17 | } 18 | 19 | // Munmap unmaps a previously mapped slice. 20 | func munmap(b []byte) error { 21 | return syscall.EPLAN9 22 | } 23 | 24 | // Madvise uses the madvise system call to give advise about the use of memory 25 | // when using a slice that is memory-mapped to a file. Set the readahead flag to 26 | // false if page references are expected in random order. 27 | func madvise(b []byte, readahead bool) error { 28 | return syscall.EPLAN9 29 | } 30 | 31 | func msync(b []byte) error { 32 | return syscall.EPLAN9 33 | } 34 | -------------------------------------------------------------------------------- /z/mmap_unix.go: -------------------------------------------------------------------------------- 1 | //go:build !windows && !darwin && !plan9 && !linux && !wasip1 && !js 2 | // +build !windows,!darwin,!plan9,!linux,!wasip1,!js 3 | 4 | /* 5 | * SPDX-FileCopyrightText: © Hypermode Inc. 6 | * SPDX-License-Identifier: Apache-2.0 7 | */ 8 | 9 | package z 10 | 11 | import ( 12 | "os" 13 | 14 | "golang.org/x/sys/unix" 15 | ) 16 | 17 | // Mmap uses the mmap system call to memory-map a file. If writable is true, 18 | // memory protection of the pages is set so that they may be written to as well. 
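// This file is the generic fallback for the remaining unix-like targets (FreeBSD, OpenBSD,
// NetBSD and so on, as selected by the build constraints above).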
19 | func mmap(fd *os.File, writable bool, size int64) ([]byte, error) { 20 | mtype := unix.PROT_READ 21 | if writable { 22 | mtype |= unix.PROT_WRITE 23 | } 24 | return unix.Mmap(int(fd.Fd()), 0, int(size), mtype, unix.MAP_SHARED) 25 | } 26 | 27 | // Munmap unmaps a previously mapped slice. 28 | func munmap(b []byte) error { 29 | return unix.Munmap(b) 30 | } 31 | 32 | // Madvise uses the madvise system call to give advise about the use of memory 33 | // when using a slice that is memory-mapped to a file. Set the readahead flag to 34 | // false if page references are expected in random order. 35 | func madvise(b []byte, readahead bool) error { 36 | flags := unix.MADV_NORMAL 37 | if !readahead { 38 | flags = unix.MADV_RANDOM 39 | } 40 | return unix.Madvise(b, flags) 41 | } 42 | 43 | func msync(b []byte) error { 44 | return unix.Msync(b, unix.MS_SYNC) 45 | } 46 | -------------------------------------------------------------------------------- /z/mmap_wasip1.go: -------------------------------------------------------------------------------- 1 | //go:build wasip1 2 | 3 | /* 4 | * SPDX-FileCopyrightText: © Hypermode Inc. 5 | * SPDX-License-Identifier: Apache-2.0 6 | */ 7 | 8 | package z 9 | 10 | import ( 11 | "os" 12 | "syscall" 13 | ) 14 | 15 | func mmap(fd *os.File, writeable bool, size int64) ([]byte, error) { 16 | return nil, syscall.ENOSYS 17 | } 18 | 19 | func munmap(b []byte) error { 20 | return syscall.ENOSYS 21 | } 22 | 23 | func madvise(b []byte, readahead bool) error { 24 | return syscall.ENOSYS 25 | } 26 | 27 | func msync(b []byte) error { 28 | return syscall.ENOSYS 29 | } 30 | -------------------------------------------------------------------------------- /z/mmap_windows.go: -------------------------------------------------------------------------------- 1 | //go:build windows 2 | // +build windows 3 | 4 | /* 5 | * SPDX-FileCopyrightText: © Hypermode Inc. 6 | * SPDX-License-Identifier: Apache-2.0 7 | */ 8 | 9 | package z 10 | 11 | import ( 12 | "fmt" 13 | "os" 14 | "syscall" 15 | "unsafe" 16 | ) 17 | 18 | func mmap(fd *os.File, write bool, size int64) ([]byte, error) { 19 | protect := syscall.PAGE_READONLY 20 | access := syscall.FILE_MAP_READ 21 | 22 | if write { 23 | protect = syscall.PAGE_READWRITE 24 | access = syscall.FILE_MAP_WRITE 25 | } 26 | fi, err := fd.Stat() 27 | if err != nil { 28 | return nil, err 29 | } 30 | 31 | // In windows, we cannot mmap a file more than it's actual size. 32 | // So truncate the file to the size of the mmap. 33 | if fi.Size() < size { 34 | if err := fd.Truncate(size); err != nil { 35 | return nil, fmt.Errorf("truncate: %s", err) 36 | } 37 | } 38 | 39 | // Open a file mapping handle. 40 | sizelo := uint32(size >> 32) 41 | sizehi := uint32(size) & 0xffffffff 42 | 43 | handler, err := syscall.CreateFileMapping(syscall.Handle(fd.Fd()), nil, 44 | uint32(protect), sizelo, sizehi, nil) 45 | if err != nil { 46 | return nil, os.NewSyscallError("CreateFileMapping", err) 47 | } 48 | 49 | // Create the memory map. 50 | addr, err := syscall.MapViewOfFile(handler, uint32(access), 0, 0, uintptr(size)) 51 | if addr == 0 { 52 | return nil, os.NewSyscallError("MapViewOfFile", err) 53 | } 54 | 55 | // Close mapping handle. 
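// Closing the mapping handle here is safe: Windows keeps the underlying section object
// alive for as long as a mapped view of it exists, and the view itself is released later
// by munmap via UnmapViewOfFile.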
56 | if err := syscall.CloseHandle(syscall.Handle(handler)); err != nil { 57 | return nil, os.NewSyscallError("CloseHandle", err) 58 | } 59 | 60 | // Slice memory layout 61 | // Copied this snippet from golang/sys package 62 | var sl = struct { 63 | addr uintptr 64 | len int 65 | cap int 66 | }{addr, int(size), int(size)} 67 | 68 | // Use unsafe to turn sl into a []byte. 69 | data := *(*[]byte)(unsafe.Pointer(&sl)) 70 | 71 | return data, nil 72 | } 73 | 74 | func munmap(b []byte) error { 75 | return syscall.UnmapViewOfFile(uintptr(unsafe.Pointer(&b[0]))) 76 | } 77 | 78 | func madvise(b []byte, readahead bool) error { 79 | // Do Nothing. We don’t care about this setting on Windows 80 | return nil 81 | } 82 | 83 | func msync(b []byte) error { 84 | // TODO: Figure out how to do msync on Windows. 85 | return nil 86 | } 87 | -------------------------------------------------------------------------------- /z/mremap_nosize.go: -------------------------------------------------------------------------------- 1 | //go:build (arm64 || arm) && linux && !js 2 | // +build arm64 arm 3 | // +build linux 4 | // +build !js 5 | 6 | /* 7 | * SPDX-FileCopyrightText: © Hypermode Inc. 8 | * SPDX-License-Identifier: Apache-2.0 9 | */ 10 | 11 | package z 12 | 13 | import ( 14 | "reflect" 15 | "unsafe" 16 | 17 | "golang.org/x/sys/unix" 18 | ) 19 | 20 | // mremap is a Linux-specific system call to remap pages in memory. This can be used in place of munmap + mmap. 21 | func mremap(data []byte, size int) ([]byte, error) { 22 | //nolint:lll 23 | // taken from 24 | const MREMAP_MAYMOVE = 0x1 25 | 26 | header := (*reflect.SliceHeader)(unsafe.Pointer(&data)) 27 | // For ARM64, the second return argument for SYS_MREMAP is inconsistent (prior allocated size) with 28 | // other architectures, which return the size allocated 29 | mmapAddr, _, errno := unix.Syscall6( 30 | unix.SYS_MREMAP, 31 | header.Data, 32 | uintptr(header.Len), 33 | uintptr(size), 34 | uintptr(MREMAP_MAYMOVE), 35 | 0, 36 | 0, 37 | ) 38 | if errno != 0 { 39 | return nil, errno 40 | } 41 | 42 | header.Data = mmapAddr 43 | header.Cap = size 44 | header.Len = size 45 | return data, nil 46 | } 47 | -------------------------------------------------------------------------------- /z/mremap_size.go: -------------------------------------------------------------------------------- 1 | //go:build linux && !arm64 && !arm && !js 2 | // +build linux,!arm64,!arm,!js 3 | 4 | /* 5 | * SPDX-FileCopyrightText: © Hypermode Inc. 6 | * SPDX-License-Identifier: Apache-2.0 7 | */ 8 | 9 | package z 10 | 11 | import ( 12 | "fmt" 13 | "reflect" 14 | "unsafe" 15 | 16 | "golang.org/x/sys/unix" 17 | ) 18 | 19 | // mremap is a Linux-specific system call to remap pages in memory. This can be used in place of munmap + mmap. 
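// As a rough sketch (assuming buf was obtained from mmap in this package and the backing
// file has already been extended to newSize), a caller could grow a mapping in place with:
//
//	newBuf, err := mremap(buf, newSize)
//	if err != nil {
//		// fall back to munmap followed by a fresh mmap
//	}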
20 | func mremap(data []byte, size int) ([]byte, error) { 21 | //nolint:lll 22 | // taken from 23 | const MREMAP_MAYMOVE = 0x1 24 | 25 | header := (*reflect.SliceHeader)(unsafe.Pointer(&data)) 26 | mmapAddr, mmapSize, errno := unix.Syscall6( 27 | unix.SYS_MREMAP, 28 | header.Data, 29 | uintptr(header.Len), 30 | uintptr(size), 31 | uintptr(MREMAP_MAYMOVE), 32 | 0, 33 | 0, 34 | ) 35 | if errno != 0 { 36 | return nil, errno 37 | } 38 | if mmapSize != uintptr(size) { 39 | return nil, fmt.Errorf("mremap size mismatch: requested: %d got: %d", size, mmapSize) 40 | } 41 | 42 | header.Data = mmapAddr 43 | header.Cap = size 44 | header.Len = size 45 | return data, nil 46 | } 47 | -------------------------------------------------------------------------------- /z/rtutil.go: -------------------------------------------------------------------------------- 1 | // MIT License 2 | 3 | // Copyright (c) 2019 Ewan Chou 4 | 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | 23 | package z 24 | 25 | import ( 26 | "unsafe" 27 | ) 28 | 29 | // NanoTime returns the current time in nanoseconds from a monotonic clock. 30 | // 31 | //go:linkname NanoTime runtime.nanotime 32 | func NanoTime() int64 33 | 34 | // CPUTicks is a faster alternative to NanoTime to measure time duration. 35 | // 36 | //go:linkname CPUTicks runtime.cputicks 37 | func CPUTicks() int64 38 | 39 | type stringStruct struct { 40 | str unsafe.Pointer 41 | len int 42 | } 43 | 44 | //go:noescape 45 | //go:linkname memhash runtime.memhash 46 | func memhash(p unsafe.Pointer, h, s uintptr) uintptr 47 | 48 | // MemHash is the hash function used by go map, it utilizes available hardware instructions(behaves 49 | // as aeshash if aes instruction is available). 50 | // NOTE: The hash seed changes for every process. So, this cannot be used as a persistent hash. 51 | func MemHash(data []byte) uint64 { 52 | ss := (*stringStruct)(unsafe.Pointer(&data)) 53 | return uint64(memhash(ss.str, 0, uintptr(ss.len))) 54 | } 55 | 56 | // MemHashString is the hash function used by go map, it utilizes available hardware instructions 57 | // (behaves as aeshash if aes instruction is available). 58 | // NOTE: The hash seed changes for every process. So, this cannot be used as a persistent hash. 
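// For illustration, two calls in the same process agree with each other, but the result
// must never be written to disk or compared across runs:
//
//	h1 := MemHashString("ristretto")
//	h2 := MemHashString("ristretto")
//	// h1 == h2 here; a restarted process may compute a different value.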
59 | func MemHashString(str string) uint64 { 60 | ss := (*stringStruct)(unsafe.Pointer(&str)) 61 | return uint64(memhash(ss.str, 0, uintptr(ss.len))) 62 | } 63 | 64 | // FastRand is a fast thread local random function. 65 | // 66 | //go:linkname FastRand runtime.fastrand 67 | func FastRand() uint32 68 | 69 | //go:linkname memclrNoHeapPointers runtime.memclrNoHeapPointers 70 | func memclrNoHeapPointers(p unsafe.Pointer, n uintptr) 71 | 72 | func Memclr(b []byte) { 73 | if len(b) == 0 { 74 | return 75 | } 76 | p := unsafe.Pointer(&b[0]) 77 | memclrNoHeapPointers(p, uintptr(len(b))) 78 | } 79 | -------------------------------------------------------------------------------- /z/rtutil.s: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hypermodeinc/ristretto/9bc07160ec1e5425f8ce5c7a62655896890ec53c/z/rtutil.s -------------------------------------------------------------------------------- /z/rtutil_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package z 7 | 8 | import ( 9 | "hash/fnv" 10 | "math/rand" 11 | "sync/atomic" 12 | "testing" 13 | "time" 14 | 15 | "github.com/dgryski/go-farm" 16 | ) 17 | 18 | func BenchmarkMemHash(b *testing.B) { 19 | buf := make([]byte, 64) 20 | rand.Read(buf) 21 | 22 | b.ReportAllocs() 23 | b.ResetTimer() 24 | for i := 0; i < b.N; i++ { 25 | _ = MemHash(buf) 26 | } 27 | b.SetBytes(int64(len(buf))) 28 | } 29 | 30 | func BenchmarkMemHashString(b *testing.B) { 31 | s := "Lorem ipsum dolor sit amet, consectetur adipiscing elit, " + 32 | "sed do eiusmod tempor incididunt ut labore et dolore magna aliqua." 33 | 34 | b.ReportAllocs() 35 | b.ResetTimer() 36 | for i := 0; i < b.N; i++ { 37 | _ = MemHashString(s) 38 | } 39 | b.SetBytes(int64(len(s))) 40 | } 41 | 42 | func BenchmarkSip(b *testing.B) { 43 | buf := make([]byte, 64) 44 | rand.Read(buf) 45 | for i := 0; i < b.N; i++ { 46 | SipHash(buf) 47 | } 48 | } 49 | 50 | func BenchmarkFarm(b *testing.B) { 51 | buf := make([]byte, 64) 52 | rand.Read(buf) 53 | for i := 0; i < b.N; i++ { 54 | farm.Fingerprint64(buf) 55 | } 56 | } 57 | 58 | func BenchmarkFnv(b *testing.B) { 59 | buf := make([]byte, 64) 60 | rand.Read(buf) 61 | f := fnv.New64a() 62 | for i := 0; i < b.N; i++ { 63 | f.Write(buf) 64 | f.Sum64() 65 | f.Reset() 66 | } 67 | } 68 | 69 | func SipHash(p []byte) (l, h uint64) { 70 | // Initialization. 71 | v0 := uint64(8317987320269560794) // k0 ^ 0x736f6d6570736575 72 | v1 := uint64(7237128889637516672) // k1 ^ 0x646f72616e646f6d 73 | v2 := uint64(7816392314733513934) // k0 ^ 0x6c7967656e657261 74 | v3 := uint64(8387220255325274014) // k1 ^ 0x7465646279746573 75 | t := uint64(len(p)) << 56 76 | 77 | // Compression. 78 | for len(p) >= 8 { 79 | m := uint64(p[0]) | uint64(p[1])<<8 | uint64(p[2])<<16 | uint64(p[3])<<24 | 80 | uint64(p[4])<<32 | uint64(p[5])<<40 | uint64(p[6])<<48 | uint64(p[7])<<56 81 | 82 | v3 ^= m 83 | 84 | // Round 1. 85 | v0 += v1 86 | v1 = v1<<13 | v1>>51 87 | v1 ^= v0 88 | v0 = v0<<32 | v0>>32 89 | 90 | v2 += v3 91 | v3 = v3<<16 | v3>>48 92 | v3 ^= v2 93 | 94 | v0 += v3 95 | v3 = v3<<21 | v3>>43 96 | v3 ^= v0 97 | 98 | v2 += v1 99 | v1 = v1<<17 | v1>>47 100 | v1 ^= v2 101 | v2 = v2<<32 | v2>>32 102 | 103 | // Round 2. 
104 | v0 += v1 105 | v1 = v1<<13 | v1>>51 106 | v1 ^= v0 107 | v0 = v0<<32 | v0>>32 108 | 109 | v2 += v3 110 | v3 = v3<<16 | v3>>48 111 | v3 ^= v2 112 | 113 | v0 += v3 114 | v3 = v3<<21 | v3>>43 115 | v3 ^= v0 116 | 117 | v2 += v1 118 | v1 = v1<<17 | v1>>47 119 | v1 ^= v2 120 | v2 = v2<<32 | v2>>32 121 | 122 | v0 ^= m 123 | p = p[8:] 124 | } 125 | 126 | // Compress last block. 127 | switch len(p) { 128 | case 7: 129 | t |= uint64(p[6]) << 48 130 | fallthrough 131 | case 6: 132 | t |= uint64(p[5]) << 40 133 | fallthrough 134 | case 5: 135 | t |= uint64(p[4]) << 32 136 | fallthrough 137 | case 4: 138 | t |= uint64(p[3]) << 24 139 | fallthrough 140 | case 3: 141 | t |= uint64(p[2]) << 16 142 | fallthrough 143 | case 2: 144 | t |= uint64(p[1]) << 8 145 | fallthrough 146 | case 1: 147 | t |= uint64(p[0]) 148 | } 149 | 150 | v3 ^= t 151 | 152 | // Round 1. 153 | v0 += v1 154 | v1 = v1<<13 | v1>>51 155 | v1 ^= v0 156 | v0 = v0<<32 | v0>>32 157 | 158 | v2 += v3 159 | v3 = v3<<16 | v3>>48 160 | v3 ^= v2 161 | 162 | v0 += v3 163 | v3 = v3<<21 | v3>>43 164 | v3 ^= v0 165 | 166 | v2 += v1 167 | v1 = v1<<17 | v1>>47 168 | v1 ^= v2 169 | v2 = v2<<32 | v2>>32 170 | 171 | // Round 2. 172 | v0 += v1 173 | v1 = v1<<13 | v1>>51 174 | v1 ^= v0 175 | v0 = v0<<32 | v0>>32 176 | 177 | v2 += v3 178 | v3 = v3<<16 | v3>>48 179 | v3 ^= v2 180 | 181 | v0 += v3 182 | v3 = v3<<21 | v3>>43 183 | v3 ^= v0 184 | 185 | v2 += v1 186 | v1 = v1<<17 | v1>>47 187 | v1 ^= v2 188 | v2 = v2<<32 | v2>>32 189 | 190 | v0 ^= t 191 | 192 | // Finalization. 193 | v2 ^= 0xff 194 | 195 | // Round 1. 196 | v0 += v1 197 | v1 = v1<<13 | v1>>51 198 | v1 ^= v0 199 | v0 = v0<<32 | v0>>32 200 | 201 | v2 += v3 202 | v3 = v3<<16 | v3>>48 203 | v3 ^= v2 204 | 205 | v0 += v3 206 | v3 = v3<<21 | v3>>43 207 | v3 ^= v0 208 | 209 | v2 += v1 210 | v1 = v1<<17 | v1>>47 211 | v1 ^= v2 212 | v2 = v2<<32 | v2>>32 213 | 214 | // Round 2. 215 | v0 += v1 216 | v1 = v1<<13 | v1>>51 217 | v1 ^= v0 218 | v0 = v0<<32 | v0>>32 219 | 220 | v2 += v3 221 | v3 = v3<<16 | v3>>48 222 | v3 ^= v2 223 | 224 | v0 += v3 225 | v3 = v3<<21 | v3>>43 226 | v3 ^= v0 227 | 228 | v2 += v1 229 | v1 = v1<<17 | v1>>47 230 | v1 ^= v2 231 | v2 = v2<<32 | v2>>32 232 | 233 | // Round 3. 234 | v0 += v1 235 | v1 = v1<<13 | v1>>51 236 | v1 ^= v0 237 | v0 = v0<<32 | v0>>32 238 | 239 | v2 += v3 240 | v3 = v3<<16 | v3>>48 241 | v3 ^= v2 242 | 243 | v0 += v3 244 | v3 = v3<<21 | v3>>43 245 | v3 ^= v0 246 | 247 | v2 += v1 248 | v1 = v1<<17 | v1>>47 249 | v1 ^= v2 250 | v2 = v2<<32 | v2>>32 251 | 252 | // Round 4. 
253 | v0 += v1 254 | v1 = v1<<13 | v1>>51 255 | v1 ^= v0 256 | v0 = v0<<32 | v0>>32 257 | 258 | v2 += v3 259 | v3 = v3<<16 | v3>>48 260 | v3 ^= v2 261 | 262 | v0 += v3 263 | v3 = v3<<21 | v3>>43 264 | v3 ^= v0 265 | 266 | v2 += v1 267 | v1 = v1<<17 | v1>>47 268 | v1 ^= v2 269 | v2 = v2<<32 | v2>>32 270 | 271 | // return v0 ^ v1 ^ v2 ^ v3 272 | 273 | hash := v0 ^ v1 ^ v2 ^ v3 274 | h = hash >> 1 275 | l = hash << 1 >> 1 276 | return l, h 277 | } 278 | 279 | func BenchmarkNanoTime(b *testing.B) { 280 | for i := 0; i < b.N; i++ { 281 | NanoTime() 282 | } 283 | } 284 | 285 | func BenchmarkCPUTicks(b *testing.B) { 286 | for i := 0; i < b.N; i++ { 287 | CPUTicks() 288 | } 289 | } 290 | 291 | // goos: linux 292 | // goarch: amd64 293 | // pkg: github.com/dgraph-io/ristretto/v2/z 294 | // BenchmarkFastRand-16 1000000000 0.292 ns/op 295 | // BenchmarkRandSource-16 1000000000 0.747 ns/op 296 | // BenchmarkRandGlobal-16 6822332 176 ns/op 297 | // BenchmarkRandAtomic-16 77950322 15.4 ns/op 298 | // PASS 299 | // ok github.com/dgraph-io/ristretto/v2/z 4.808s 300 | func benchmarkRand(b *testing.B, fab func() func() uint32) { 301 | b.RunParallel(func(pb *testing.PB) { 302 | gen := fab() 303 | for pb.Next() { 304 | gen() 305 | } 306 | }) 307 | } 308 | 309 | func BenchmarkFastRand(b *testing.B) { 310 | benchmarkRand(b, func() func() uint32 { 311 | return FastRand 312 | }) 313 | } 314 | 315 | func BenchmarkRandSource(b *testing.B) { 316 | benchmarkRand(b, func() func() uint32 { 317 | s := rand.New(rand.NewSource(time.Now().Unix())) 318 | return func() uint32 { return s.Uint32() } 319 | }) 320 | } 321 | 322 | func BenchmarkRandGlobal(b *testing.B) { 323 | benchmarkRand(b, func() func() uint32 { 324 | return func() uint32 { return rand.Uint32() } 325 | }) 326 | } 327 | 328 | func BenchmarkRandAtomic(b *testing.B) { 329 | var x uint32 330 | benchmarkRand(b, func() func() uint32 { 331 | return func() uint32 { return atomic.AddUint32(&x, 1) } 332 | }) 333 | } 334 | -------------------------------------------------------------------------------- /z/simd/add_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package simd 7 | 8 | import ( 9 | "math" 10 | "testing" 11 | 12 | "github.com/stretchr/testify/require" 13 | ) 14 | 15 | func TestSearch(t *testing.T) { 16 | keys := make([]uint64, 512) 17 | for i := 0; i < len(keys); i += 2 { 18 | keys[i] = uint64(i) 19 | keys[i+1] = 1 20 | } 21 | 22 | for i := 0; i < len(keys); i++ { 23 | idx := int(Search(keys, uint64(i))) 24 | require.Equal(t, (i+1)/2, idx, "%v\n%v", i, keys) 25 | } 26 | require.Equal(t, 256, int(Search(keys, math.MaxInt64>>1))) 27 | require.Equal(t, 256, int(Search(keys, math.MaxInt64))) 28 | } 29 | -------------------------------------------------------------------------------- /z/simd/asm2.go: -------------------------------------------------------------------------------- 1 | //go:build ignore 2 | // +build ignore 3 | 4 | /* 5 | * SPDX-FileCopyrightText: © Hypermode Inc. 6 | * SPDX-License-Identifier: Apache-2.0 7 | */ 8 | 9 | package main 10 | 11 | import ( 12 | . "github.com/mmcloughlin/avo/build" 13 | . 
"github.com/mmcloughlin/avo/operand" 14 | ) 15 | 16 | //go:generate go run asm2.go -out search_amd64.s -stubs stub_search_amd64.go 17 | 18 | func main() { 19 | TEXT("Search", NOSPLIT, "func(xs []uint64, k uint64) int16") 20 | Doc("Search finds the first idx for which xs[idx] >= k in xs.") 21 | ptr := Load(Param("xs").Base(), GP64()) 22 | n := Load(Param("xs").Len(), GP64()) 23 | key := Load(Param("k"), GP64()) 24 | retInd := ReturnIndex(0) 25 | retVal, err := retInd.Resolve() 26 | if err != nil { 27 | panic(err) 28 | } 29 | 30 | Comment("Save n") 31 | n2 := GP64() 32 | MOVQ(n, n2) 33 | 34 | Comment("Initialize idx register to zero.") 35 | idx := GP64() 36 | XORL(idx.As32(), idx.As32()) 37 | 38 | Label("loop") 39 | m := Mem{Base: ptr, Index: idx, Scale: 8} 40 | 41 | Comment("Unroll1") 42 | CMPQ(m, key) 43 | JAE(LabelRef("Found")) 44 | 45 | Comment("Unroll2") 46 | CMPQ(m.Offset(16), key) 47 | JAE(LabelRef("Found2")) 48 | 49 | Comment("Unroll3") 50 | CMPQ(m.Offset(32), key) 51 | JAE(LabelRef("Found3")) 52 | 53 | Comment("Unroll4") 54 | CMPQ(m.Offset(48), key) 55 | JAE(LabelRef("Found4")) 56 | 57 | Comment("plus8") 58 | ADDQ(Imm(8), idx) 59 | CMPQ(idx, n) 60 | JB(LabelRef("loop")) 61 | JMP(LabelRef("NotFound")) 62 | 63 | Label("Found2") 64 | ADDL(Imm(2), idx.As32()) 65 | JMP(LabelRef("Found")) 66 | 67 | Label("Found3") 68 | ADDL(Imm(4), idx.As32()) 69 | JMP(LabelRef("Found")) 70 | 71 | Label("Found4") 72 | ADDL(Imm(6), idx.As32()) 73 | 74 | Label("Found") 75 | MOVL(idx.As32(), n2.As32()) // n2 is no longer being used 76 | 77 | Label("NotFound") 78 | MOVL(n2.As32(), idx.As32()) 79 | SHRL(Imm(31), idx.As32()) 80 | ADDL(n2.As32(), idx.As32()) 81 | SHRL(Imm(1), idx.As32()) 82 | MOVL(idx.As32(), retVal.Addr) 83 | RET() 84 | 85 | Generate() 86 | } 87 | -------------------------------------------------------------------------------- /z/simd/baseline.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 
3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package simd 7 | 8 | import ( 9 | "fmt" 10 | "runtime" 11 | "sort" 12 | "sync" 13 | ) 14 | 15 | // Search finds the key using the naive way 16 | func Naive(xs []uint64, k uint64) int16 { 17 | var i int 18 | for i = 0; i < len(xs); i += 2 { 19 | x := xs[i] 20 | if x >= k { 21 | return int16(i / 2) 22 | } 23 | } 24 | return int16(i / 2) 25 | } 26 | 27 | func Clever(xs []uint64, k uint64) int16 { 28 | if len(xs) < 8 { 29 | return Naive(xs, k) 30 | } 31 | var twos, pk [4]uint64 32 | pk[0] = k 33 | pk[1] = k 34 | pk[2] = k 35 | pk[3] = k 36 | for i := 0; i < len(xs); i += 8 { 37 | twos[0] = xs[i] 38 | twos[1] = xs[i+2] 39 | twos[2] = xs[i+4] 40 | twos[3] = xs[i+6] 41 | if twos[0] >= pk[0] { 42 | return int16(i / 2) 43 | } 44 | if twos[1] >= pk[1] { 45 | return int16((i + 2) / 2) 46 | } 47 | if twos[2] >= pk[2] { 48 | return int16((i + 4) / 2) 49 | } 50 | if twos[3] >= pk[3] { 51 | return int16((i + 6) / 2) 52 | } 53 | 54 | } 55 | return int16(len(xs) / 2) 56 | } 57 | 58 | func Parallel(xs []uint64, k uint64) int16 { 59 | cpus := runtime.NumCPU() 60 | if cpus%2 != 0 { 61 | panic(fmt.Sprintf("odd number of CPUs %v", cpus)) 62 | } 63 | sz := len(xs)/cpus + 1 64 | var wg sync.WaitGroup 65 | retChan := make(chan int16, cpus) 66 | for i := 0; i < len(xs); i += sz { 67 | end := i + sz 68 | if end >= len(xs) { 69 | end = len(xs) 70 | } 71 | chunk := xs[i:end] 72 | wg.Add(1) 73 | go func(hd int16, xs []uint64, k uint64, wg *sync.WaitGroup, ch chan int16) { 74 | for i := 0; i < len(xs); i += 2 { 75 | if xs[i] >= k { 76 | ch <- (int16(i) + hd) / 2 77 | break 78 | } 79 | } 80 | wg.Done() 81 | }(int16(i), chunk, k, &wg, retChan) 82 | } 83 | wg.Wait() 84 | close(retChan) 85 | var min int16 = (1 << 15) - 1 86 | for i := range retChan { 87 | if i < min { 88 | min = i 89 | } 90 | } 91 | if min == (1<<15)-1 { 92 | return int16(len(xs) / 2) 93 | } 94 | return min 95 | } 96 | 97 | func Binary(keys []uint64, key uint64) int16 { 98 | return int16(sort.Search(len(keys), func(i int) bool { 99 | if i*2 >= len(keys) { 100 | return true 101 | } 102 | return keys[i*2] >= key 103 | })) 104 | } 105 | 106 | //nolint:unused 107 | func cmp2_native(twos, pk [2]uint64) int16 { 108 | if twos[0] == pk[0] { 109 | return 0 110 | } 111 | if twos[1] == pk[1] { 112 | return 1 113 | } 114 | return 2 115 | } 116 | 117 | //nolint:unused 118 | func cmp4_native(fours, pk [4]uint64) int16 { 119 | for i := range fours { 120 | if fours[i] >= pk[i] { 121 | return int16(i) 122 | } 123 | } 124 | return 4 125 | } 126 | 127 | //nolint:unused 128 | func cmp8_native(a [8]uint64, pk [4]uint64) int16 { 129 | for i := range a { 130 | if a[i] >= pk[0] { 131 | return int16(i) 132 | } 133 | } 134 | return 8 135 | } 136 | -------------------------------------------------------------------------------- /z/simd/search.go: -------------------------------------------------------------------------------- 1 | //go:build !amd64 2 | // +build !amd64 3 | 4 | /* 5 | * SPDX-FileCopyrightText: © Hypermode Inc. 6 | * SPDX-License-Identifier: Apache-2.0 7 | */ 8 | 9 | package simd 10 | 11 | // Search uses the Clever search to find the correct key. 
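// The slice is treated as interleaved pairs [k0, v0, k1, v1, ...] and the return value is
// the index of the first pair whose key is >= k. A small illustrative call:
//
//	xs := []uint64{10, 1, 20, 1, 30, 1, 40, 1}
//	_ = Search(xs, 25) // returns 2, the pair starting at xs[4]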
12 | func Search(xs []uint64, k uint64) int16 { 13 | if len(xs) < 8 || (len(xs)%8 != 0) { 14 | return Naive(xs, k) 15 | } 16 | var twos, pk [4]uint64 17 | pk[0] = k 18 | pk[1] = k 19 | pk[2] = k 20 | pk[3] = k 21 | for i := 0; i < len(xs); i += 8 { 22 | twos[0] = xs[i] 23 | twos[1] = xs[i+2] 24 | twos[2] = xs[i+4] 25 | twos[3] = xs[i+6] 26 | if twos[0] >= pk[0] { 27 | return int16(i / 2) 28 | } 29 | if twos[1] >= pk[1] { 30 | return int16((i + 2) / 2) 31 | } 32 | if twos[2] >= pk[2] { 33 | return int16((i + 4) / 2) 34 | } 35 | if twos[3] >= pk[3] { 36 | return int16((i + 6) / 2) 37 | } 38 | 39 | } 40 | return int16(len(xs) / 2) 41 | } 42 | -------------------------------------------------------------------------------- /z/simd/search_amd64.s: -------------------------------------------------------------------------------- 1 | // Code generated by command: go run asm2.go -out search_amd64.s -stubs stub_search_amd64.go. DO NOT EDIT. 2 | 3 | #include "textflag.h" 4 | 5 | // func Search(xs []uint64, k uint64) int16 6 | TEXT ·Search(SB), NOSPLIT, $0-34 7 | MOVQ xs_base+0(FP), AX 8 | MOVQ xs_len+8(FP), CX 9 | MOVQ k+24(FP), DX 10 | 11 | // Save n 12 | MOVQ CX, BX 13 | 14 | // Initialize idx register to zero. 15 | XORL BP, BP 16 | 17 | loop: 18 | // Unroll1 19 | CMPQ (AX)(BP*8), DX 20 | JAE Found 21 | 22 | // Unroll2 23 | CMPQ 16(AX)(BP*8), DX 24 | JAE Found2 25 | 26 | // Unroll3 27 | CMPQ 32(AX)(BP*8), DX 28 | JAE Found3 29 | 30 | // Unroll4 31 | CMPQ 48(AX)(BP*8), DX 32 | JAE Found4 33 | 34 | // plus8 35 | ADDQ $0x08, BP 36 | CMPQ BP, CX 37 | JB loop 38 | JMP NotFound 39 | 40 | Found2: 41 | ADDL $0x02, BP 42 | JMP Found 43 | 44 | Found3: 45 | ADDL $0x04, BP 46 | JMP Found 47 | 48 | Found4: 49 | ADDL $0x06, BP 50 | 51 | Found: 52 | MOVL BP, BX 53 | 54 | NotFound: 55 | MOVL BX, BP 56 | SHRL $0x1f, BP 57 | ADDL BX, BP 58 | SHRL $0x01, BP 59 | MOVL BP, ret+32(FP) 60 | RET 61 | -------------------------------------------------------------------------------- /z/simd/stub_search_amd64.go: -------------------------------------------------------------------------------- 1 | // Code generated by command: go run asm2.go -out search_amd64.s -stubs stub_search_amd64.go. DO NOT EDIT. 2 | 3 | package simd 4 | 5 | // Search finds the first idx for which xs[idx] >= k in xs. 6 | func Search(xs []uint64, k uint64) int16 7 | -------------------------------------------------------------------------------- /z/z.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package z 7 | 8 | import ( 9 | "context" 10 | "sync" 11 | 12 | "github.com/cespare/xxhash/v2" 13 | ) 14 | 15 | type Key interface { 16 | uint64 | string | []byte | byte | int | int32 | uint32 | int64 17 | } 18 | 19 | // TODO: Figure out a way to re-use memhash for the second uint64 hash, 20 | // we already know that appending bytes isn't reliable for generating a 21 | // second hash (see Ristretto PR #88). 22 | // We also know that while the Go runtime has a runtime memhash128 23 | // function, it's not possible to use it to generate [2]uint64 or 24 | // anything resembling a 128bit hash, even though that's exactly what 25 | // we need in this situation. 
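// KeyToHash maps a cache key to a (hash, conflict) pair: the first value is used as the
// primary key hash, the second as an extra hash kept to detect collisions. For example:
//
//	h, c := KeyToHash("hello")   // MemHashString plus xxhash of the string
//	h, c = KeyToHash(uint64(42)) // (42, 0): integer keys are used directly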
26 | func KeyToHash[K Key](key K) (uint64, uint64) { 27 | keyAsAny := any(key) 28 | switch k := keyAsAny.(type) { 29 | case uint64: 30 | return k, 0 31 | case string: 32 | return MemHashString(k), xxhash.Sum64String(k) 33 | case []byte: 34 | return MemHash(k), xxhash.Sum64(k) 35 | case byte: 36 | return uint64(k), 0 37 | case int: 38 | return uint64(k), 0 39 | case int32: 40 | return uint64(k), 0 41 | case uint32: 42 | return uint64(k), 0 43 | case int64: 44 | return uint64(k), 0 45 | default: 46 | panic("Key type not supported") 47 | } 48 | } 49 | 50 | var ( 51 | dummyCloserChan <-chan struct{} 52 | tmpDir string 53 | ) 54 | 55 | // Closer holds the two things we need to close a goroutine and wait for it to 56 | // finish: a chan to tell the goroutine to shut down, and a WaitGroup with 57 | // which to wait for it to finish shutting down. 58 | type Closer struct { 59 | waiting sync.WaitGroup 60 | 61 | ctx context.Context 62 | cancel context.CancelFunc 63 | } 64 | 65 | // SetTmpDir sets the temporary directory for the temporary buffers. 66 | func SetTmpDir(dir string) { 67 | tmpDir = dir 68 | } 69 | 70 | // NewCloser constructs a new Closer, with an initial count on the WaitGroup. 71 | func NewCloser(initial int) *Closer { 72 | ret := &Closer{} 73 | ret.ctx, ret.cancel = context.WithCancel(context.Background()) 74 | ret.waiting.Add(initial) 75 | return ret 76 | } 77 | 78 | // AddRunning Add()'s delta to the WaitGroup. 79 | func (lc *Closer) AddRunning(delta int) { 80 | lc.waiting.Add(delta) 81 | } 82 | 83 | // Ctx can be used to get a context, which would automatically get cancelled when Signal is called. 84 | func (lc *Closer) Ctx() context.Context { 85 | if lc == nil { 86 | return context.Background() 87 | } 88 | return lc.ctx 89 | } 90 | 91 | // Signal signals the HasBeenClosed signal. 92 | func (lc *Closer) Signal() { 93 | // Todo(ibrahim): Change Signal to return error on next badger breaking change. 94 | lc.cancel() 95 | } 96 | 97 | // HasBeenClosed gets signaled when Signal() is called. 98 | func (lc *Closer) HasBeenClosed() <-chan struct{} { 99 | if lc == nil { 100 | return dummyCloserChan 101 | } 102 | return lc.ctx.Done() 103 | } 104 | 105 | // Done calls Done() on the WaitGroup. 106 | func (lc *Closer) Done() { 107 | if lc == nil { 108 | return 109 | } 110 | lc.waiting.Done() 111 | } 112 | 113 | // Wait waits on the WaitGroup. (It waits for NewCloser's initial value, AddRunning, and Done 114 | // calls to balance out.) 115 | func (lc *Closer) Wait() { 116 | lc.waiting.Wait() 117 | } 118 | 119 | // SignalAndWait calls Signal(), then Wait(). 120 | func (lc *Closer) SignalAndWait() { 121 | lc.Signal() 122 | lc.Wait() 123 | } 124 | 125 | // ZeroOut zeroes out all the bytes in the range [start, end). 126 | func ZeroOut(dst []byte, start, end int) { 127 | if start < 0 || start >= len(dst) { 128 | return // BAD 129 | } 130 | if end >= len(dst) { 131 | end = len(dst) 132 | } 133 | if end-start <= 0 { 134 | return 135 | } 136 | Memclr(dst[start:end]) 137 | // b := dst[start:end] 138 | // for i := range b { 139 | // b[i] = 0x0 140 | // } 141 | } 142 | -------------------------------------------------------------------------------- /z/z_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: © Hypermode Inc. 
3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | package z 7 | 8 | import ( 9 | "math" 10 | "testing" 11 | 12 | "github.com/stretchr/testify/require" 13 | ) 14 | 15 | func verifyHashProduct(t *testing.T, wantKey, wantConflict, key, conflict uint64) { 16 | require.Equal(t, wantKey, key) 17 | require.Equal(t, wantConflict, conflict) 18 | } 19 | 20 | func TestKeyToHash(t *testing.T) { 21 | var key uint64 22 | var conflict uint64 23 | 24 | key, conflict = KeyToHash(uint64(1)) 25 | verifyHashProduct(t, 1, 0, key, conflict) 26 | 27 | key, conflict = KeyToHash(1) 28 | verifyHashProduct(t, 1, 0, key, conflict) 29 | 30 | key, conflict = KeyToHash(int32(2)) 31 | verifyHashProduct(t, 2, 0, key, conflict) 32 | 33 | key, conflict = KeyToHash(int32(-2)) 34 | verifyHashProduct(t, math.MaxUint64-1, 0, key, conflict) 35 | 36 | key, conflict = KeyToHash(int64(-2)) 37 | verifyHashProduct(t, math.MaxUint64-1, 0, key, conflict) 38 | 39 | key, conflict = KeyToHash(uint32(3)) 40 | verifyHashProduct(t, 3, 0, key, conflict) 41 | 42 | key, conflict = KeyToHash(int64(3)) 43 | verifyHashProduct(t, 3, 0, key, conflict) 44 | } 45 | 46 | func TestMulipleSignals(t *testing.T) { 47 | closer := NewCloser(0) 48 | require.NotPanics(t, func() { closer.Signal() }) 49 | // Should not panic. 50 | require.NotPanics(t, func() { closer.Signal() }) 51 | require.NotPanics(t, func() { closer.SignalAndWait() }) 52 | 53 | // Attempt 2. 54 | closer = NewCloser(1) 55 | require.NotPanics(t, func() { closer.Done() }) 56 | 57 | require.NotPanics(t, func() { closer.SignalAndWait() }) 58 | // Should not panic. 59 | require.NotPanics(t, func() { closer.SignalAndWait() }) 60 | require.NotPanics(t, func() { closer.Signal() }) 61 | } 62 | 63 | func TestCloser(t *testing.T) { 64 | closer := NewCloser(1) 65 | go func() { 66 | defer closer.Done() 67 | <-closer.Ctx().Done() 68 | }() 69 | closer.SignalAndWait() 70 | } 71 | 72 | func TestZeroOut(t *testing.T) { 73 | dst := make([]byte, 4*1024) 74 | fill := func() { 75 | for i := 0; i < len(dst); i++ { 76 | dst[i] = 0xFF 77 | } 78 | } 79 | check := func(buf []byte, b byte) { 80 | for i := 0; i < len(buf); i++ { 81 | require.Equalf(t, b, buf[i], "idx: %d", i) 82 | } 83 | } 84 | fill() 85 | 86 | ZeroOut(dst, 0, 1) 87 | check(dst[:1], 0x00) 88 | check(dst[1:], 0xFF) 89 | 90 | ZeroOut(dst, 0, 1024) 91 | check(dst[:1024], 0x00) 92 | check(dst[1024:], 0xFF) 93 | 94 | ZeroOut(dst, 0, len(dst)) 95 | check(dst, 0x00) 96 | } 97 | --------------------------------------------------------------------------------