├── .github └── workflows │ └── test.yml ├── LICENSE ├── README.md ├── go.mod ├── go.sum ├── hyrumtoken.go ├── hyrumtoken_test.go └── screenshot.png /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | on: [push] 2 | jobs: 3 | test: 4 | runs-on: ubuntu-latest 5 | steps: 6 | - uses: actions/checkout@v4 7 | - uses: actions/setup-go@v5 8 | with: 9 | go-version: '1.21.x' 10 | - run: go get . 11 | - run: go test 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2024 SSOReady 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | documentation files (the “Software”), to deal in the Software without restriction, including without limitation the 5 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit 6 | persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the 9 | Software. 10 | 11 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE 12 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 13 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 14 | OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
15 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # hyrumtoken 2 | 3 | [![Go Reference](https://pkg.go.dev/badge/github.com/ssoready/hyrumtoken.svg)](https://pkg.go.dev/github.com/ssoready/hyrumtoken) 4 | 5 | `hyrumtoken` is a Go package to encrypt pagination tokens, so that your API 6 | clients can't depend on their contents, ordering, or any other characteristics. 7 | 8 | ## Installation 9 | 10 | ```bash 11 | go get github.com/ssoready/hyrumtoken 12 | ``` 13 | 14 | ## Usage 15 | 16 | `hyrumtoken.Marshal/Unmarshal` work like the equivalent `json` functions, 17 | except they take a `key *[32]byte`: 18 | 19 | ```go 20 | var key [32]byte = ... 21 | 22 | // create an encrypted pagination token 23 | token := hyrumtoken.Marshal(&key, "any-json-encodable-data") 24 | 25 | // parse an encrypted pagination token 26 | var parsedToken string 27 | err := hyrumtoken.Unmarshal(&key, token, &parsedToken) 28 | ``` 29 | 30 | You can use any data type that works with `json.Marshal` as your pagination 31 | token. 32 | 33 | ## Motivation 34 | 35 | [Hyrum's Law](https://www.hyrumslaw.com/) goes: 36 | 37 | > With a sufficient number of users of an API, it does not matter what you promise in the contract: all observable 38 | behaviors of your system will be depended on by somebody. 39 | 40 | Pagination tokens are one of the most common ways this turns up. I'll illustrate 41 | with a story. 42 | 43 | ### Getting stuck with LIMIT/OFFSET 44 | 45 | I was implementing an audit logging feature. My job was the backend, some other 46 | folks were doing the frontend. To get them going quickly, I gave them an API 47 | documented like this: 48 | 49 | > To list audit log events, do `GET /v1/events?pageToken=...`. For the first 50 | page, use an empty `pageToken`. 51 | > 52 | > That will return `{"events": [...], "nextPageToken": "...", "totalCount": ...}`. 
53 | If `nextPageToken` is empty, you've hit the end of the list. 54 | 55 | To keep things real simple, my unblock-the-frontend MVP used `limit/offset` 56 | pagination. The page tokens were just the `offset` values. This wasn't going to 57 | work once we had filters/sorts/millions of events, but whatever! Just rendering 58 | the audit log events was already a good chunk of work for the frontend folks, 59 | and we wanted to work in parallel. 60 | 61 | A week ensues. The frontend folks came back with a UI that had one of these at 62 | the bottom: 63 | 64 | ![](./screenshot.png) 65 | 66 | Weird. The documented API doesn't really promise any affordance of "seeking" to 67 | a random page. "If you're on page 1 and you click on 3, what happens?" The 68 | reply: "We just set the pageToken to 300". 69 | 70 | This happened because folks saw the initial real-world behavior of the API: 71 | 72 | ``` 73 | GET /v1/events 74 | {"events": [... 100 events ...], "nextPageToken": "100", "totalCount": "8927"} 75 | 76 | GET /v1/events?pageToken=100 77 | {"events": [... 100 events ...], "nextPageToken": "200", "totalCount": "8927"} 78 | ``` 79 | 80 | And so it didn't matter what you document. People will guess what you meant, and 81 | it really looks like you meant to make `pageToken` be an offset token. 82 | 83 | The fun part about this story is that I in fact have lied to you. We *knew* 84 | keyset-based pagination was coming, and so we needed a way to encode potentially 85 | URL-unsafe data in `pageToken`. So right from the get-go we were base64-encoding 86 | the token. So the actual requests looked like: 87 | 88 | ``` 89 | GET /v1/events 90 | {"events": [... 100 events ...], "nextPageToken": "MTAwCg==", "totalCount": "8927"} 91 | 92 | GET /v1/events?pageToken=MTAwCg== 93 | {"events": [... 100 events ...], "nextPageToken": "MjAwCg==", "totalCount": "8927"} 94 | ``` 95 | 96 | The effect is the same. 
If it ends in `==`, you bet your ass the intellectual 97 | curiosity of your coworkers demands they base64-parse it. Parse `MTAwCg==` and 98 | you get back `100\n`. Our company design system had a prebuilt component with a 99 | jump-to-page affordance, and the UX folks put two and two together 100 | instinctively. 101 | 102 | By making an API that looked like it wanted to let you "seek" through the data, 103 | I had invited my colleagues to design and implement a user interface that I had 104 | no plans to support. This problem was on me. 105 | 106 | In a lot of ways, I got lucky here. I can just politely ask my coworkers to 107 | redesign their frontend to only offer a "Load More" button, no "jump to page". 108 | If I had made this API public, paying customers would have read the tea-leaves 109 | of my API, and they'd be broken if I changed anything. We'd probably be stuck 110 | with the limit/offset approach forever. 111 | 112 | ### Binary searching through pagination-token-space 113 | 114 | I've been on the opposite end of this. In the past, I've worked at companies 115 | that had to ETL data out of systems faster than the public API would allow. Each 116 | individual request is slow, but parallel requests increased throughput out of 117 | their API. Problem was figuring out how to usefully do parallel requests over a 118 | paginated list. 119 | 120 | We figured out that their pagination tokens were alphabetically increasing, and 121 | so we made a program that "searched" for the last pagination token, divided up 122 | the pagination token space into *N* chunks, and synced those chunks in parallel. 123 | 124 | Probably not what they intended! But in practice we're now one of the biggest 125 | users of their API, and they can't change their behavior. Even the *alphabetical 126 | ordering* of your pagination tokens can get you stuck. 
127 | 128 | At that same company, we would sometimes parse pagination tokens to implement 129 | internal logging of where we were in the list. This might seem gratuitous, but 130 | engineers are always tempted to do this. 131 | 132 | If you didn't want me to parse your sorta-opaque token, you should've made it 133 | actually-opaque. 134 | 135 | ### Encrypt your pagination tokens 136 | 137 | So that's why I like to encrypt my pagination tokens. It seems extreme, but it 138 | eliminates this entire class of problems. Instead of obscurity-by-base64, I just 139 | enforce opacity-by-Salsa20. 140 | 141 | `hyrumtoken` prevents your users from: 142 | 143 | 1. Creating their own pagination tokens to "seek" through your data 144 | 2. Parsing your returned pagination tokens to infer where they are in the data 145 | 3. Having their software be broken if you change what you put inside your 146 | pagination tokens 147 | 148 | If you intend your pagination tokens to be opaque strings, `hyrumtoken` can 149 | enforce that opacity. Concretely, `hyrumtoken` does this: 150 | 151 | 1. JSON-encode the "pagination state" data 152 | 2. Encrypt that using NaCl's [secretbox](https://nacl.cr.yp.to/secretbox.html) 153 | with a random nonce. This requires a secret key, hence the need for a `key 154 | *[32]byte`. 155 | 3. Concatenate the nonce and the encrypted message 156 | 4. Return a base64url-encoded copy 157 | 158 | Secretbox is implemented using Golang's widely-used [`x/crypto/nacl/secretbox` 159 | package](https://pkg.go.dev/golang.org/x/crypto/nacl/secretbox). There are 160 | Secretbox implementations in every language, so it's pretty easy to port or 161 | share tokens between backend languages. 162 | 163 | ## Advanced Usage 164 | 165 | ### Expiring tokens 166 | 167 | This one isn't particularly tied to `hyrumtoken`. 168 | 169 | Your customers may get into the habit of assuming your pagination tokens never 170 | expire (again in the spirit of Hyrum's Law). 
You can prevent that by having 171 | tokens keep track of their own expiration: 172 | 173 | ```go 174 | type tokenData struct { 175 | ExpireTime time.Time 176 | ID string 177 | } 178 | 179 | // encode 180 | hyrumtoken.Marshal(&key, tokenData{ 181 | ExpireTime: time.Now().Add(time.Hour), 182 | ID: ..., 183 | }) 184 | 185 | // decode 186 | var data tokenData 187 | if err := hyrumtoken.Unmarshal(&key, token, &data); err != nil { 188 | return err 189 | } 190 | if data.ExpireTime.Before(time.Now()) { 191 | return fmt.Errorf("token is expired") 192 | } 193 | ``` 194 | 195 | That way, your customer probably sees they're wrong to assume "tokens never 196 | expire" while they're still developing their software, and that assumption is 197 | still easy to undo. 198 | 199 | ### Rotating keys 200 | 201 | Any time you have keys, you should think about how you're gonna rotate them. It 202 | might be obvious, but you can just have a "primary" key you encode new tokens 203 | with, and a set of "backup" keys you try to decode with. Something like this: 204 | 205 | ```go 206 | var primaryKey [32]byte = ... 207 | var backupKey1 [32]byte = ... 208 | var backupKey2 [32]byte = ... 209 | 210 | // encode 211 | token := hyrumtoken.Marshal(&primaryKey, data) 212 | 213 | // decode 214 | keys := [][32]byte{primaryKey, backupKey1, backupKey2} 215 | for _, k := range keys { 216 | var data tokenData 217 | if err := hyrumtoken.Unmarshal(&k, token, &data); err == nil { 218 | return &data, nil 219 | } 220 | } 221 | return nil, fmt.Errorf("invalid pagination token") 222 | ``` 223 | 224 | You can use expiring tokens to eventually guarantee the backup keys are never 225 | used, and stop accepting them entirely. 
226 | 227 | ### Changing pagination schemes 228 | 229 | You can change from one type of pagination to another by putting both into the 230 | same struct, and then looking at which fields are populated: 231 | 232 | ```go 233 | type tokenData struct { 234 | Offset int 235 | StartID string 236 | } 237 | 238 | var data tokenData 239 | if err := hyrumtoken.Unmarshal(&key, token, &data); err != nil { 240 | return err 241 | } 242 | 243 | if data.Offset != 0 { 244 | // offset-based approach 245 | } 246 | // startid-based approach 247 | ``` 248 | 249 | Expiring tokens also help here, so you can get rid of the old codepath quickly. 250 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/ssoready/hyrumtoken 2 | 3 | go 1.22.3 4 | 5 | require golang.org/x/crypto v0.26.0 6 | 7 | require golang.org/x/sys v0.23.0 // indirect 8 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw= 2 | golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54= 3 | golang.org/x/sys v0.23.0 h1:YfKFowiIMvtgl1UERQoTPPToxltDeZfbj4H7dVUCwmM= 4 | golang.org/x/sys v0.23.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 5 | -------------------------------------------------------------------------------- /hyrumtoken.go: -------------------------------------------------------------------------------- 1 | // Package hyrumtoken implements opaque pagination tokens. 2 | // 3 | // Token opacity is implemented using NaCl secretbox: 4 | // 5 | // https://pkg.go.dev/golang.org/x/crypto/nacl/secretbox 6 | // 7 | // Marshal and Unmarshal require a key. Tokens are only opaque to those who do 8 | // not have this key. 
Do not publish this key to your API consumers. 9 | package hyrumtoken 10 | 11 | import ( 12 | "crypto/rand" 13 | "encoding/base64" 14 | "encoding/json" 15 | "fmt" 16 | "io" 17 | 18 | "golang.org/x/crypto/nacl/secretbox" 19 | ) 20 | 21 | // Marshal returns an encrypted, URL-safe serialization of v using key. 22 | // 23 | // Marshal panics if v cannot be JSON-encoded. 24 | // 25 | // Marshal uses a random nonce. Providing the same key and v in multiple 26 | // invocations will produce different results every time. 27 | func Marshal(key *[32]byte, v any) string { 28 | b, err := json.Marshal(v) 29 | if err != nil { 30 | panic(err) 31 | } 32 | 33 | var nonce [24]byte 34 | if _, err := io.ReadFull(rand.Reader, nonce[:]); err != nil { 35 | panic(err) 36 | } 37 | 38 | d := secretbox.Seal(nonce[:], b, &nonce, key) 39 | return base64.URLEncoding.EncodeToString(d) 40 | } 41 | 42 | // Unmarshal uses key to decrypt s and store the decoded value in v. 43 | // 44 | // If s is empty, v is not modified and Unmarshal returns nil. 45 | func Unmarshal(key *[32]byte, s string, v any) error { 46 | if s == "" { 47 | return nil 48 | } 49 | 50 | d, err := base64.URLEncoding.DecodeString(s) 51 | if err != nil { 52 | return fmt.Errorf("decode token: %w", err) 53 | } 54 | 55 | var nonce [24]byte 56 | copy(nonce[:], d[:24]) 57 | 58 | b, ok := secretbox.Open(nil, d[24:], &nonce, key) 59 | if !ok { 60 | return fmt.Errorf("decrypt token: %w", err) 61 | } 62 | 63 | if err := json.Unmarshal(b, v); err != nil { 64 | return fmt.Errorf("unmarshal token data: %w", err) 65 | } 66 | 67 | return nil 68 | } 69 | -------------------------------------------------------------------------------- /hyrumtoken_test.go: -------------------------------------------------------------------------------- 1 | package hyrumtoken_test 2 | 3 | import ( 4 | "crypto/rand" 5 | "reflect" 6 | "testing" 7 | 8 | "github.com/ssoready/hyrumtoken" 9 | ) 10 | 11 | // testkey is a randomized key for testing. 
Do not use it in production. 12 | var testkey = [32]byte{24, 12, 15, 90, 143, 133, 171, 28, 34, 75, 185, 194, 102, 93, 165, 183, 235, 96, 135, 135, 165, 1, 129, 91, 32, 7, 139, 135, 130, 2, 241, 168} 13 | 14 | func TestEncoder(t *testing.T) { 15 | type data struct { 16 | Foo string 17 | Bar string 18 | } 19 | 20 | in := data{ 21 | Foo: "foo", 22 | Bar: "bar", 23 | } 24 | 25 | encoded := hyrumtoken.Marshal(&testkey, in) 26 | 27 | var out data 28 | err := hyrumtoken.Unmarshal(&testkey, encoded, &out) 29 | if err != nil { 30 | t.Fatalf("unexpected err: %v", err) 31 | } 32 | 33 | if !reflect.DeepEqual(in, out) { 34 | t.Fatalf("round-trip failure") 35 | } 36 | } 37 | 38 | func TestEncoder_Unmarshal_empty(t *testing.T) { 39 | data := 123 40 | if err := hyrumtoken.Unmarshal(&testkey, "", &data); err != nil { 41 | t.Fatalf("unexpected error: %s", err) 42 | } 43 | 44 | if data != 123 { 45 | t.Fatalf("data unexpectedly modified: %d", data) 46 | } 47 | } 48 | 49 | func TestEncoder_Marshal(t *testing.T) { 50 | // test known produced values using fixed, zero rand and secret 51 | r := rand.Reader 52 | rand.Reader = zeroReader{} 53 | defer func() { 54 | rand.Reader = r 55 | }() 56 | 57 | token := hyrumtoken.Marshal(&testkey, 123) 58 | 59 | if token != "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAULRUMRVA4GIqe5Y8N_z8B4J7hw==" { 60 | t.Fatalf("encoding regression, got: %q", token) 61 | } 62 | } 63 | 64 | func TestEncoder_Unmarshal(t *testing.T) { 65 | // inverse of TestEncoder_Marshal 66 | r := rand.Reader 67 | rand.Reader = zeroReader{} 68 | defer func() { 69 | rand.Reader = r 70 | }() 71 | 72 | var data int 73 | if err := hyrumtoken.Unmarshal(&testkey, "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAULRUMRVA4GIqe5Y8N_z8B4J7hw==", &data); err != nil { 74 | t.Fatalf("unmarshal error: %v", err) 75 | } 76 | 77 | if data != 123 { 78 | t.Fatalf("unmarshal regression, got: %d", data) 79 | } 80 | } 81 | 82 | type zeroReader struct{} 83 | 84 | func (z zeroReader) Read(p []byte) (n int, err error) { 85 | for i := 0; 
i < len(p); i++ { 86 | p[i] = 0 87 | } 88 | return len(p), nil 89 | } 90 | -------------------------------------------------------------------------------- /screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ssoready/hyrumtoken/b642a354467e3f8d2286bc92075717e8663646c7/screenshot.png --------------------------------------------------------------------------------