├── .github ├── ISSUE_TEMPLATE │ ├── config.yml │ └── open_an_issue.md ├── config.yml └── workflows │ ├── generated-pr.yml │ ├── go-check.yml │ ├── go-test.yml │ ├── release-check.yml │ ├── releaser.yml │ ├── stale.yml │ └── tagpush.yml ├── LICENSE ├── README.md ├── batch.go ├── batch_test.go ├── codecov.yml ├── coding.go ├── coding_test.go ├── daghelpers.go ├── daghelpers_test.go ├── format.go ├── format_test.go ├── go.mod ├── go.sum ├── merkledag.go ├── navipld.go ├── promise.go ├── version.json └── walker.go /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | contact_links: 3 | - name: Getting Help on IPFS 4 | url: https://ipfs.io/help 5 | about: All information about how and where to get help on IPFS. 6 | - name: IPFS Official Forum 7 | url: https://discuss.ipfs.io 8 | about: Please post general questions, support requests, and discussions here. 9 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/open_an_issue.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Open an issue 3 | about: Only for actionable issues relevant to this repository. 4 | title: '' 5 | labels: need/triage 6 | assignees: '' 7 | 8 | --- 9 | 20 | -------------------------------------------------------------------------------- /.github/config.yml: -------------------------------------------------------------------------------- 1 | # Configuration for welcome - https://github.com/behaviorbot/welcome 2 | 3 | # Configuration for new-issue-welcome - https://github.com/behaviorbot/new-issue-welcome 4 | # Comment to be posted to on first time issues 5 | newIssueWelcomeComment: > 6 | Thank you for submitting your first issue to this repository! A maintainer 7 | will be here shortly to triage and review. 
8 | 9 | In the meantime, please double-check that you have provided all the 10 | necessary information to make this process easy! Any information that can 11 | help save additional round trips is useful! We currently aim to give 12 | initial feedback within **two business days**. If this does not happen, feel 13 | free to leave a comment. 14 | 15 | Please keep an eye on how this issue will be labeled, as labels give an 16 | overview of priorities, assignments and additional actions requested by the 17 | maintainers: 18 | 19 | - "Priority" labels will show how urgent this is for the team. 20 | - "Status" labels will show if this is ready to be worked on, blocked, or in progress. 21 | - "Need" labels will indicate if additional input or analysis is required. 22 | 23 | Finally, remember to use https://discuss.ipfs.io if you just need general 24 | support. 25 | 26 | # Configuration for new-pr-welcome - https://github.com/behaviorbot/new-pr-welcome 27 | # Comment to be posted to on PRs from first time contributors in your repository 28 | newPRWelcomeComment: > 29 | Thank you for submitting this PR! 30 | 31 | A maintainer will be here shortly to review it. 32 | 33 | We are super grateful, but we are also overloaded! Help us by making sure 34 | that: 35 | 36 | * The context for this PR is clear, with relevant discussion, decisions 37 | and stakeholders linked/mentioned. 38 | 39 | * Your contribution itself is clear (code comments, self-review for the 40 | rest) and in its best form. Follow the [code contribution 41 | guidelines](https://github.com/ipfs/community/blob/master/CONTRIBUTING.md#code-contribution-guidelines) 42 | if they apply. 43 | 44 | Getting other community members to do a review would be great help too on 45 | complex PRs (you can ask in the chats/forums). If you are unsure about 46 | something, just leave us a comment. 
47 | 48 | Next steps: 49 | 50 | * A maintainer will triage and assign priority to this PR, commenting on 51 | any missing things and potentially assigning a reviewer for high 52 | priority items. 53 | 54 | * The PR gets reviewed, discussed, and approved as needed. 55 | 56 | * The PR is merged by maintainers when it has been approved and comments addressed. 57 | 58 | We currently aim to provide initial feedback/triaging within **two business 59 | days**. Please keep an eye on any labelling actions, as these will indicate 60 | priorities and status of your contribution. 61 | 62 | We are very grateful for your contribution! 63 | 64 | 65 | # Configuration for first-pr-merge - https://github.com/behaviorbot/first-pr-merge 66 | # Comment to be posted on pull requests merged by a first-time user 67 | # Currently disabled 68 | #firstPRMergeComment: "" 69 | -------------------------------------------------------------------------------- /.github/workflows/generated-pr.yml: -------------------------------------------------------------------------------- 1 | name: Close Generated PRs 2 | 3 | on: 4 | schedule: 5 | - cron: '0 0 * * *' 6 | workflow_dispatch: 7 | 8 | permissions: 9 | issues: write 10 | pull-requests: write 11 | 12 | jobs: 13 | stale: 14 | uses: ipdxco/unified-github-workflows/.github/workflows/reusable-generated-pr.yml@v1 15 | -------------------------------------------------------------------------------- /.github/workflows/go-check.yml: -------------------------------------------------------------------------------- 1 | name: Go Checks 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: ["master"] 7 | workflow_dispatch: 8 | 9 | permissions: 10 | contents: read 11 | 12 | concurrency: 13 | group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.event_name == 'push' && github.sha || github.ref }} 14 | cancel-in-progress: true 15 | 16 | jobs: 17 | go-check: 18 | uses: ipdxco/unified-github-workflows/.github/workflows/go-check.yml@v1.0 19 | 
-------------------------------------------------------------------------------- /.github/workflows/go-test.yml: -------------------------------------------------------------------------------- 1 | name: Go Test 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: ["master"] 7 | workflow_dispatch: 8 | 9 | permissions: 10 | contents: read 11 | 12 | concurrency: 13 | group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.event_name == 'push' && github.sha || github.ref }} 14 | cancel-in-progress: true 15 | 16 | jobs: 17 | go-test: 18 | uses: ipdxco/unified-github-workflows/.github/workflows/go-test.yml@v1.0 19 | secrets: 20 | CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} 21 | -------------------------------------------------------------------------------- /.github/workflows/release-check.yml: -------------------------------------------------------------------------------- 1 | name: Release Checker 2 | 3 | on: 4 | pull_request_target: 5 | paths: [ 'version.json' ] 6 | types: [ opened, synchronize, reopened, labeled, unlabeled ] 7 | workflow_dispatch: 8 | 9 | permissions: 10 | contents: write 11 | pull-requests: write 12 | 13 | concurrency: 14 | group: ${{ github.workflow }}-${{ github.ref }} 15 | cancel-in-progress: true 16 | 17 | jobs: 18 | release-check: 19 | uses: ipdxco/unified-github-workflows/.github/workflows/release-check.yml@v1.0 20 | -------------------------------------------------------------------------------- /.github/workflows/releaser.yml: -------------------------------------------------------------------------------- 1 | name: Releaser 2 | 3 | on: 4 | push: 5 | paths: [ 'version.json' ] 6 | workflow_dispatch: 7 | 8 | permissions: 9 | contents: write 10 | 11 | concurrency: 12 | group: ${{ github.workflow }}-${{ github.sha }} 13 | cancel-in-progress: true 14 | 15 | jobs: 16 | releaser: 17 | uses: ipdxco/unified-github-workflows/.github/workflows/releaser.yml@v1.0 18 | 
-------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | name: Close Stale Issues 2 | 3 | on: 4 | schedule: 5 | - cron: '0 0 * * *' 6 | workflow_dispatch: 7 | 8 | permissions: 9 | issues: write 10 | pull-requests: write 11 | 12 | jobs: 13 | stale: 14 | uses: ipdxco/unified-github-workflows/.github/workflows/reusable-stale-issue.yml@v1 15 | -------------------------------------------------------------------------------- /.github/workflows/tagpush.yml: -------------------------------------------------------------------------------- 1 | name: Tag Push Checker 2 | 3 | on: 4 | push: 5 | tags: 6 | - v* 7 | 8 | permissions: 9 | contents: read 10 | issues: write 11 | 12 | concurrency: 13 | group: ${{ github.workflow }}-${{ github.ref }} 14 | cancel-in-progress: true 15 | 16 | jobs: 17 | releaser: 18 | uses: ipdxco/unified-github-workflows/.github/workflows/tagpush.yml@v1.0 19 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Jeromy Johnson 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | go-ipld-format 2 | ================== 3 | 4 | [![](https://img.shields.io/badge/made%20by-Protocol%20Labs-blue.svg?style=flat-square)](http://ipn.io) 5 | [![](https://img.shields.io/badge/project-IPFS-blue.svg?style=flat-square)](http://ipfs.io/) 6 | [![](https://img.shields.io/badge/freenode-%23ipfs-blue.svg?style=flat-square)](http://webchat.freenode.net/?channels=%23ipfs) 7 | [![Coverage Status](https://codecov.io/gh/ipfs/go-ipld-format/branch/master/graph/badge.svg)](https://codecov.io/gh/ipfs/go-ipld-format/branch/master) 8 | [![Travis CI](https://travis-ci.org/ipfs/go-ipld-format.svg?branch=master)](https://travis-ci.org/ipfs/go-ipld-format) 9 | 10 | > go-ipld-format is a set of interfaces that a type needs to implement in order to be a part of the ipld merkle-forest. 11 | 12 | ## Lead Maintainer 13 | 14 | [Eric Myhre](https://github.com/warpfork) 15 | 16 | ## Table of Contents 17 | 18 | - [Install](#install) 19 | - [Usage](#usage) 20 | - [API](#api) 21 | - [Contribute](#contribute) 22 | - [License](#license) 23 | 24 | ## Install 25 | 26 | ```sh 27 | make install 28 | ``` 29 | 30 | ## Contribute 31 | 32 | PRs are welcome! 33 | 34 | Small note: If editing the Readme, please conform to the [standard-readme](https://github.com/RichardLitt/standard-readme) specification. 
// parallelCommits is the number of batch commits that can be in-flight before
// blocking. It is initialised from runtime.NumCPU(), and NewBatch also divides
// the configured size and node limits by it.
// TODO(ipfs/go-ipfs#4299): Experiment with multiple datastores, storage
// devices, and CPUs to find the right value/formula.
var parallelCommits = runtime.NumCPU()

// ErrNotCommited is returned when closing a batch that hasn't been successfully
// committed.
// NOTE(review): nothing in batch.go returns this error — confirm it is still
// used by callers elsewhere.
var ErrNotCommited = errors.New("error: batch not commited")

// ErrClosed is returned when operating on a batch that has already been closed.
// NOTE(review): nothing in batch.go returns this error — confirm it is still
// used by callers elsewhere.
var ErrClosed = errors.New("error: batch closed")
48 | type Batch struct { 49 | na NodeAdder 50 | 51 | ctx context.Context 52 | cancel func() 53 | 54 | activeCommits int 55 | err error 56 | commitResults chan error 57 | 58 | nodes []Node 59 | size int 60 | 61 | opts batchOptions 62 | } 63 | 64 | func (t *Batch) processResults() { 65 | for t.activeCommits > 0 { 66 | select { 67 | case err := <-t.commitResults: 68 | t.activeCommits-- 69 | if err != nil { 70 | t.setError(err) 71 | return 72 | } 73 | default: 74 | return 75 | } 76 | } 77 | } 78 | 79 | func (t *Batch) asyncCommit() { 80 | numBlocks := len(t.nodes) 81 | if numBlocks == 0 { 82 | return 83 | } 84 | if t.activeCommits >= parallelCommits { 85 | select { 86 | case err := <-t.commitResults: 87 | t.activeCommits-- 88 | 89 | if err != nil { 90 | t.setError(err) 91 | return 92 | } 93 | case <-t.ctx.Done(): 94 | t.setError(t.ctx.Err()) 95 | return 96 | } 97 | } 98 | go func(ctx context.Context, b []Node, result chan error, na NodeAdder) { 99 | select { 100 | case result <- na.AddMany(ctx, b): 101 | case <-ctx.Done(): 102 | } 103 | }(t.ctx, t.nodes, t.commitResults, t.na) 104 | 105 | t.activeCommits++ 106 | t.nodes = make([]Node, 0, numBlocks) 107 | t.size = 0 108 | } 109 | 110 | // Add adds a node to the batch and commits the batch if necessary. 111 | func (t *Batch) Add(ctx context.Context, nd Node) error { 112 | return t.AddMany(ctx, []Node{nd}) 113 | } 114 | 115 | // AddMany many calls Add for every given Node, thus batching and 116 | // commiting them as needed. 117 | func (t *Batch) AddMany(ctx context.Context, nodes []Node) error { 118 | if t.err != nil { 119 | return t.err 120 | } 121 | // Not strictly necessary but allows us to catch errors early. 122 | t.processResults() 123 | 124 | if t.err != nil { 125 | return t.err 126 | } 127 | 128 | t.nodes = append(t.nodes, nodes...) 
129 | for _, nd := range nodes { 130 | t.size += len(nd.RawData()) 131 | } 132 | 133 | if t.size > t.opts.maxSize || len(t.nodes) > t.opts.maxNodes { 134 | t.asyncCommit() 135 | } 136 | return t.err 137 | } 138 | 139 | // Commit commits batched nodes. 140 | func (t *Batch) Commit() error { 141 | if t.err != nil { 142 | return t.err 143 | } 144 | 145 | t.asyncCommit() 146 | 147 | loop: 148 | for t.activeCommits > 0 { 149 | select { 150 | case err := <-t.commitResults: 151 | t.activeCommits-- 152 | if err != nil { 153 | t.setError(err) 154 | break loop 155 | } 156 | case <-t.ctx.Done(): 157 | t.setError(t.ctx.Err()) 158 | break loop 159 | } 160 | } 161 | 162 | return t.err 163 | } 164 | 165 | func (t *Batch) setError(err error) { 166 | t.err = err 167 | 168 | t.cancel() 169 | 170 | // Drain as much as we can without blocking. 171 | loop: 172 | for { 173 | select { 174 | case <-t.commitResults: 175 | default: 176 | break loop 177 | } 178 | } 179 | 180 | // Be nice and cleanup. These can take a *lot* of memory. 181 | t.commitResults = nil 182 | t.na = nil 183 | t.ctx = nil 184 | t.nodes = nil 185 | t.size = 0 186 | t.activeCommits = 0 187 | } 188 | 189 | // BatchOption provides a way of setting internal options of 190 | // a Batch. 191 | // 192 | // See this post about the "functional options" pattern: 193 | // http://dave.cheney.net/2014/10/17/functional-options-for-friendly-apis 194 | type BatchOption func(o *batchOptions) 195 | 196 | type batchOptions struct { 197 | maxSize int 198 | maxNodes int 199 | } 200 | 201 | var defaultBatchOptions = batchOptions{ 202 | maxSize: 8 << 20, 203 | 204 | // By default, only batch up to 128 nodes at a time. 205 | // The current implementation of flatfs opens this many file 206 | // descriptors at the same time for the optimized batch write. 207 | maxNodes: 128, 208 | } 209 | 210 | // MaxSizeBatchOption sets the maximum amount of buffered data before writing 211 | // blocks. 
212 | func MaxSizeBatchOption(size int) BatchOption { 213 | return func(o *batchOptions) { 214 | o.maxSize = size 215 | } 216 | } 217 | 218 | // MaxNodesBatchOption sets the maximum number of buffered nodes before writing 219 | // blocks. 220 | func MaxNodesBatchOption(num int) BatchOption { 221 | return func(o *batchOptions) { 222 | o.maxNodes = num 223 | } 224 | } 225 | 226 | // BufferedDAG implements DAGService using a Batch NodeAdder to wrap add 227 | // operations in the given DAGService. It will trigger Commit() before any 228 | // non-Add operations, but otherwise calling Commit() is left to the user. 229 | type BufferedDAG struct { 230 | ds DAGService 231 | b *Batch 232 | } 233 | 234 | // NewBufferedDAG creates a BufferedDAG using the given DAGService and the 235 | // given options for the Batch NodeAdder. 236 | func NewBufferedDAG(ctx context.Context, ds DAGService, opts ...BatchOption) *BufferedDAG { 237 | return &BufferedDAG{ 238 | ds: ds, 239 | b: NewBatch(ctx, ds, opts...), 240 | } 241 | } 242 | 243 | // Commit calls commit on the Batch. 244 | func (bd *BufferedDAG) Commit() error { 245 | return bd.b.Commit() 246 | } 247 | 248 | // Add adds a new node using Batch. 249 | func (bd *BufferedDAG) Add(ctx context.Context, n Node) error { 250 | return bd.b.Add(ctx, n) 251 | } 252 | 253 | // AddMany adds many nodes using Batch. 254 | func (bd *BufferedDAG) AddMany(ctx context.Context, nds []Node) error { 255 | return bd.b.AddMany(ctx, nds) 256 | } 257 | 258 | // Get commits and gets a node from the DAGService. 259 | func (bd *BufferedDAG) Get(ctx context.Context, c cid.Cid) (Node, error) { 260 | err := bd.b.Commit() 261 | if err != nil { 262 | return nil, err 263 | } 264 | return bd.ds.Get(ctx, c) 265 | } 266 | 267 | // GetMany commits and gets nodes from the DAGService. 
268 | func (bd *BufferedDAG) GetMany(ctx context.Context, cs []cid.Cid) <-chan *NodeOption { 269 | err := bd.b.Commit() 270 | if err != nil { 271 | ch := make(chan *NodeOption, 1) 272 | defer close(ch) 273 | ch <- &NodeOption{ 274 | Node: nil, 275 | Err: err, 276 | } 277 | return ch 278 | } 279 | return bd.ds.GetMany(ctx, cs) 280 | } 281 | 282 | // Remove commits and removes a node from the DAGService. 283 | func (bd *BufferedDAG) Remove(ctx context.Context, c cid.Cid) error { 284 | err := bd.b.Commit() 285 | if err != nil { 286 | return err 287 | } 288 | return bd.ds.Remove(ctx, c) 289 | } 290 | 291 | // RemoveMany commits and removes nodes from the DAGService. 292 | func (bd *BufferedDAG) RemoveMany(ctx context.Context, cs []cid.Cid) error { 293 | err := bd.b.Commit() 294 | if err != nil { 295 | return err 296 | } 297 | return bd.ds.RemoveMany(ctx, cs) 298 | } 299 | -------------------------------------------------------------------------------- /batch_test.go: -------------------------------------------------------------------------------- 1 | package format 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "fmt" 7 | "sync" 8 | "testing" 9 | 10 | cid "github.com/ipfs/go-cid" 11 | ) 12 | 13 | // Test dag 14 | type testDag struct { 15 | mu sync.Mutex 16 | nodes map[string]Node 17 | } 18 | 19 | func newTestDag() *testDag { 20 | return &testDag{nodes: make(map[string]Node)} 21 | } 22 | 23 | func (d *testDag) Get(ctx context.Context, cid cid.Cid) (Node, error) { 24 | d.mu.Lock() 25 | defer d.mu.Unlock() 26 | if n, ok := d.nodes[cid.KeyString()]; ok { 27 | return n, nil 28 | } 29 | return nil, ErrNotFound{cid} 30 | } 31 | 32 | func (d *testDag) GetMany(ctx context.Context, cids []cid.Cid) <-chan *NodeOption { 33 | d.mu.Lock() 34 | defer d.mu.Unlock() 35 | out := make(chan *NodeOption, len(cids)) 36 | for _, c := range cids { 37 | if n, ok := d.nodes[c.KeyString()]; ok { 38 | out <- &NodeOption{Node: n} 39 | } else { 40 | out <- &NodeOption{Err: ErrNotFound{c}} 41 | } 42 
| } 43 | close(out) 44 | return out 45 | } 46 | 47 | func (d *testDag) Add(ctx context.Context, node Node) error { 48 | d.mu.Lock() 49 | defer d.mu.Unlock() 50 | d.nodes[node.Cid().KeyString()] = node 51 | return nil 52 | } 53 | 54 | func (d *testDag) AddMany(ctx context.Context, nodes []Node) error { 55 | d.mu.Lock() 56 | defer d.mu.Unlock() 57 | for _, n := range nodes { 58 | d.nodes[n.Cid().KeyString()] = n 59 | } 60 | return nil 61 | } 62 | 63 | func (d *testDag) Remove(ctx context.Context, c cid.Cid) error { 64 | d.mu.Lock() 65 | defer d.mu.Unlock() 66 | delete(d.nodes, c.KeyString()) 67 | return nil 68 | } 69 | 70 | func (d *testDag) RemoveMany(ctx context.Context, cids []cid.Cid) error { 71 | d.mu.Lock() 72 | defer d.mu.Unlock() 73 | for _, c := range cids { 74 | delete(d.nodes, c.KeyString()) 75 | } 76 | return nil 77 | } 78 | 79 | var _ DAGService = new(testDag) 80 | 81 | func TestBatch(t *testing.T) { 82 | ctx, cancel := context.WithCancel(context.Background()) 83 | defer cancel() 84 | 85 | d := newTestDag() 86 | b := NewBatch(ctx, d) 87 | for i := 0; i < 1000; i++ { 88 | // It would be great if we could use *many* different nodes here 89 | // but we can't add any dependencies and I don't feel like adding 90 | // any more testing code. 
91 | if err := b.Add(ctx, new(EmptyNode)); err != nil { 92 | t.Fatal(err) 93 | } 94 | } 95 | if err := b.Commit(); err != nil { 96 | t.Fatal(err) 97 | } 98 | 99 | n, err := d.Get(ctx, new(EmptyNode).Cid()) 100 | if err != nil { 101 | t.Fatal(err) 102 | } 103 | switch n.(type) { 104 | case *EmptyNode: 105 | default: 106 | t.Fatal("expected the node to exist in the dag") 107 | } 108 | 109 | if len(d.nodes) != 1 { 110 | t.Fatal("should have one node") 111 | } 112 | } 113 | 114 | func TestBufferedDAG(t *testing.T) { 115 | ds := newTestDag() 116 | ctx, cancel := context.WithCancel(context.Background()) 117 | defer cancel() 118 | var bdag DAGService = NewBufferedDAG(ctx, ds) 119 | 120 | for i := 0; i < 1000; i++ { 121 | n := new(EmptyNode) 122 | if err := bdag.Add(ctx, n); err != nil { 123 | t.Fatal(err) 124 | } 125 | if _, err := bdag.Get(ctx, n.Cid()); err != nil { 126 | t.Fatal(err) 127 | } 128 | if err := bdag.Remove(ctx, n.Cid()); err != nil { 129 | t.Fatal(err) 130 | } 131 | } 132 | } 133 | 134 | func TestBatchOptions(t *testing.T) { 135 | ctx, cancel := context.WithCancel(context.Background()) 136 | defer cancel() 137 | 138 | wantMaxSize := 8 << 10 139 | wantMaxNodes := 500 140 | d := newTestDag() 141 | b := NewBatch(ctx, d, MaxSizeBatchOption(wantMaxSize), MaxNodesBatchOption(wantMaxNodes)) 142 | if b.opts.maxSize != wantMaxSize/parallelCommits { 143 | t.Fatalf("maxSize incorrect, want: %d, got: %d", wantMaxSize, b.opts.maxSize) 144 | } 145 | if b.opts.maxNodes != wantMaxNodes/parallelCommits { 146 | t.Fatalf("maxNodes incorrect, want: %d, got: %d", wantMaxNodes, b.opts.maxNodes) 147 | } 148 | } 149 | 150 | func TestErrorTypes(t *testing.T) { 151 | d := newTestDag() 152 | notFoundNode := &EmptyNode{} 153 | _, err := d.Get(context.Background(), notFoundNode.Cid()) 154 | if err == nil { 155 | t.Fatal("should throw NotFound error") 156 | } 157 | 158 | err2 := fmt.Errorf("could not read: %w", err) 159 | 160 | if !errors.Is(err, ErrNotFound{}) { 161 | t.Fatal("should 
be an ErrNotFound") 162 | } 163 | 164 | if !errors.Is(err2, ErrNotFound{}) { 165 | t.Fatal("should be an ErrNotFound") 166 | } 167 | } 168 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | coverage: 2 | range: "50...100" 3 | comment: off 4 | -------------------------------------------------------------------------------- /coding.go: -------------------------------------------------------------------------------- 1 | package format 2 | 3 | import ( 4 | "fmt" 5 | 6 | blocks "github.com/ipfs/go-block-format" 7 | ) 8 | 9 | // DecodeBlockFunc functions decode blocks into nodes. 10 | type DecodeBlockFunc func(block blocks.Block) (Node, error) 11 | 12 | // Registry is a structure for storing mappings of multicodec IPLD codec numbers to DecodeBlockFunc functions. 13 | // 14 | // Registry includes no mutexing. If using Registry in a concurrent context, you must handle synchronization yourself. 15 | // (Typically, it is recommended to do initialization earlier in a program, before fanning out goroutines; 16 | // this avoids the need for mutexing overhead.) 17 | // 18 | // Multicodec indicator numbers are specified in 19 | // https://github.com/multiformats/multicodec/blob/master/table.csv . 20 | // You should not use indicator numbers which are not specified in that table 21 | // (however, there is nothing in this implementation that will attempt to stop you, either). 22 | type Registry struct { 23 | decoders map[uint64]DecodeBlockFunc 24 | } 25 | 26 | func (r *Registry) ensureInit() { 27 | if r.decoders != nil { 28 | return 29 | } 30 | r.decoders = make(map[uint64]DecodeBlockFunc) 31 | } 32 | 33 | // Register registers decoder for all blocks with the passed codec. 34 | // 35 | // This will silently replace any existing registered block decoders. 
36 | func (r *Registry) Register(codec uint64, decoder DecodeBlockFunc) { 37 | r.ensureInit() 38 | if decoder == nil { 39 | panic("not sensible to attempt to register a nil function") 40 | } 41 | r.decoders[codec] = decoder 42 | } 43 | 44 | func (r *Registry) Decode(block blocks.Block) (Node, error) { 45 | // Short-circuit by cast if we already have a Node. 46 | if node, ok := block.(Node); ok { 47 | return node, nil 48 | } 49 | 50 | ty := block.Cid().Type() 51 | r.ensureInit() 52 | decoder, ok := r.decoders[ty] 53 | 54 | if ok { 55 | return decoder(block) 56 | } else { 57 | // TODO: get the *long* name for this format 58 | return nil, fmt.Errorf("unrecognized object type: %d", ty) 59 | } 60 | } 61 | 62 | // Decode decodes the given block using passed DecodeBlockFunc. 63 | // Note: this is just a helper function, consider using the DecodeBlockFunc itself rather than this helper 64 | func Decode(block blocks.Block, decoder DecodeBlockFunc) (Node, error) { 65 | // Short-circuit by cast if we already have a Node. 
66 | if node, ok := block.(Node); ok { 67 | return node, nil 68 | } 69 | 70 | return decoder(block) 71 | } 72 | -------------------------------------------------------------------------------- /coding_test.go: -------------------------------------------------------------------------------- 1 | package format 2 | 3 | import ( 4 | "errors" 5 | "testing" 6 | 7 | blocks "github.com/ipfs/go-block-format" 8 | cid "github.com/ipfs/go-cid" 9 | mh "github.com/multiformats/go-multihash" 10 | ) 11 | 12 | func TestDecode(t *testing.T) { 13 | decoder := func(b blocks.Block) (Node, error) { 14 | node := &EmptyNode{} 15 | if b.RawData() != nil || !b.Cid().Equals(node.Cid()) { 16 | return nil, errors.New("can only decode empty blocks") 17 | } 18 | return node, nil 19 | } 20 | 21 | id, err := cid.Prefix{ 22 | Version: 1, 23 | Codec: cid.Raw, 24 | MhType: mh.IDENTITY, 25 | MhLength: 0, 26 | }.Sum(nil) 27 | 28 | if err != nil { 29 | t.Fatalf("failed to create cid: %s", err) 30 | } 31 | 32 | block, err := blocks.NewBlockWithCid(nil, id) 33 | if err != nil { 34 | t.Fatalf("failed to create empty block: %s", err) 35 | } 36 | node, err := Decode(block, decoder) 37 | if err != nil { 38 | t.Fatalf("failed to decode empty node: %s", err) 39 | } 40 | if !node.Cid().Equals(id) { 41 | t.Fatalf("empty node doesn't have the right cid") 42 | } 43 | 44 | if _, ok := node.(*EmptyNode); !ok { 45 | t.Fatalf("empty node doesn't have the right type") 46 | } 47 | 48 | } 49 | 50 | func TestRegistryDecode(t *testing.T) { 51 | decoder := func(b blocks.Block) (Node, error) { 52 | node := &EmptyNode{} 53 | if b.RawData() != nil || !b.Cid().Equals(node.Cid()) { 54 | return nil, errors.New("can only decode empty blocks") 55 | } 56 | return node, nil 57 | } 58 | 59 | id, err := cid.Prefix{ 60 | Version: 1, 61 | Codec: cid.Raw, 62 | MhType: mh.IDENTITY, 63 | MhLength: 0, 64 | }.Sum(nil) 65 | 66 | if err != nil { 67 | t.Fatalf("failed to create cid: %s", err) 68 | } 69 | 70 | block, err := 
blocks.NewBlockWithCid(nil, id) 71 | if err != nil { 72 | t.Fatalf("failed to create empty block: %s", err) 73 | } 74 | 75 | reg := Registry{} 76 | _, err = reg.Decode(block) 77 | if err == nil || err.Error() != "unrecognized object type: 85" { 78 | t.Fatalf("expected error, got %v", err) 79 | } 80 | reg.Register(cid.Raw, decoder) 81 | node, err := reg.Decode(block) 82 | if err != nil { 83 | t.Fatalf("failed to decode empty node: %s", err) 84 | } 85 | 86 | if !node.Cid().Equals(id) { 87 | t.Fatalf("empty node doesn't have the right cid") 88 | } 89 | 90 | if _, ok := node.(*EmptyNode); !ok { 91 | t.Fatalf("empty node doesn't have the right type") 92 | } 93 | 94 | } 95 | -------------------------------------------------------------------------------- /daghelpers.go: -------------------------------------------------------------------------------- 1 | package format 2 | 3 | import ( 4 | "context" 5 | 6 | cid "github.com/ipfs/go-cid" 7 | ) 8 | 9 | // GetLinks returns the CIDs of the children of the given node. Prefer this 10 | // method over looking up the node itself and calling `Links()` on it as this 11 | // method may be able to use a link cache. 12 | func GetLinks(ctx context.Context, ng NodeGetter, c cid.Cid) ([]*Link, error) { 13 | if c.Type() == cid.Raw { 14 | return nil, nil 15 | } 16 | if gl, ok := ng.(LinkGetter); ok { 17 | return gl.GetLinks(ctx, c) 18 | } 19 | node, err := ng.Get(ctx, c) 20 | if err != nil { 21 | return nil, err 22 | } 23 | return node.Links(), nil 24 | } 25 | 26 | // GetDAG will fill out all of the links of the given Node. 27 | // It returns an array of NodePromise with the linked nodes all in the proper 28 | // order. 
29 | func GetDAG(ctx context.Context, ds NodeGetter, root Node) []*NodePromise { 30 | var cids []cid.Cid 31 | for _, lnk := range root.Links() { 32 | cids = append(cids, lnk.Cid) 33 | } 34 | 35 | return GetNodes(ctx, ds, cids) 36 | } 37 | 38 | // GetNodes returns an array of 'FutureNode' promises, with each corresponding 39 | // to the key with the same index as the passed in keys 40 | func GetNodes(ctx context.Context, ds NodeGetter, keys []cid.Cid) []*NodePromise { 41 | 42 | // Early out if no work to do 43 | if len(keys) == 0 { 44 | return nil 45 | } 46 | 47 | promises := make([]*NodePromise, len(keys)) 48 | for i := range keys { 49 | promises[i] = NewNodePromise(ctx) 50 | } 51 | 52 | dedupedKeys := dedupeKeys(keys) 53 | go func() { 54 | ctx, cancel := context.WithCancel(ctx) 55 | defer cancel() 56 | 57 | nodechan := ds.GetMany(ctx, dedupedKeys) 58 | 59 | for count := 0; count < len(keys); { 60 | select { 61 | case opt, ok := <-nodechan: 62 | if !ok { 63 | for _, p := range promises { 64 | p.Fail(ErrNotFound{}) 65 | } 66 | return 67 | } 68 | 69 | if opt.Err != nil { 70 | for _, p := range promises { 71 | p.Fail(opt.Err) 72 | } 73 | return 74 | } 75 | 76 | nd := opt.Node 77 | c := nd.Cid() 78 | for i, lnk_c := range keys { 79 | if c.Equals(lnk_c) { 80 | count++ 81 | promises[i].Send(nd) 82 | } 83 | } 84 | case <-ctx.Done(): 85 | return 86 | } 87 | } 88 | }() 89 | return promises 90 | } 91 | 92 | func Copy(ctx context.Context, from, to DAGService, root cid.Cid) error { 93 | node, err := from.Get(ctx, root) 94 | if err != nil { 95 | return err 96 | } 97 | links := node.Links() 98 | for _, link := range links { 99 | err := Copy(ctx, from, to, link.Cid) 100 | if err != nil { 101 | return err 102 | } 103 | } 104 | err = to.Add(ctx, node) 105 | if err != nil { 106 | return err 107 | } 108 | return nil 109 | } 110 | 111 | // Remove duplicates from a list of keys 112 | func dedupeKeys(cids []cid.Cid) []cid.Cid { 113 | set := cid.NewSet() 114 | for _, c := range cids { 
115 | set.Add(c) 116 | } 117 | return set.Keys() 118 | } 119 | -------------------------------------------------------------------------------- /daghelpers_test.go: -------------------------------------------------------------------------------- 1 | package format 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | 7 | "github.com/ipfs/go-cid" 8 | mh "github.com/multiformats/go-multihash" 9 | ) 10 | 11 | type TestNode struct { 12 | links []*Link 13 | data []byte 14 | builder cid.Builder 15 | } 16 | 17 | var v0CidPrefix = cid.Prefix{ 18 | Codec: cid.DagProtobuf, 19 | MhLength: -1, 20 | MhType: mh.SHA2_256, 21 | Version: 0, 22 | } 23 | 24 | func InitNode(d []byte) *TestNode { 25 | return &TestNode{ 26 | data: d, 27 | builder: v0CidPrefix, 28 | } 29 | } 30 | 31 | func (n *TestNode) Resolve([]string) (interface{}, []string, error) { 32 | return nil, nil, ErrEmptyNode 33 | } 34 | 35 | func (n *TestNode) Tree(string, int) []string { 36 | return nil 37 | } 38 | 39 | func (n *TestNode) ResolveLink([]string) (*Link, []string, error) { 40 | return nil, nil, ErrEmptyNode 41 | } 42 | 43 | func (n *TestNode) Copy() Node { 44 | return &EmptyNode{} 45 | } 46 | 47 | func (n *TestNode) Cid() cid.Cid { 48 | c, err := n.builder.Sum(n.RawData()) 49 | if err != nil { 50 | return cid.Cid{} 51 | } 52 | return c 53 | } 54 | 55 | func (n *TestNode) Links() []*Link { 56 | return n.links 57 | } 58 | 59 | func (n *TestNode) Loggable() map[string]interface{} { 60 | return nil 61 | } 62 | 63 | func (n *TestNode) String() string { 64 | return string(n.data) 65 | } 66 | 67 | func (n *TestNode) RawData() []byte { 68 | return n.data 69 | } 70 | 71 | func (n *TestNode) Size() (uint64, error) { 72 | return 0, nil 73 | } 74 | 75 | func (n *TestNode) Stat() (*NodeStat, error) { 76 | return &NodeStat{}, nil 77 | } 78 | 79 | // AddNodeLink adds a link to another node. 
80 | func (n *TestNode) AddNodeLink(name string, that Node) error { 81 | 82 | lnk, err := MakeLink(that) 83 | if err != nil { 84 | return err 85 | } 86 | 87 | lnk.Name = name 88 | 89 | n.AddRawLink(name, lnk) 90 | 91 | return nil 92 | } 93 | 94 | func (n *TestNode) AddRawLink(name string, l *Link) error { 95 | 96 | n.links = append(n.links, &Link{ 97 | Name: name, 98 | Size: l.Size, 99 | Cid: l.Cid, 100 | }) 101 | return nil 102 | } 103 | 104 | func TestCopy(t *testing.T) { 105 | ctx, cancel := context.WithCancel(context.Background()) 106 | defer cancel() 107 | from := newTestDag() 108 | 109 | root := InitNode([]byte("level0")) 110 | l11 := InitNode([]byte("leve1_node1")) 111 | l12 := InitNode([]byte("leve1_node2")) 112 | l21 := InitNode([]byte("leve2_node1")) 113 | l22 := InitNode([]byte("leve2_node2")) 114 | l23 := InitNode([]byte("leve2_node3")) 115 | 116 | l11.AddNodeLink(l21.Cid().String(), l21) 117 | l11.AddNodeLink(l22.Cid().String(), l22) 118 | l11.AddNodeLink(l23.Cid().String(), l23) 119 | root.AddNodeLink(l11.Cid().String(), l11) 120 | root.AddNodeLink(l12.Cid().String(), l12) 121 | 122 | for _, n := range []Node{l23, l22, l21, l12, l11, root} { 123 | err := from.Add(ctx, n) 124 | if err != nil { 125 | t.Fatal(err) 126 | } 127 | } 128 | to := newTestDag() 129 | err := Copy(ctx, from, to, root.Cid()) 130 | if err != nil { 131 | t.Error(err) 132 | } 133 | 134 | r, err := to.Get(ctx, root.Cid()) 135 | if err != nil || len(r.Links()) != 2 { 136 | t.Error("fail to copy dag") 137 | } 138 | l1, err := to.Get(ctx, l11.Cid()) 139 | if err != nil || len(l1.Links()) != 3 { 140 | t.Error("fail to copy dag") 141 | } 142 | } 143 | -------------------------------------------------------------------------------- /format.go: -------------------------------------------------------------------------------- 1 | package format 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | 7 | blocks "github.com/ipfs/go-block-format" 8 | 9 | cid "github.com/ipfs/go-cid" 10 | ) 11 | 12 | type 
// Link represents an IPFS Merkle DAG Link between Nodes.
type Link struct {
	// Name is the UTF-8 name of the link. It should be unique per object.
	Name string // utf8

	// Size is the cumulative size of the target object.
	Size uint64

	// Cid is the CID of the target object.
	Cid cid.Cid
}
// NodeStat is a statistics object for a Node. Mostly sizes.
type NodeStat struct {
	Hash           string
	NumLinks       int // number of links in link table
	BlockSize      int // size of the raw, encoded data
	LinksSize      int // size of the links segment
	DataSize       int // size of the data segment
	CumulativeSize int // cumulative size of object and its references
}

// String renders the numeric fields of the stat on a single line. The Hash
// field is not part of the output.
func (ns NodeStat) String() string {
	return fmt.Sprintf(
		"NodeStat{NumLinks: %d, BlockSize: %d, LinksSize: %d, DataSize: %d, CumulativeSize: %d}",
		ns.NumLinks,
		ns.BlockSize,
		ns.LinksSize,
		ns.DataSize,
		ns.CumulativeSize,
	)
}
37 | }.Sum(nil) 38 | 39 | if err != nil { 40 | panic("failed to create an empty cid!") 41 | } 42 | return id 43 | } 44 | 45 | func (n *EmptyNode) Links() []*Link { 46 | return nil 47 | } 48 | 49 | func (n *EmptyNode) Loggable() map[string]interface{} { 50 | return nil 51 | } 52 | 53 | func (n *EmptyNode) String() string { 54 | return "[]" 55 | } 56 | 57 | func (n *EmptyNode) RawData() []byte { 58 | return nil 59 | } 60 | 61 | func (n *EmptyNode) Size() (uint64, error) { 62 | return 0, nil 63 | } 64 | 65 | func (n *EmptyNode) Stat() (*NodeStat, error) { 66 | return &NodeStat{}, nil 67 | } 68 | 69 | func TestNodeType(t *testing.T) { 70 | // Type assertion. 71 | var _ Node = &EmptyNode{} 72 | } 73 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/ipfs/go-ipld-format 2 | 3 | go 1.23.8 4 | 5 | require ( 6 | github.com/ipfs/go-block-format v0.2.1 7 | github.com/ipfs/go-cid v0.5.0 8 | github.com/multiformats/go-multihash v0.2.3 9 | ) 10 | 11 | require ( 12 | github.com/ipfs/boxo v0.30.0 // indirect 13 | github.com/klauspost/cpuid/v2 v2.2.10 // indirect 14 | github.com/minio/sha256-simd v1.0.1 // indirect 15 | github.com/mr-tron/base58 v1.2.0 // indirect 16 | github.com/multiformats/go-base32 v0.1.0 // indirect 17 | github.com/multiformats/go-base36 v0.2.0 // indirect 18 | github.com/multiformats/go-multibase v0.2.0 // indirect 19 | github.com/multiformats/go-varint v0.0.7 // indirect 20 | github.com/spaolacci/murmur3 v1.1.0 // indirect 21 | golang.org/x/crypto v0.37.0 // indirect 22 | golang.org/x/sys v0.32.0 // indirect 23 | lukechampine.com/blake3 v1.4.0 // indirect 24 | ) 25 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/decred/dcrd/dcrec/secp256k1/v4 v4.4.0 
h1:NMZiJj8QnKe1LgsbDayM4UoHwbvwDRwnI3hwNaAHRnc= 2 | github.com/decred/dcrd/dcrec/secp256k1/v4 v4.4.0/go.mod h1:ZXNYxsqcloTdSy/rNShjYzMhyjf0LaoftYK0p+A3h40= 3 | github.com/ipfs/boxo v0.30.0 h1:7afsoxPGGqfoH7Dum/wOTGUB9M5fb8HyKPMlLfBvIEQ= 4 | github.com/ipfs/boxo v0.30.0/go.mod h1:BPqgGGyHB9rZZcPSzah2Dc9C+5Or3U1aQe7EH1H7370= 5 | github.com/ipfs/go-block-format v0.2.1 h1:96kW71XGNNa+mZw/MTzJrCpMhBWCrd9kBLoKm9Iip/Q= 6 | github.com/ipfs/go-block-format v0.2.1/go.mod h1:frtvXHMQhM6zn7HvEQu+Qz5wSTj+04oEH/I+NjDgEjk= 7 | github.com/ipfs/go-cid v0.5.0 h1:goEKKhaGm0ul11IHA7I6p1GmKz8kEYniqFopaB5Otwg= 8 | github.com/ipfs/go-cid v0.5.0/go.mod h1:0L7vmeNXpQpUS9vt+yEARkJ8rOg43DF3iPgn4GIN0mk= 9 | github.com/ipfs/go-test v0.2.1 h1:/D/a8xZ2JzkYqcVcV/7HYlCnc7bv/pKHQiX5TdClkPE= 10 | github.com/ipfs/go-test v0.2.1/go.mod h1:dzu+KB9cmWjuJnXFDYJwC25T3j1GcN57byN+ixmK39M= 11 | github.com/klauspost/cpuid/v2 v2.2.10 h1:tBs3QSyvjDyFTq3uoc/9xFpCuOsJQFNPiAhYdw2skhE= 12 | github.com/klauspost/cpuid/v2 v2.2.10/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0= 13 | github.com/libp2p/go-buffer-pool v0.1.0 h1:oK4mSFcQz7cTQIfqbe4MIj9gLW+mnanjyFtc6cdF0Y8= 14 | github.com/libp2p/go-buffer-pool v0.1.0/go.mod h1:N+vh8gMqimBzdKkSMVuydVDq+UV5QTWy5HSiZacSbPg= 15 | github.com/libp2p/go-libp2p v0.41.1 h1:8ecNQVT5ev/jqALTvisSJeVNvXYJyK4NhQx1nNRXQZE= 16 | github.com/libp2p/go-libp2p v0.41.1/go.mod h1:DcGTovJzQl/I7HMrby5ZRjeD0kQkGiy+9w6aEkSZpRI= 17 | github.com/minio/sha256-simd v1.0.1 h1:6kaan5IFmwTNynnKKpDHe6FWHohJOHhCPchzK49dzMM= 18 | github.com/minio/sha256-simd v1.0.1/go.mod h1:Pz6AKMiUdngCLpeTL/RJY1M9rUuPMYujV5xJjtbRSN8= 19 | github.com/mr-tron/base58 v1.2.0 h1:T/HDJBh4ZCPbU39/+c3rRvE0uKBQlU27+QI8LJ4t64o= 20 | github.com/mr-tron/base58 v1.2.0/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc= 21 | github.com/multiformats/go-base32 v0.1.0 h1:pVx9xoSPqEIQG8o+UbAe7DNi51oej1NtK+aGkbLYxPE= 22 | github.com/multiformats/go-base32 v0.1.0/go.mod h1:Kj3tFY6zNr+ABYMqeUNeGvkIC/UYgtWibDcT0rExnbI= 23 | 
github.com/multiformats/go-base36 v0.2.0 h1:lFsAbNOGeKtuKozrtBsAkSVhv1p9D0/qedU9rQyccr0= 24 | github.com/multiformats/go-base36 v0.2.0/go.mod h1:qvnKE++v+2MWCfePClUEjE78Z7P2a1UV0xHgWc0hkp4= 25 | github.com/multiformats/go-multiaddr v0.15.0 h1:zB/HeaI/apcZiTDwhY5YqMvNVl/oQYvs3XySU+qeAVo= 26 | github.com/multiformats/go-multiaddr v0.15.0/go.mod h1:JSVUmXDjsVFiW7RjIFMP7+Ev+h1DTbiJgVeTV/tcmP0= 27 | github.com/multiformats/go-multibase v0.2.0 h1:isdYCVLvksgWlMW9OZRYJEa9pZETFivncJHmHnnd87g= 28 | github.com/multiformats/go-multibase v0.2.0/go.mod h1:bFBZX4lKCA/2lyOFSAoKH5SS6oPyjtnzK/XTFDPkNuk= 29 | github.com/multiformats/go-multicodec v0.9.0 h1:pb/dlPnzee/Sxv/j4PmkDRxCOi3hXTz3IbPKOXWJkmg= 30 | github.com/multiformats/go-multicodec v0.9.0/go.mod h1:L3QTQvMIaVBkXOXXtVmYE+LI16i14xuaojr/H7Ai54k= 31 | github.com/multiformats/go-multihash v0.2.3 h1:7Lyc8XfX/IY2jWb/gI7JP+o7JEq9hOa7BFvVU9RSh+U= 32 | github.com/multiformats/go-multihash v0.2.3/go.mod h1:dXgKXCXjBzdscBLk9JkjINiEsCKRVch90MdaGiKsvSM= 33 | github.com/multiformats/go-varint v0.0.7 h1:sWSGR+f/eu5ABZA2ZpYKBILXTTs9JWpdEM/nEGOHFS8= 34 | github.com/multiformats/go-varint v0.0.7/go.mod h1:r8PUYw/fD/SjBCiKOoDlGF6QawOELpZAu9eioSos/OU= 35 | github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI= 36 | github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= 37 | golang.org/x/crypto v0.37.0 h1:kJNSjF/Xp7kU0iB2Z+9viTPMW4EqqsrywMXLJOOsXSE= 38 | golang.org/x/crypto v0.37.0/go.mod h1:vg+k43peMZ0pUMhYmVAWysMK35e6ioLh3wB8ZCAfbVc= 39 | golang.org/x/exp v0.0.0-20250408133849-7e4ce0ab07d0 h1:R84qjqJb5nVJMxqWYb3np9L5ZsaDtB+a39EqjV0JSUM= 40 | golang.org/x/exp v0.0.0-20250408133849-7e4ce0ab07d0/go.mod h1:S9Xr4PYopiDyqSyp5NjCrhFrqg6A5zA2E/iPHPhqnS8= 41 | golang.org/x/sys v0.32.0 h1:s77OFDvIQeibCmezSnk/q6iAfkdiQaJi4VzroCFrN20= 42 | golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= 43 | google.golang.org/protobuf v1.36.6 
h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= 44 | google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= 45 | lukechampine.com/blake3 v1.4.0 h1:xDbKOZCVbnZsfzM6mHSYcGRHZ3YrLDzqz8XnV4uaD5w= 46 | lukechampine.com/blake3 v1.4.0/go.mod h1:MQJNQCTnR+kwOP/JEZSxj3MaQjp80FOFSNMMHXcSeX0= 47 | -------------------------------------------------------------------------------- /merkledag.go: -------------------------------------------------------------------------------- 1 | package format 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | 7 | cid "github.com/ipfs/go-cid" 8 | ) 9 | 10 | // ErrNotFound is used to signal when a Node could not be found. The specific 11 | // meaning will depend on the DAGService implementation, which may be trying 12 | // to read nodes locally but also, trying to find them remotely. 13 | // 14 | // The Cid field can be filled in to provide additional context. 15 | type ErrNotFound struct { 16 | Cid cid.Cid 17 | } 18 | 19 | // Error implements the error interface and returns a human-readable 20 | // message for this error. 21 | func (e ErrNotFound) Error() string { 22 | if e.Cid == cid.Undef { 23 | return "ipld: could not find node" 24 | } 25 | 26 | return "ipld: could not find " + e.Cid.String() 27 | } 28 | 29 | // Is allows to check whether any error is of this ErrNotFound type. 30 | // Do not use this directly, but rather errors.Is(yourError, ErrNotFound). 31 | func (e ErrNotFound) Is(err error) bool { 32 | switch err.(type) { 33 | case ErrNotFound: 34 | return true 35 | default: 36 | return false 37 | } 38 | } 39 | 40 | // NotFound returns true. 41 | func (e ErrNotFound) NotFound() bool { 42 | return true 43 | } 44 | 45 | // IsNotFound returns if the given error is or wraps an ErrNotFound 46 | // (equivalent to errors.Is(err, ErrNotFound{})) 47 | func IsNotFound(err error) bool { 48 | return errors.Is(err, ErrNotFound{}) 49 | } 50 | 51 | // Either a node or an error. 
// LinkGetter is an interface that NodeGetters can optionally implement to
// make finding linked objects faster.
type LinkGetter interface {
	NodeGetter

	// TODO(ipfs/go-ipld-format#9): This should return []cid.Cid

	// GetLinks returns the children of the node referred to by the given
	// CID.
	GetLinks(ctx context.Context, nd cid.Cid) ([]*Link, error)
}
105 | RemoveMany(context.Context, []cid.Cid) error 106 | } 107 | -------------------------------------------------------------------------------- /navipld.go: -------------------------------------------------------------------------------- 1 | package format 2 | 3 | import ( 4 | "context" 5 | 6 | cid "github.com/ipfs/go-cid" 7 | ) 8 | 9 | // NavigableIPLDNode implements the `NavigableNode` interface wrapping 10 | // an IPLD `Node` and providing support for node promises. 11 | type NavigableIPLDNode struct { 12 | node Node 13 | 14 | // The CID of each child of the node. 15 | childCIDs []cid.Cid 16 | 17 | // Node promises for child nodes requested. 18 | childPromises []*NodePromise 19 | // TODO: Consider encapsulating it in a single structure alongside `childCIDs`. 20 | 21 | nodeGetter NodeGetter 22 | // TODO: Should this be stored in the `Walker`'s context to avoid passing 23 | // it along to every node? It seems like a structure that doesn't need 24 | // to be replicated (the entire DAG will use the same `NodeGetter`). 25 | } 26 | 27 | // NewNavigableIPLDNode returns a `NavigableIPLDNode` wrapping the provided 28 | // `node`. 29 | func NewNavigableIPLDNode(node Node, nodeGetter NodeGetter) *NavigableIPLDNode { 30 | nn := &NavigableIPLDNode{ 31 | node: node, 32 | nodeGetter: nodeGetter, 33 | } 34 | 35 | nn.childCIDs = getLinkCids(node) 36 | nn.childPromises = make([]*NodePromise, len(nn.childCIDs)) 37 | 38 | return nn 39 | } 40 | 41 | // FetchChild implements the `NavigableNode` interface using node promises 42 | // to preload the following child nodes to `childIndex` leaving them ready 43 | // for subsequent `FetchChild` calls. 44 | func (nn *NavigableIPLDNode) FetchChild(ctx context.Context, childIndex uint) (NavigableNode, error) { 45 | // This function doesn't check that `childIndex` is valid, that's 46 | // the `Walker` responsibility. 47 | 48 | // If we drop to <= preloadSize/2 preloading nodes, preload the next 10. 
49 | for i := childIndex; i < childIndex+preloadSize/2 && i < uint(len(nn.childPromises)); i++ { 50 | // TODO: Check if canceled. 51 | if nn.childPromises[i] == nil { 52 | nn.preload(ctx, i) 53 | break 54 | } 55 | } 56 | 57 | child, err := nn.getPromiseValue(ctx, childIndex) 58 | 59 | switch err { 60 | case nil: 61 | case context.DeadlineExceeded, context.Canceled: 62 | if ctx.Err() != nil { 63 | return nil, ctx.Err() 64 | } 65 | 66 | // In this case, the context used to *preload* the node (in a previous 67 | // `FetchChild` call) has been canceled. We need to retry the load with 68 | // the current context and we might as well preload some extra nodes 69 | // while we're at it. 70 | nn.preload(ctx, childIndex) 71 | child, err = nn.getPromiseValue(ctx, childIndex) 72 | if err != nil { 73 | return nil, err 74 | } 75 | default: 76 | return nil, err 77 | } 78 | 79 | return NewNavigableIPLDNode(child, nn.nodeGetter), nil 80 | } 81 | 82 | // Number of nodes to preload every time a child is requested. 83 | // TODO: Give more visibility to this constant, it could be an attribute 84 | // set in the `Walker` context that gets passed in `FetchChild`. 85 | const preloadSize = 10 86 | 87 | // Preload at most `preloadSize` child nodes from `beg` through promises 88 | // created using this `ctx`. 89 | func (nn *NavigableIPLDNode) preload(ctx context.Context, beg uint) { 90 | end := beg + preloadSize 91 | if end >= uint(len(nn.childCIDs)) { 92 | end = uint(len(nn.childCIDs)) 93 | } 94 | 95 | copy(nn.childPromises[beg:], GetNodes(ctx, nn.nodeGetter, nn.childCIDs[beg:end])) 96 | } 97 | 98 | // Fetch the actual node (this is the blocking part of the mechanism) 99 | // and invalidate the promise. `preload` should always be called first 100 | // for the `childIndex` being fetch. 101 | // 102 | // TODO: Include `preload` into the beginning of this function? 103 | // (And collapse the two calls in `FetchChild`). 
104 | func (nn *NavigableIPLDNode) getPromiseValue(ctx context.Context, childIndex uint) (Node, error) { 105 | value, err := nn.childPromises[childIndex].Get(ctx) 106 | nn.childPromises[childIndex] = nil 107 | return value, err 108 | } 109 | 110 | // Get the CID of all the links of this `node`. 111 | func getLinkCids(node Node) []cid.Cid { 112 | links := node.Links() 113 | out := make([]cid.Cid, 0, len(links)) 114 | 115 | for _, l := range links { 116 | out = append(out, l.Cid) 117 | } 118 | return out 119 | } 120 | 121 | // GetIPLDNode returns the IPLD `Node` wrapped into this structure. 122 | func (nn *NavigableIPLDNode) GetIPLDNode() Node { 123 | return nn.node 124 | } 125 | 126 | // ChildTotal implements the `NavigableNode` returning the number 127 | // of links (of child nodes) in this node. 128 | func (nn *NavigableIPLDNode) ChildTotal() uint { 129 | return uint(len(nn.GetIPLDNode().Links())) 130 | } 131 | 132 | // ExtractIPLDNode is a helper function that takes a `NavigableNode` 133 | // and returns the IPLD `Node` wrapped inside. Used in the `Visitor` 134 | // function. 135 | // TODO: Check for errors to avoid a panic? 136 | func ExtractIPLDNode(node NavigableNode) Node { 137 | return node.GetIPLDNode() 138 | } 139 | 140 | // TODO: `Cleanup` is not supported at the moment in the `Walker`. 141 | // 142 | // Called in `Walker.up()` when the node is not part of the path anymore. 143 | //func (nn *NavigableIPLDNode) Cleanup() { 144 | // // TODO: Ideally this would be the place to issue a context `cancel()` 145 | // // but since the DAG reader uses multiple contexts in the same session 146 | // // (through `Read` and `CtxReadFull`) we would need to store an array 147 | // // with the multiple contexts in `NavigableIPLDNode` with its corresponding 148 | // // cancel functions. 
149 | //} 150 | -------------------------------------------------------------------------------- /promise.go: -------------------------------------------------------------------------------- 1 | package format 2 | 3 | import ( 4 | "context" 5 | ) 6 | 7 | // NodePromise provides a promise like interface for a dag Node 8 | // the first call to Get will block until the Node is received 9 | // from its internal channels, subsequent calls will return the 10 | // cached node. 11 | // 12 | // Thread Safety: This is multiple-consumer/single-producer safe. 13 | func NewNodePromise(ctx context.Context) *NodePromise { 14 | return &NodePromise{ 15 | done: make(chan struct{}), 16 | ctx: ctx, 17 | } 18 | } 19 | 20 | type NodePromise struct { 21 | value Node 22 | err error 23 | done chan struct{} 24 | 25 | ctx context.Context 26 | } 27 | 28 | // Call this function to fail a promise. 29 | // 30 | // Once a promise has been failed or fulfilled, further attempts to fail it will 31 | // be silently dropped. 32 | func (np *NodePromise) Fail(err error) { 33 | if np.err != nil || np.value != nil { 34 | // Already filled. 35 | return 36 | } 37 | np.err = err 38 | close(np.done) 39 | } 40 | 41 | // Fulfill this promise. 42 | // 43 | // Once a promise has been fulfilled or failed, calling this function will 44 | // panic. 45 | func (np *NodePromise) Send(nd Node) { 46 | // if promise has a value, don't fail it 47 | if np.err != nil || np.value != nil { 48 | panic("already filled") 49 | } 50 | np.value = nd 51 | close(np.done) 52 | } 53 | 54 | // Get the value of this promise. 55 | // 56 | // This function is safe to call concurrently from any number of goroutines. 
57 | func (np *NodePromise) Get(ctx context.Context) (Node, error) { 58 | select { 59 | case <-np.done: 60 | return np.value, np.err 61 | case <-np.ctx.Done(): 62 | return nil, np.ctx.Err() 63 | case <-ctx.Done(): 64 | return nil, ctx.Err() 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /version.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "v0.6.1" 3 | } 4 | -------------------------------------------------------------------------------- /walker.go: -------------------------------------------------------------------------------- 1 | package format 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | ) 7 | 8 | // Walker provides methods to move through a DAG of nodes that implement 9 | // the `NavigableNode` interface. It uses iterative algorithms (instead 10 | // of recursive ones) that expose the `path` of nodes from the root to 11 | // the `ActiveNode` it currently points to. 12 | // 13 | // It provides multiple ways to walk through the DAG (e.g. `Iterate` 14 | // and `Seek`). When using them, you provide a Visitor function that 15 | // will be called for each node the Walker traverses. The Visitor can 16 | // read data from those nodes and, optionally, direct the movement of 17 | // the Walker by calling `Pause` (to stop traversing and return) or 18 | // `NextChild` (to skip a child and its descendants). See the DAG reader 19 | // in `github.com/ipfs/go-unixfs/io/dagreader.go` for a usage example. 20 | // TODO: This example isn't merged yet. 21 | type Walker struct { 22 | 23 | // Sequence of nodes in the DAG from the root to the `ActiveNode`, each 24 | // position in the slice being the parent of the next one. The `ActiveNode` 25 | // resides in the position indexed by `currentDepth` (the slice may contain 26 | // more elements past that point but they should be ignored since the slice 27 | // is not truncated to leverage the already allocated space). 
28 | // 29 | // Every time `down` is called the `currentDepth` increases and the child 30 | // of the `ActiveNode` is inserted after it (effectively becoming the new 31 | // `ActiveNode`). 32 | // 33 | // The slice must *always* have a length bigger than zero with the root 34 | // of the DAG at the first position (empty DAGs are not valid). 35 | path []NavigableNode 36 | 37 | // Depth of the `ActiveNode`. It grows downwards, root being 0, its child 1, 38 | // and so on. It controls the effective length of `path` and `childIndex`. 39 | // 40 | // A currentDepth of -1 signals the start case of a new `Walker` that hasn't 41 | // moved yet. Although this state is an invalid index to the slices, it 42 | // allows to centralize all the visit calls in the `down` move (starting at 43 | // zero would require a special visit case inside every walk operation like 44 | // `Iterate()` and `Seek`). This value should never be returned to after 45 | // the first `down` movement, moving up from the root should always return 46 | // `errUpOnRoot`. 47 | currentDepth int 48 | 49 | // This slice has the index of the child each node in `path` is pointing 50 | // to. The child index in the node can be set past all of its child nodes 51 | // (having a value equal to `ChildTotal`) to signal it has visited (or 52 | // skipped) all of them. A leaf node with no children that has its index 53 | // in zero would also comply with this format. 54 | // 55 | // Complement to `path`, not only do we need to know which nodes have been 56 | // traversed to reach the `ActiveNode` but also which child nodes they are 57 | // to correctly have the active path of the DAG. (Reword this paragraph.) 58 | childIndex []uint 59 | 60 | // Flag to signal that a pause in the current walk operation has been 61 | // requested by the user inside `Visitor`. 
62 | pauseRequested bool 63 | 64 | // Used to pass information from the central `Walker` structure to the 65 | // distributed `NavigableNode`s (to have a centralized configuration 66 | // structure to control the behavior of all of them), e.g., to tell 67 | // the `NavigableIPLDNode` which context should be used to load node 68 | // promises (but this could later be used in more elaborate ways). 69 | ctx context.Context 70 | } 71 | 72 | // `Walker` implementation details: 73 | // 74 | // The `Iterate` and `Seek` walk operations are implemented through two 75 | // basic move methods `up` and `down`, that change which node is the 76 | // `ActiveNode` (modifying the `path` that leads to it). The `NextChild` 77 | // method allows to change which child the `ActiveNode` is pointing to 78 | // in order to change the direction of the descent. 79 | // 80 | // The `down` method is the analogous of a recursive call and the one in 81 | // charge of visiting (possible new) nodes (through `Visitor`) and performing 82 | // some user-defined logic. A `Pause` method is available to interrupt the 83 | // current walk operation after visiting a node. 84 | // 85 | // Key terms and concepts: 86 | // * Walk operation (e.g., `Iterate`). 87 | // * Move methods: `up` and `down`. 88 | // * Active node. 89 | // * Path to the active node. 90 | 91 | // Function called each time a node is arrived upon in a walk operation 92 | // through the `down` method (not when going back `up`). It is the main 93 | // API to implement DAG functionality (e.g., read and seek a file DAG) 94 | // on top of the `Walker` structure. 95 | // 96 | // Its argument is the current `node` being visited (the `ActiveNode`). 97 | // Any error it returns (apart from the internal `errPauseWalkOperation`) 98 | // will be forwarded to the caller of the walk operation (pausing it). 99 | // 100 | // Any of the exported methods of this API should be allowed to be called 101 | // from within this method, e.g., `NextChild`. 
// NavigableNode is the interface the nodes of a DAG need to implement in
// order to be traversed by the `Walker`.
type NavigableNode interface {

	// FetchChild returns the child of this node pointed to by `childIndex`.
	// A `Context` stored in the `Walker` is passed (`ctx`) that may contain
	// configuration attributes stored by the user before initiating the
	// walk operation.
	FetchChild(ctx context.Context, childIndex uint) (NavigableNode, error)

	// ChildTotal returns the number of children of this node.
	ChildTotal() uint

	// GetIPLDNode returns the actual IPLD `Node` behind this navigable node.
	GetIPLDNode() Node

	// TODO: Evaluate providing the `Cleanup` and `Reset` methods.

	// Cleanup is an optional method that is called by the `Walker` when
	// this node leaves the active `path`, i.e., when this node is the
	// `ActiveNode` and the `up` movement is called.
	//Cleanup()
	// Allow this method to return an error? That would imply
	// modifying the `Walker` API, `up()` would now return an error
	// different than `errUpOnRoot`.

	// Reset is an optional function that is called by the `Walker` when
	// `ResetPosition` is called, it is only applied to the root node
	// of the DAG.
	//Reset()
}
149 | } 150 | } 151 | 152 | // ActiveNode returns the `NavigableNode` that `Walker` is pointing 153 | // to at the moment. It changes when `up` or `down` is called. 154 | func (w *Walker) ActiveNode() NavigableNode { 155 | return w.path[w.currentDepth] 156 | // TODO: Add a check for the initial state of `currentDepth` -1? 157 | } 158 | 159 | // ErrDownNoChild signals there is no child at `ActiveChildIndex` in the 160 | // `ActiveNode` to go down to. 161 | var ErrDownNoChild = errors.New("can't go down, the child does not exist") 162 | 163 | // errUpOnRoot signals the end of the DAG after returning to the root. 164 | var errUpOnRoot = errors.New("can't go up, already on root") 165 | 166 | // EndOfDag wraps the `errUpOnRoot` and signals to the user that the 167 | // entire DAG has been iterated. 168 | // 169 | //lint:ignore ST1012 // This is roughly equivalent to io.EOF. 170 | var EndOfDag = errors.New("end of DAG") 171 | 172 | // ErrNextNoChild signals the end of this parent child nodes. 173 | var ErrNextNoChild = errors.New("can't go to the next child, no more child nodes in this parent") 174 | 175 | // errPauseWalkOperation signals the pause of the walk operation. 176 | var errPauseWalkOperation = errors.New("pause in the current walk operation") 177 | 178 | // ErrNilVisitor signals the lack of a `Visitor` function. 179 | var ErrNilVisitor = errors.New("no Visitor function specified") 180 | 181 | // Iterate the DAG through the DFS pre-order walk algorithm, going down 182 | // as much as possible, then `NextChild` to the other siblings, and then up 183 | // (to go down again). The position is saved throughout iterations (and 184 | // can be previously set in `Seek`) allowing `Iterate` to be called 185 | // repeatedly (after a `Pause`) to continue the iteration. 
186 | // 187 | // This function returns the errors received from `down` (generated either 188 | // inside the `Visitor` call or any other errors while fetching the child 189 | // nodes), the rest of the move errors are handled within the function and 190 | // are not returned. 191 | func (w *Walker) Iterate(visitor Visitor) error { 192 | 193 | // Iterate until either: the end of the DAG (`errUpOnRoot`), a `Pause` 194 | // is requested (`errPauseWalkOperation`) or an error happens (while 195 | // going down). 196 | for { 197 | 198 | // First, go down as much as possible. 199 | for { 200 | err := w.down(visitor) 201 | 202 | if err == ErrDownNoChild { 203 | break 204 | // Can't keep going down from this node, try to move Next. 205 | } 206 | 207 | if err == errPauseWalkOperation { 208 | return nil 209 | // Pause requested, `errPauseWalkOperation` is just an internal 210 | // error to signal to pause, don't pass it along. 211 | } 212 | 213 | if err != nil { 214 | return err 215 | // `down` is the only movement that can return *any* error. 216 | } 217 | } 218 | 219 | // Can't move down anymore, turn to the next child in the `ActiveNode` 220 | // to go down a different path. If there are no more child nodes 221 | // available, go back up. 222 | for { 223 | err := w.NextChild() 224 | if err == nil { 225 | break 226 | // No error, it turned to the next child. Try to go down again. 227 | } 228 | 229 | // It can't go Next (`ErrNextNoChild`), try to move up. 230 | err = w.up() 231 | if err != nil { 232 | // Can't move up, on the root again (`errUpOnRoot`). 233 | return EndOfDag 234 | } 235 | 236 | // Moved up, try `NextChild` again. 237 | } 238 | 239 | // Turned to the next child (after potentially many up moves), 240 | // try going down again. 241 | } 242 | } 243 | 244 | // Seek a specific node in a downwards manner. 
The `Visitor` should be 245 | // used to steer the seek selecting at each node which child will the 246 | // seek continue to (extending the `path` in that direction) or pause it 247 | // (if the desired node has been found). The seek always starts from 248 | // the root. It modifies the position so it shouldn't be used in-between 249 | // `Iterate` calls (it can be used to set the position *before* iterating). 250 | // If the visitor returns any non-`nil` errors the seek will stop. 251 | // 252 | // TODO: The seek could be extended to seek from the current position. 253 | // (Is there something in the logic that would prevent it at the moment?) 254 | func (w *Walker) Seek(visitor Visitor) error { 255 | 256 | if visitor == nil { 257 | return ErrNilVisitor 258 | // Although valid, there is no point in calling `Seek` without 259 | // any extra logic, it would just go down to the leftmost leaf, 260 | // so this would probably be a user error. 261 | } 262 | 263 | // Go down until it the desired node is found (that will be signaled 264 | // pausing the seek with `errPauseWalkOperation`) or a leaf node is 265 | // reached (end of the DAG). 266 | for { 267 | err := w.down(visitor) 268 | 269 | if err == errPauseWalkOperation { 270 | return nil 271 | // Found the node, `errPauseWalkOperation` is just an internal 272 | // error to signal to pause, don't pass it along. 273 | } 274 | 275 | if err == ErrDownNoChild { 276 | return nil 277 | // Can't keep going down from this node, either at a leaf node 278 | // or the `Visitor` has moved the child index past the 279 | // available index (probably because none indicated that the 280 | // target node could be down from there). 281 | } 282 | 283 | if err != nil { 284 | return err 285 | // `down()` is the only movement that can return *any* error. 286 | } 287 | } 288 | // TODO: Copied from the first part of `Iterate()` (although conceptually 289 | // different from it). 
Could this be encapsulated in a function to avoid 290 | // repeating code? The way the pause signal is handled it wouldn't seem 291 | // very useful: the `errPauseWalkOperation` needs to be processed at this 292 | // depth to return from the function (and pause the seek, returning 293 | // from another function here wouldn't cause it to stop). 294 | } 295 | 296 | // Go down one level in the DAG to the child of the `ActiveNode` 297 | // pointed to by `ActiveChildIndex` and perform some logic on it by 298 | // through the user-specified `visitor`. 299 | // 300 | // This should always be the first move in any walk operation 301 | // (to visit the root node and move the `currentDepth` away 302 | // from the negative value). 303 | func (w *Walker) down(visitor Visitor) error { 304 | child, err := w.fetchChild() 305 | if err != nil { 306 | return err 307 | } 308 | 309 | w.extendPath(child) 310 | 311 | return w.visitActiveNode(visitor) 312 | } 313 | 314 | // Fetch the child from the `ActiveNode` through the `FetchChild` 315 | // method of the `NavigableNode` interface. 316 | func (w *Walker) fetchChild() (NavigableNode, error) { 317 | if w.currentDepth == -1 { 318 | // First time `down()` is called, `currentDepth` is -1, 319 | // return the root node. Don't check available child nodes 320 | // (as the `Walker` is not actually on any node just yet 321 | // and `ActiveChildIndex` is of no use yet). 322 | return w.path[0], nil 323 | } 324 | 325 | // Check if the child to fetch exists. 326 | if w.ActiveChildIndex() >= w.ActiveNode().ChildTotal() { 327 | return nil, ErrDownNoChild 328 | } 329 | 330 | return w.ActiveNode().FetchChild(w.ctx, w.ActiveChildIndex()) 331 | 332 | // TODO: Maybe call `extendPath` here and hide it away 333 | // from `down`. 
334 | } 335 | 336 | // Increase the `currentDepth` and extend the `path` to the fetched 337 | // `child` node (which now becomes the new `ActiveNode`) 338 | func (w *Walker) extendPath(child NavigableNode) { 339 | w.currentDepth++ 340 | 341 | // Extend the slices if needed (doubling its capacity). 342 | if w.currentDepth >= len(w.path) { 343 | w.path = append(w.path, make([]NavigableNode, len(w.path))...) 344 | w.childIndex = append(w.childIndex, make([]uint, len(w.childIndex))...) 345 | // TODO: Check the performance of this grow mechanism. 346 | } 347 | 348 | // `child` now becomes the `ActiveNode()`. 349 | w.path[w.currentDepth] = child 350 | w.childIndex[w.currentDepth] = 0 351 | } 352 | 353 | // Call the `Visitor` on the `ActiveNode`. This function should only be 354 | // called from `down`. This is a wrapper function to `Visitor` to process 355 | // the `Pause` signal and do other minor checks (taking this logic away 356 | // from `down`). 357 | func (w *Walker) visitActiveNode(visitor Visitor) error { 358 | if visitor == nil { 359 | return nil 360 | // No need to check `pauseRequested` as `Pause` should 361 | // only be called from within the `Visitor`. 362 | } 363 | 364 | err := visitor(w.ActiveNode()) 365 | 366 | if w.pauseRequested { 367 | // If a pause was requested make sure an error is returned 368 | // that will cause the current walk operation to return. If 369 | // `Visitor` didn't return an error set an artificial one 370 | // generated by the `Walker`. 371 | if err == nil { 372 | err = errPauseWalkOperation 373 | } 374 | 375 | w.pauseRequested = false 376 | } 377 | 378 | return err 379 | } 380 | 381 | // Go up from the `ActiveNode`. The only possible error this method 382 | // can return is to signal it's already at the root and can't go up. 
383 | func (w *Walker) up() error { 384 | if w.currentDepth < 1 { 385 | return errUpOnRoot 386 | } 387 | 388 | w.currentDepth-- 389 | 390 | // w.ActiveNode().Cleanup() 391 | // If `Cleanup` is supported this would be the place to call it. 392 | 393 | return nil 394 | } 395 | 396 | // NextChild increases the child index of the `ActiveNode` to point 397 | // to the next child (which may exist or may be the end of the available 398 | // child nodes). 399 | // 400 | // This method doesn't change the `ActiveNode`, it just changes where 401 | // is it pointing to next, it could be interpreted as "turn to the next 402 | // child". 403 | func (w *Walker) NextChild() error { 404 | w.incrementActiveChildIndex() 405 | 406 | if w.ActiveChildIndex() == w.ActiveNode().ChildTotal() { 407 | return ErrNextNoChild 408 | // At the end of the available children, signal it. 409 | } 410 | 411 | return nil 412 | } 413 | 414 | // incrementActiveChildIndex increments the child index of the `ActiveNode` to 415 | // point to the next child (if it exists) or to the position past all of 416 | // the child nodes (`ChildTotal`) to signal that all of its children have 417 | // been visited/skipped (if already at that last position, do nothing). 418 | func (w *Walker) incrementActiveChildIndex() { 419 | if w.ActiveChildIndex()+1 <= w.ActiveNode().ChildTotal() { 420 | w.childIndex[w.currentDepth]++ 421 | } 422 | } 423 | 424 | // ActiveChildIndex returns the index of the child the `ActiveNode()` 425 | // is pointing to. 426 | func (w *Walker) ActiveChildIndex() uint { 427 | return w.childIndex[w.currentDepth] 428 | } 429 | 430 | // SetContext changes the internal `Walker` (that is provided to the 431 | // `NavigableNode`s when calling `FetchChild`) with the one passed 432 | // as argument. 433 | func (w *Walker) SetContext(ctx context.Context) { 434 | w.ctx = ctx 435 | } 436 | 437 | // Pause the current walk operation. This function must be called from 438 | // within the `Visitor` function. 
func (w *Walker) Pause() {
	// Only the flag is raised here; `visitActiveNode` inspects and clears
	// it once the `Visitor` returns, turning it into the internal pause
	// signal that stops the walk operation.
	w.pauseRequested = true
}