├── .github
│   └── workflows
│       ├── generated-pr.yml
│       ├── go-check.yml
│       ├── go-test.yml
│       ├── release-check.yml
│       ├── releaser.yml
│       ├── stale.yml
│       └── tagpush.yml
├── LICENSE.md
├── README.md
├── data
│   ├── builder
│   │   ├── builder.go
│   │   ├── dir_test.go
│   │   ├── directory.go
│   │   ├── dirshard.go
│   │   ├── file.go
│   │   ├── file_test.go
│   │   ├── quick
│   │   │   ├── quick.go
│   │   │   └── quick_test.go
│   │   └── util.go
│   ├── datatypes.go
│   ├── doc.go
│   ├── errors.go
│   ├── fixtures
│   │   ├── directory.unixfs
│   │   ├── directory
│   │   │   └── file.txt
│   │   ├── file.txt
│   │   ├── file.txt.unixfs
│   │   ├── raw.unixfs
│   │   ├── symlink.txt
│   │   └── symlink.txt.unixfs
│   ├── format_test.go
│   ├── gen
│   │   └── main.go
│   ├── ipldsch_minima.go
│   ├── ipldsch_satisfaction.go
│   ├── ipldsch_types.go
│   ├── marshal.go
│   ├── permissions.go
│   ├── unmarshal.go
│   └── wirenumbers.go
├── directory
│   └── basicdir.go
├── file
│   ├── deferred.go
│   ├── file.go
│   ├── file_test.go
│   ├── fixtures
│   │   ├── QmT78zSuBmuS4z925WZfrqQ1qHaJ56DQaTfyMUF7F8ff5o.car
│   │   └── QmT8EC9sJq63SkDZ1mWLbWWyVV66PuqyHWpKkH4pcVyY4H.car
│   ├── large_file_test.go
│   ├── shard.go
│   └── wrapped.go
├── go.mod
├── go.sum
├── hamt
│   ├── errors.go
│   ├── fixtures
│   │   └── wikipedia-cryptographic-hash-function.car
│   ├── shardeddir.go
│   ├── shardeddir_test.go
│   ├── util.go
│   └── util_test.go
├── iter
│   ├── iter.go
│   └── iterlink.go
├── pathpbnode.go
├── reification.go
├── signaling.go
├── signalling_test.go
├── test
│   ├── doc.go
│   └── partial_file_access_test.go
├── testutil
│   ├── directory.go
│   ├── doc.go
│   ├── generator.go
│   └── namegen
│       └── namegen.go
├── utils
│   └── utils.go
└── version.json
/.github/workflows/generated-pr.yml:
--------------------------------------------------------------------------------
1 | name: Close Generated PRs
2 |
3 | on:
4 |   schedule:
5 |     - cron: '0 0 * * *'
6 |   workflow_dispatch:
7 |
8 | permissions:
9 |   issues: write
10 |   pull-requests: write
11 |
12 | jobs:
13 |   stale:
14 |     uses: ipdxco/unified-github-workflows/.github/workflows/reusable-generated-pr.yml@v1
15 |
--------------------------------------------------------------------------------
/.github/workflows/go-check.yml:
--------------------------------------------------------------------------------
1 | name: Go Checks
2 |
3 | on:
4 |   pull_request:
5 |   push:
6 |     branches: ["main"]
7 |   workflow_dispatch:
8 |
9 | permissions:
10 |   contents: read
11 |
12 | concurrency:
13 |   group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.event_name == 'push' && github.sha || github.ref }}
14 |   cancel-in-progress: true
15 |
16 | jobs:
17 |   go-check:
18 |     uses: ipdxco/unified-github-workflows/.github/workflows/go-check.yml@v1.0
19 |
--------------------------------------------------------------------------------
/.github/workflows/go-test.yml:
--------------------------------------------------------------------------------
1 | name: Go Test
2 |
3 | on:
4 |   pull_request:
5 |   push:
6 |     branches: ["main"]
7 |   workflow_dispatch:
8 |
9 | permissions:
10 |   contents: read
11 |
12 | concurrency:
13 |   group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.event_name == 'push' && github.sha || github.ref }}
14 |   cancel-in-progress: true
15 |
16 | jobs:
17 |   go-test:
18 |     uses: ipdxco/unified-github-workflows/.github/workflows/go-test.yml@v1.0
19 |     secrets:
20 |       CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
21 |
--------------------------------------------------------------------------------
/.github/workflows/release-check.yml:
--------------------------------------------------------------------------------
1 | name: Release Checker
2 |
3 | on:
4 |   pull_request_target:
5 |     paths: [ 'version.json' ]
6 |     types: [ opened, synchronize, reopened, labeled, unlabeled ]
7 |   workflow_dispatch:
8 |
9 | permissions:
10 |   contents: write
11 |   pull-requests: write
12 |
13 | concurrency:
14 |   group: ${{ github.workflow }}-${{ github.ref }}
15 |   cancel-in-progress: true
16 |
17 | jobs:
18 |   release-check:
19 |     uses: ipdxco/unified-github-workflows/.github/workflows/release-check.yml@v1.0
20 |
--------------------------------------------------------------------------------
/.github/workflows/releaser.yml:
--------------------------------------------------------------------------------
1 | name: Releaser
2 |
3 | on:
4 |   push:
5 |     paths: [ 'version.json' ]
6 |   workflow_dispatch:
7 |
8 | permissions:
9 |   contents: write
10 |
11 | concurrency:
12 |   group: ${{ github.workflow }}-${{ github.sha }}
13 |   cancel-in-progress: true
14 |
15 | jobs:
16 |   releaser:
17 |     uses: ipdxco/unified-github-workflows/.github/workflows/releaser.yml@v1.0
18 |
--------------------------------------------------------------------------------
/.github/workflows/stale.yml:
--------------------------------------------------------------------------------
1 | name: Close Stale Issues
2 |
3 | on:
4 |   schedule:
5 |     - cron: '0 0 * * *'
6 |   workflow_dispatch:
7 |
8 | permissions:
9 |   issues: write
10 |   pull-requests: write
11 |
12 | jobs:
13 |   stale:
14 |     uses: ipdxco/unified-github-workflows/.github/workflows/reusable-stale-issue.yml@v1
15 |
--------------------------------------------------------------------------------
/.github/workflows/tagpush.yml:
--------------------------------------------------------------------------------
1 | name: Tag Push Checker
2 |
3 | on:
4 |   push:
5 |     tags:
6 |       - v*
7 |
8 | permissions:
9 |   contents: read
10 |   issues: write
11 |
12 | concurrency:
13 |   group: ${{ github.workflow }}-${{ github.ref }}
14 |   cancel-in-progress: true
15 |
16 | jobs:
17 |   releaser:
18 |     uses: ipdxco/unified-github-workflows/.github/workflows/tagpush.yml@v1.0
19 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | The contents of this repository are Copyright (c) corresponding authors and
2 | contributors, licensed under the `Permissive License Stack` meaning either of:
3 |
4 | - Apache-2.0 Software License: https://www.apache.org/licenses/LICENSE-2.0
5 | ([...4tr2kfsq](https://dweb.link/ipfs/bafkreiankqxazcae4onkp436wag2lj3ccso4nawxqkkfckd6cg4tr2kfsq))
6 |
7 | - MIT Software License: https://opensource.org/licenses/MIT
8 | ([...vljevcba](https://dweb.link/ipfs/bafkreiepofszg4gfe2gzuhojmksgemsub2h4uy2gewdnr35kswvljevcba))
9 |
10 | You may not use the contents of this repository except in compliance
11 | with one of the listed Licenses. For an extended clarification of the
12 | intent behind the choice of Licensing please refer to
13 | https://protocol.ai/blog/announcing-the-permissive-license-stack/
14 |
15 | Unless required by applicable law or agreed to in writing, software
16 | distributed under the terms listed in this notice is distributed on
17 | an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
18 | either express or implied. See each License for the specific language
19 | governing permissions and limitations under that License.
20 |
21 |
22 | `SPDX-License-Identifier: Apache-2.0 OR MIT`
23 |
24 | Verbatim copies of both licenses are included below:
25 |
26 | Apache-2.0 Software License
27 |
28 | ```
29 | Apache License
30 | Version 2.0, January 2004
31 | http://www.apache.org/licenses/
32 |
33 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
34 |
35 | 1. Definitions.
36 |
37 | "License" shall mean the terms and conditions for use, reproduction,
38 | and distribution as defined by Sections 1 through 9 of this document.
39 |
40 | "Licensor" shall mean the copyright owner or entity authorized by
41 | the copyright owner that is granting the License.
42 |
43 | "Legal Entity" shall mean the union of the acting entity and all
44 | other entities that control, are controlled by, or are under common
45 | control with that entity. For the purposes of this definition,
46 | "control" means (i) the power, direct or indirect, to cause the
47 | direction or management of such entity, whether by contract or
48 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
49 | outstanding shares, or (iii) beneficial ownership of such entity.
50 |
51 | "You" (or "Your") shall mean an individual or Legal Entity
52 | exercising permissions granted by this License.
53 |
54 | "Source" form shall mean the preferred form for making modifications,
55 | including but not limited to software source code, documentation
56 | source, and configuration files.
57 |
58 | "Object" form shall mean any form resulting from mechanical
59 | transformation or translation of a Source form, including but
60 | not limited to compiled object code, generated documentation,
61 | and conversions to other media types.
62 |
63 | "Work" shall mean the work of authorship, whether in Source or
64 | Object form, made available under the License, as indicated by a
65 | copyright notice that is included in or attached to the work
66 | (an example is provided in the Appendix below).
67 |
68 | "Derivative Works" shall mean any work, whether in Source or Object
69 | form, that is based on (or derived from) the Work and for which the
70 | editorial revisions, annotations, elaborations, or other modifications
71 | represent, as a whole, an original work of authorship. For the purposes
72 | of this License, Derivative Works shall not include works that remain
73 | separable from, or merely link (or bind by name) to the interfaces of,
74 | the Work and Derivative Works thereof.
75 |
76 | "Contribution" shall mean any work of authorship, including
77 | the original version of the Work and any modifications or additions
78 | to that Work or Derivative Works thereof, that is intentionally
79 | submitted to Licensor for inclusion in the Work by the copyright owner
80 | or by an individual or Legal Entity authorized to submit on behalf of
81 | the copyright owner. For the purposes of this definition, "submitted"
82 | means any form of electronic, verbal, or written communication sent
83 | to the Licensor or its representatives, including but not limited to
84 | communication on electronic mailing lists, source code control systems,
85 | and issue tracking systems that are managed by, or on behalf of, the
86 | Licensor for the purpose of discussing and improving the Work, but
87 | excluding communication that is conspicuously marked or otherwise
88 | designated in writing by the copyright owner as "Not a Contribution."
89 |
90 | "Contributor" shall mean Licensor and any individual or Legal Entity
91 | on behalf of whom a Contribution has been received by Licensor and
92 | subsequently incorporated within the Work.
93 |
94 | 2. Grant of Copyright License. Subject to the terms and conditions of
95 | this License, each Contributor hereby grants to You a perpetual,
96 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
97 | copyright license to reproduce, prepare Derivative Works of,
98 | publicly display, publicly perform, sublicense, and distribute the
99 | Work and such Derivative Works in Source or Object form.
100 |
101 | 3. Grant of Patent License. Subject to the terms and conditions of
102 | this License, each Contributor hereby grants to You a perpetual,
103 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
104 | (except as stated in this section) patent license to make, have made,
105 | use, offer to sell, sell, import, and otherwise transfer the Work,
106 | where such license applies only to those patent claims licensable
107 | by such Contributor that are necessarily infringed by their
108 | Contribution(s) alone or by combination of their Contribution(s)
109 | with the Work to which such Contribution(s) was submitted. If You
110 | institute patent litigation against any entity (including a
111 | cross-claim or counterclaim in a lawsuit) alleging that the Work
112 | or a Contribution incorporated within the Work constitutes direct
113 | or contributory patent infringement, then any patent licenses
114 | granted to You under this License for that Work shall terminate
115 | as of the date such litigation is filed.
116 |
117 | 4. Redistribution. You may reproduce and distribute copies of the
118 | Work or Derivative Works thereof in any medium, with or without
119 | modifications, and in Source or Object form, provided that You
120 | meet the following conditions:
121 |
122 | (a) You must give any other recipients of the Work or
123 | Derivative Works a copy of this License; and
124 |
125 | (b) You must cause any modified files to carry prominent notices
126 | stating that You changed the files; and
127 |
128 | (c) You must retain, in the Source form of any Derivative Works
129 | that You distribute, all copyright, patent, trademark, and
130 | attribution notices from the Source form of the Work,
131 | excluding those notices that do not pertain to any part of
132 | the Derivative Works; and
133 |
134 | (d) If the Work includes a "NOTICE" text file as part of its
135 | distribution, then any Derivative Works that You distribute must
136 | include a readable copy of the attribution notices contained
137 | within such NOTICE file, excluding those notices that do not
138 | pertain to any part of the Derivative Works, in at least one
139 | of the following places: within a NOTICE text file distributed
140 | as part of the Derivative Works; within the Source form or
141 | documentation, if provided along with the Derivative Works; or,
142 | within a display generated by the Derivative Works, if and
143 | wherever such third-party notices normally appear. The contents
144 | of the NOTICE file are for informational purposes only and
145 | do not modify the License. You may add Your own attribution
146 | notices within Derivative Works that You distribute, alongside
147 | or as an addendum to the NOTICE text from the Work, provided
148 | that such additional attribution notices cannot be construed
149 | as modifying the License.
150 |
151 | You may add Your own copyright statement to Your modifications and
152 | may provide additional or different license terms and conditions
153 | for use, reproduction, or distribution of Your modifications, or
154 | for any such Derivative Works as a whole, provided Your use,
155 | reproduction, and distribution of the Work otherwise complies with
156 | the conditions stated in this License.
157 |
158 | 5. Submission of Contributions. Unless You explicitly state otherwise,
159 | any Contribution intentionally submitted for inclusion in the Work
160 | by You to the Licensor shall be under the terms and conditions of
161 | this License, without any additional terms or conditions.
162 | Notwithstanding the above, nothing herein shall supersede or modify
163 | the terms of any separate license agreement you may have executed
164 | with Licensor regarding such Contributions.
165 |
166 | 6. Trademarks. This License does not grant permission to use the trade
167 | names, trademarks, service marks, or product names of the Licensor,
168 | except as required for reasonable and customary use in describing the
169 | origin of the Work and reproducing the content of the NOTICE file.
170 |
171 | 7. Disclaimer of Warranty. Unless required by applicable law or
172 | agreed to in writing, Licensor provides the Work (and each
173 | Contributor provides its Contributions) on an "AS IS" BASIS,
174 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
175 | implied, including, without limitation, any warranties or conditions
176 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
177 | PARTICULAR PURPOSE. You are solely responsible for determining the
178 | appropriateness of using or redistributing the Work and assume any
179 | risks associated with Your exercise of permissions under this License.
180 |
181 | 8. Limitation of Liability. In no event and under no legal theory,
182 | whether in tort (including negligence), contract, or otherwise,
183 | unless required by applicable law (such as deliberate and grossly
184 | negligent acts) or agreed to in writing, shall any Contributor be
185 | liable to You for damages, including any direct, indirect, special,
186 | incidental, or consequential damages of any character arising as a
187 | result of this License or out of the use or inability to use the
188 | Work (including but not limited to damages for loss of goodwill,
189 | work stoppage, computer failure or malfunction, or any and all
190 | other commercial damages or losses), even if such Contributor
191 | has been advised of the possibility of such damages.
192 |
193 | 9. Accepting Warranty or Additional Liability. While redistributing
194 | the Work or Derivative Works thereof, You may choose to offer,
195 | and charge a fee for, acceptance of support, warranty, indemnity,
196 | or other liability obligations and/or rights consistent with this
197 | License. However, in accepting such obligations, You may act only
198 | on Your own behalf and on Your sole responsibility, not on behalf
199 | of any other Contributor, and only if You agree to indemnify,
200 | defend, and hold each Contributor harmless for any liability
201 | incurred by, or claims asserted against, such Contributor by reason
202 | of your accepting any such warranty or additional liability.
203 |
204 | END OF TERMS AND CONDITIONS
205 | ```
206 |
207 |
208 | MIT Software License
209 |
210 | ```
211 | Permission is hereby granted, free of charge, to any person obtaining a copy
212 | of this software and associated documentation files (the "Software"), to deal
213 | in the Software without restriction, including without limitation the rights
214 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
215 | copies of the Software, and to permit persons to whom the Software is
216 | furnished to do so, subject to the following conditions:
217 |
218 | The above copyright notice and this permission notice shall be included in
219 | all copies or substantial portions of the Software.
220 |
221 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
222 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
223 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
224 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
225 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
226 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
227 | THE SOFTWARE.
228 | ```
229 |
230 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # go-unixfsnode
2 |
3 | This is an IPLD ADL that provides string-based pathing for protobuf nodes. The top-level node behaves like a map, where LookupByString returns the Hash property of the Link whose Name property matches the key in the protobuf node's list of Links. This should enable selector traversals that work based on paths.
4 |
5 | Note that while it works internally with go-codec-dagpb, the Reify method (used to get a UnixFS node from a DagPB node) should also work with go-ipld-prime-proto nodes.
6 |
7 | ## Usage
8 |
9 | The primary interaction with this package is to register an ADL on a link system. This is done via a helper method:
10 |
11 | ```go
12 | AddUnixFSReificationToLinkSystem(lsys *ipld.LinkSystem)
13 | ```
14 |
15 | For link systems which have UnixFS reification registered, two ADLs will be available to the [`InterpretAs`](https://ipld.io/specs/selectors/) selector: 'unixfs' and 'unixfs-preload'. The difference between these two ADLs is that the preload variant will access all blocks within a UnixFS object (file or directory) when that object is accessed by a selector traversal, while the non-preload variant will only access the subset of blocks strictly needed for the traversal. In practice, this means the subset of a sharded directory needed to access a specific file, or the sub-range of a file directly accessed by a range selector.
16 |
17 |
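For example, a minimal setup looks like this (a sketch; configuring the link system's storage read/write openers is omitted):

```go
package main

import (
	"github.com/ipfs/go-unixfsnode"
	cidlink "github.com/ipld/go-ipld-prime/linking/cid"
)

func main() {
	lsys := cidlink.DefaultLinkSystem()
	// Registers the 'unixfs' and 'unixfs-preload' ADLs so that
	// InterpretAs selector clauses can resolve them by name.
	unixfsnode.AddUnixFSReificationToLinkSystem(&lsys)
}
```
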
18 | ## License
19 |
20 | Apache-2.0/MIT © Protocol Labs
21 |
--------------------------------------------------------------------------------
/data/builder/builder.go:
--------------------------------------------------------------------------------
1 | package builder
2 |
3 | import (
4 | "errors"
5 | "strconv"
6 | "time"
7 |
8 | "github.com/ipfs/go-unixfsnode/data"
9 | "github.com/ipld/go-ipld-prime"
10 | "github.com/ipld/go-ipld-prime/fluent/qp"
11 | )
12 |
13 | // BuildUnixFS provides a clean, validated interface for building data structures
14 | // that match the UnixFS protobuf encoded in the Data member of a ProtoNode,
15 | // with sensible defaults
16 | //
17 | // smallFileData, err := BuildUnixFS(func(b *Builder) {
18 | //   Data(b, []byte("hello world"))
19 | // Mtime(b, func(tb TimeBuilder) {
20 | // Time(tb, time.Now())
21 | // })
22 | // })
23 | func BuildUnixFS(fn func(*Builder)) (data.UnixFSData, error) {
24 | nd, err := qp.BuildMap(data.Type.UnixFSData, -1, func(ma ipld.MapAssembler) {
25 | b := &Builder{MapAssembler: ma}
26 | fn(b)
27 | if !b.hasBlockSizes {
28 | qp.MapEntry(ma, data.Field__BlockSizes, qp.List(0, func(ipld.ListAssembler) {}))
29 | }
30 | if !b.hasDataType {
31 | qp.MapEntry(ma, data.Field__DataType, qp.Int(data.Data_File))
32 | }
33 | })
34 | if err != nil {
35 | return nil, err
36 | }
37 | return nd.(data.UnixFSData), nil
38 | }
39 |
40 | // Builder is an interface for making UnixFS data nodes
41 | type Builder struct {
42 | ipld.MapAssembler
43 | hasDataType bool
44 | hasBlockSizes bool
45 | }
46 |
47 | // DataType sets the type of the UnixFS node being built - the default is File
48 | func DataType(b *Builder, dataType int64) {
49 | _, ok := data.DataTypeNames[dataType]
50 | if !ok {
51 | panic(data.ErrInvalidDataType{DataType: dataType})
52 | }
53 | qp.MapEntry(b.MapAssembler, data.Field__DataType, qp.Int(dataType))
54 | b.hasDataType = true
55 | }
56 |
57 | // Data sets the data member inside the UnixFS data
58 | func Data(b *Builder, dataBytes []byte) {
59 | qp.MapEntry(b.MapAssembler, data.Field__Data, qp.Bytes(dataBytes))
60 | }
61 |
62 | // FileSize sets the file size: the number of actual content bytes beneath
63 | // this node (for large files), excluding the bytes used to encode intermediate nodes
64 | func FileSize(b *Builder, fileSize uint64) {
65 | qp.MapEntry(b.MapAssembler, data.Field__FileSize, qp.Int(int64(fileSize)))
66 | }
67 |
68 | // BlockSizes encodes block sizes for each child node
69 | func BlockSizes(b *Builder, blockSizes []uint64) {
70 | qp.MapEntry(b.MapAssembler, data.Field__BlockSizes, qp.List(int64(len(blockSizes)), func(la ipld.ListAssembler) {
71 | for _, bs := range blockSizes {
72 | qp.ListEntry(la, qp.Int(int64(bs)))
73 | }
74 | }))
75 | b.hasBlockSizes = true
76 | }
77 |
78 | // HashType sets the hash function for this node -- only applicable to HAMT
79 | func HashType(b *Builder, hashType uint64) {
80 | qp.MapEntry(b.MapAssembler, data.Field__HashType, qp.Int(int64(hashType)))
81 | }
82 |
83 | // Fanout sets the fanout in a HAMT tree
84 | func Fanout(b *Builder, fanout uint64) {
85 | qp.MapEntry(b.MapAssembler, data.Field__Fanout, qp.Int(int64(fanout)))
86 | }
87 |
88 | // Permissions sets file permissions for the Mode member of the UnixFS node
89 | func Permissions(b *Builder, mode int) {
90 | mode = mode & 0xFFF
91 | qp.MapEntry(b.MapAssembler, data.Field__Mode, qp.Int(int64(mode)))
92 | }
93 |
94 | func parseModeString(modeString string) (uint64, error) {
95 | if len(modeString) > 0 && modeString[0] == '0' {
96 | return strconv.ParseUint(modeString, 8, 32)
97 | }
98 | return strconv.ParseUint(modeString, 10, 32)
99 | }
100 |
101 | // PermissionsString sets file permissions for the Mode member of the UnixFS node,
102 | // parsed from a typical octal-encoded permission string (e.g. '0755')
103 | func PermissionsString(b *Builder, modeString string) {
104 | mode64, err := parseModeString(modeString)
105 | if err != nil {
106 | panic(err)
107 | }
108 | mode64 = mode64 & 0xFFF
109 | qp.MapEntry(b.MapAssembler, data.Field__Mode, qp.Int(int64(mode64)))
110 | }
111 |
112 | // Mtime sets the modification time for this node using the time builder interface
113 | // and associated methods
114 | func Mtime(b *Builder, fn func(tb TimeBuilder)) {
115 | qp.MapEntry(b.MapAssembler, data.Field__Mtime, qp.Map(-1, func(ma ipld.MapAssembler) {
116 | fn(ma)
117 | }))
118 | }
119 |
120 | // TimeBuilder is a simple interface for constructing the time member of UnixFS data
121 | type TimeBuilder ipld.MapAssembler
122 |
123 | // Time sets the modification time from a golang time value
124 | func Time(ma TimeBuilder, t time.Time) {
125 | Seconds(ma, t.Unix())
126 | FractionalNanoseconds(ma, int32(t.Nanosecond()))
127 | }
128 |
129 | // Seconds sets the seconds for a modification time
130 | func Seconds(ma TimeBuilder, seconds int64) {
131 | qp.MapEntry(ma, data.Field__Seconds, qp.Int(seconds))
132 |
133 | }
134 |
135 | // FractionalNanoseconds sets the nanoseconds for a modification time (must
136 | // be in the range [0, 999999999])
137 | func FractionalNanoseconds(ma TimeBuilder, nanoseconds int32) {
138 | if nanoseconds < 0 || nanoseconds > 999999999 {
139 | panic(errors.New("mtime-nsecs must be within the range [0,999999999]"))
140 | }
141 | qp.MapEntry(ma, data.Field__Nanoseconds, qp.Int(int64(nanoseconds)))
142 | }
143 |
--------------------------------------------------------------------------------
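A brief usage sketch of the builder API in `builder.go` above (illustrative only; error handling is elided, and it assumes the `data` and `time` imports already used by the file itself):

```go
// Build the UnixFS metadata for a small file with permissions and an mtime,
// mirroring the doc comment on BuildUnixFS.
ufsData, err := BuildUnixFS(func(b *Builder) {
	DataType(b, data.Data_File) // optional; File is already the default
	Data(b, []byte("hello world"))
	Permissions(b, 0o644)
	Mtime(b, func(tb TimeBuilder) {
		Time(tb, time.Now())
	})
})
if err != nil {
	// handle error
}
// The result is typically embedded in the Data field of a dag-pb node:
_ = data.EncodeUnixFSData(ufsData)
```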
/data/builder/dir_test.go:
--------------------------------------------------------------------------------
1 | package builder
2 |
3 | import (
4 | "bytes"
5 | "fmt"
6 | "io"
7 | "os"
8 | "path/filepath"
9 | "strconv"
10 | "testing"
11 |
12 | "github.com/ipfs/go-cid"
13 | "github.com/ipfs/go-test/random"
14 | "github.com/ipfs/go-unixfsnode"
15 | dagpb "github.com/ipld/go-codec-dagpb"
16 | "github.com/ipld/go-ipld-prime"
17 | cidlink "github.com/ipld/go-ipld-prime/linking/cid"
18 | "github.com/multiformats/go-multihash"
19 | "github.com/stretchr/testify/require"
20 | )
21 |
22 | func mkEntries(cnt int, ls *ipld.LinkSystem) ([]dagpb.PBLink, error) {
23 | entries := make([]dagpb.PBLink, 0, cnt)
24 | for i := 0; i < cnt; i++ {
25 | r := bytes.NewBufferString(fmt.Sprintf("%d", i))
26 | e, err := mkEntry(r, fmt.Sprintf("file %d", i), ls)
27 | if err != nil {
28 | return nil, err
29 | }
30 | entries = append(entries, e)
31 | }
32 | return entries, nil
33 | }
34 |
35 | func mkEntry(r io.Reader, name string, ls *ipld.LinkSystem) (dagpb.PBLink, error) {
36 | f, s, err := BuildUnixFSFile(r, "", ls)
37 | if err != nil {
38 | return nil, err
39 | }
40 | return BuildUnixFSDirectoryEntry(name, int64(s), f)
41 | }
42 |
43 | func TestBuildUnixFSFileWrappedInDirectory_Reference(t *testing.T) {
44 | for _, tc := range referenceTestCases {
45 | t.Run(strconv.Itoa(tc.size), func(t *testing.T) {
46 | buf := make([]byte, tc.size)
47 | random.NewSeededRand(0xdeadbeef).Read(buf)
48 | r := bytes.NewReader(buf)
49 |
50 | ls := cidlink.DefaultLinkSystem()
51 | storage := cidlink.Memory{}
52 | ls.StorageReadOpener = storage.OpenRead
53 | ls.StorageWriteOpener = storage.OpenWrite
54 |
55 | e, err := mkEntry(r, fmt.Sprintf("%d", tc.size), &ls)
56 | require.NoError(t, err)
57 | d, sz, err := BuildUnixFSDirectory([]dagpb.PBLink{e}, &ls)
58 | require.NoError(t, err)
59 | require.Equal(t, tc.wrappedExpected.String(), d.(cidlink.Link).Cid.String())
60 |
61 | // check sz is the stored size of all blocks in the generated DAG
62 | var totStored int
63 | for _, blk := range storage.Bag {
64 | totStored += len(blk)
65 | }
66 | require.Equal(t, totStored, int(sz))
67 | })
68 | }
69 | }
70 |
71 | // Cross-impl reference test: directory of files with single character
72 | // names, starting from ' ' and ending with '~', but excluding the special
73 | // characters '/' and '.'. Each file should contain a single byte with the
74 | // same value as the character in its name. Files are added to a sharded
75 | // directory with a fanout of 16, using CIDv1 throughout, and should result
76 | // in the root CID of:
77 | //
78 | // bafybeihnipspiyy3dctpcx7lv655qpiuy52d7b2fzs52dtrjqwmvbiux44
79 | func TestBuildUnixFSDirectoryShardAltFanout_Reference(t *testing.T) {
80 | ls := cidlink.DefaultLinkSystem()
81 | storage := cidlink.Memory{}
82 | ls.StorageReadOpener = storage.OpenRead
83 | ls.StorageWriteOpener = storage.OpenWrite
84 | entries := make([]dagpb.PBLink, 0)
85 | for ch := ' '; ch <= '~'; ch++ {
86 | if ch == '/' || ch == '.' {
87 | continue
88 | }
89 | s := string(ch)
90 | r := bytes.NewBuffer([]byte(s))
91 | e, err := mkEntry(r, s, &ls)
92 | require.NoError(t, err)
93 | entries = append(entries, e)
94 | }
95 | lnk, sz, err := BuildUnixFSShardedDirectory(16, multihash.MURMUR3X64_64, entries, &ls)
96 | require.NoError(t, err)
97 | var totStored int
98 | for _, blk := range storage.Bag {
99 | totStored += len(blk)
100 | }
101 | require.Equal(t, totStored, int(sz))
102 | require.Equal(t, "bafybeihnipspiyy3dctpcx7lv655qpiuy52d7b2fzs52dtrjqwmvbiux44", lnk.String())
103 | }
104 |
105 | func TestBuildUnixFSDirectory(t *testing.T) {
106 | ls := cidlink.DefaultLinkSystem()
107 | storage := cidlink.Memory{}
108 | ls.StorageReadOpener = storage.OpenRead
109 | ls.StorageWriteOpener = storage.OpenWrite
110 |
111 | testSizes := []int{100, 1000, 50000}
112 | for _, cnt := range testSizes {
113 | entries, err := mkEntries(cnt, &ls)
114 | if err != nil {
115 | t.Fatal(err)
116 | }
117 |
118 | dl, _, err := BuildUnixFSDirectory(entries, &ls)
119 | if err != nil {
120 | t.Fatal(err)
121 | }
122 |
123 | pbn, err := ls.Load(ipld.LinkContext{}, dl, dagpb.Type.PBNode)
124 | if err != nil {
125 | t.Fatal(err)
126 | }
127 | ufd, err := unixfsnode.Reify(ipld.LinkContext{}, pbn, &ls)
128 | if err != nil {
129 | t.Fatal(err)
130 | }
131 | observedCnt := 0
132 |
133 | li := ufd.MapIterator()
134 | for !li.Done() {
135 | _, _, err := li.Next()
136 | if err != nil {
137 | t.Fatal(err)
138 | }
139 | observedCnt++
140 | }
141 | if observedCnt != cnt {
142 | fmt.Printf("%+v\n", ufd)
143 | t.Fatalf("unexpected number of dir entries %d vs %d", observedCnt, cnt)
144 | }
145 | }
146 | }
147 |
148 | func TestBuildUnixFSRecursive(t *testing.T) {
149 | // only the top CID is of interest, but this tree is correct and can be used for future validation
150 | fixture := fentry{
151 | "rootDir",
152 | "",
153 | mustCidDecode("bafybeihswl3f7pa7fueyayewcvr3clkdz7oetv4jolyejgw26p6l3qzlbm"),
154 | []fentry{
155 | {"a", "aaa", mustCidDecode("bafkreieygsdw3t5qlsywpjocjfj6xjmmjlejwgw7k7zi6l45bgxra7xi6a"), nil},
156 | {
157 | "b",
158 | "",
159 | mustCidDecode("bafybeibohj54uixf2mso4t53suyarv6cfuxt6b5cj6qjsqaa2ezfxnu5pu"),
160 | []fentry{
161 | {"1", "111", mustCidDecode("bafkreihw4cq6flcbsrnjvj77rkfkudhlyevdxteydkjjvvopqefasdqrvy"), nil},
162 | {"2", "222", mustCidDecode("bafkreie3q4kremt4bhhjdxletm7znjr3oqeo6jt4rtcxcaiu4yuxgdfwd4"), nil},
163 | },
164 | },
165 | {"c", "ccc", mustCidDecode("bafkreide3ksevvet74uks3x7vnxhp4ltfi6zpwbsifmbwn6324fhusia7y"), nil},
166 | },
167 | }
168 |
169 | ls := cidlink.DefaultLinkSystem()
170 | storage := cidlink.Memory{}
171 | ls.StorageReadOpener = storage.OpenRead
172 | ls.StorageWriteOpener = storage.OpenWrite
173 |
174 | dir := t.TempDir()
175 | makeFixture(t, dir, fixture)
176 |
177 | lnk, sz, err := BuildUnixFSRecursive(filepath.Join(dir, fixture.name), &ls)
178 | require.NoError(t, err)
179 | require.Equal(t, fixture.expectedLnk.String(), lnk.String())
180 | require.Equal(t, uint64(245), sz)
181 | }
182 |
183 | func TestBuildUnixFSRecursiveLargeSharded(t *testing.T) {
184 | // only the top CID is of interest, but this tree is correct and can be used for future validation
185 | fixture := fentry{
186 | "rootDir",
187 | "",
188 | mustCidDecode("bafybeigyvxs6og5jbmpaa43qbhhd5swklqcfzqdrtjgfh53qjon6hpjaye"),
189 | make([]fentry, 0),
190 | }
191 |
192 | for i := 0; i < 1344; i++ {
193 | name := fmt.Sprintf("long name to fill out bytes to make the sharded directory test flip over the sharded directory limit because link names are included in the directory entry %d", i)
194 | fixture.children = append(fixture.children, fentry{name, name, cid.Undef, nil})
195 | }
196 |
197 | ls := cidlink.DefaultLinkSystem()
198 | storage := cidlink.Memory{}
199 | ls.StorageReadOpener = storage.OpenRead
200 | ls.StorageWriteOpener = storage.OpenWrite
201 |
202 | dir := t.TempDir()
203 | makeFixture(t, dir, fixture)
204 |
205 | lnk, sz, err := BuildUnixFSRecursive(filepath.Join(dir, fixture.name), &ls)
206 | require.NoError(t, err)
207 | require.Equal(t, fixture.expectedLnk.String(), lnk.String())
208 | require.Equal(t, uint64(515735), sz)
209 | }
210 |
211 | // Same as TestBuildUnixFSRecursiveLargeSharded but it's one file less which flips
212 | // it back to the un-sharded format. So we're testing the boundary condition and
213 | // the proper construction of large DAGs.
214 | func TestBuildUnixFSRecursiveLargeUnsharded(t *testing.T) {
215 | // only the top CID is of interest, but this tree is correct and can be used for future validation
216 | fixture := fentry{
217 | "rootDir",
218 | "",
219 | mustCidDecode("bafybeihecq4rpl4nw3cgfb2uiwltgsmw5sutouvuldv5fxn4gfbihvnalq"),
220 | make([]fentry, 0),
221 | }
222 |
223 | for i := 0; i < 1343; i++ {
224 | name := fmt.Sprintf("long name to fill out bytes to make the sharded directory test flip over the sharded directory limit because link names are included in the directory entry %d", i)
225 | fixture.children = append(fixture.children, fentry{name, name, cid.Undef, nil})
226 | }
227 |
228 | ls := cidlink.DefaultLinkSystem()
229 | storage := cidlink.Memory{}
230 | ls.StorageReadOpener = storage.OpenRead
231 | ls.StorageWriteOpener = storage.OpenWrite
232 |
233 | dir := t.TempDir()
234 | makeFixture(t, dir, fixture)
235 |
236 | lnk, sz, err := BuildUnixFSRecursive(filepath.Join(dir, fixture.name), &ls)
237 | require.NoError(t, err)
238 | require.Equal(t, fixture.expectedLnk.String(), lnk.String())
239 | require.Equal(t, uint64(490665), sz)
240 | }
241 |
242 | type fentry struct {
243 | name string
244 | content string
245 | expectedLnk cid.Cid
246 | children []fentry
247 | }
248 |
249 | func makeFixture(t *testing.T, dir string, fixture fentry) {
250 | path := filepath.Join(dir, fixture.name)
251 | if fixture.children != nil {
252 | require.NoError(t, os.Mkdir(path, 0755))
253 | for _, c := range fixture.children {
254 | makeFixture(t, path, c)
255 | }
256 | } else {
257 | require.NoError(t, os.WriteFile(path, []byte(fixture.content), 0644))
258 | }
259 | }
260 |
261 | func mustCidDecode(s string) cid.Cid {
262 | c, err := cid.Decode(s)
263 | if err != nil {
264 | panic(err)
265 | }
266 | return c
267 | }
268 |
--------------------------------------------------------------------------------
/data/builder/directory.go:
--------------------------------------------------------------------------------
1 | package builder
2 |
3 | import (
4 | "fmt"
5 | "io/fs"
6 | "os"
7 | "path"
8 |
9 | "github.com/ipfs/go-unixfsnode/data"
10 | dagpb "github.com/ipld/go-codec-dagpb"
11 | "github.com/ipld/go-ipld-prime"
12 | cidlink "github.com/ipld/go-ipld-prime/linking/cid"
13 | "github.com/multiformats/go-multihash"
14 | )
15 |
16 | // https://github.com/ipfs/go-ipfs/pull/8114/files#diff-eec963b47a6e1080d9d8023b4e438e6e3591b4154f7379a7e728401d2055374aR319
17 | const shardSplitThreshold = 262144
18 |
19 | // https://github.com/ipfs/go-unixfs/blob/ec6bb5a4c5efdc3a5bce99151b294f663ee9c08d/io/directory.go#L29
20 | const defaultShardWidth = 256
21 |
22 | // BuildUnixFSRecursive returns a link pointing to the UnixFS node representing
23 | // the file or directory tree rooted at `root`, along with its total stored size
24 | func BuildUnixFSRecursive(root string, ls *ipld.LinkSystem) (ipld.Link, uint64, error) {
25 | info, err := os.Lstat(root)
26 | if err != nil {
27 | return nil, 0, err
28 | }
29 |
30 | m := info.Mode()
31 | switch {
32 | case m.IsDir():
33 | var tsize uint64
34 | entries, err := os.ReadDir(root)
35 | if err != nil {
36 | return nil, 0, err
37 | }
38 | lnks := make([]dagpb.PBLink, 0, len(entries))
39 | for _, e := range entries {
40 | lnk, sz, err := BuildUnixFSRecursive(path.Join(root, e.Name()), ls)
41 | if err != nil {
42 | return nil, 0, err
43 | }
44 | tsize += sz
45 | entry, err := BuildUnixFSDirectoryEntry(e.Name(), int64(sz), lnk)
46 | if err != nil {
47 | return nil, 0, err
48 | }
49 | lnks = append(lnks, entry)
50 | }
51 | return BuildUnixFSDirectory(lnks, ls)
52 | case m.Type() == fs.ModeSymlink:
53 | content, err := os.Readlink(root)
54 | if err != nil {
55 | return nil, 0, err
56 | }
57 | outLnk, sz, err := BuildUnixFSSymlink(content, ls)
58 | if err != nil {
59 | return nil, 0, err
60 | }
61 | return outLnk, sz, nil
62 | case m.IsRegular():
63 | fp, err := os.Open(root)
64 | if err != nil {
65 | return nil, 0, err
66 | }
67 | defer fp.Close()
68 | outLnk, sz, err := BuildUnixFSFile(fp, "", ls)
69 | if err != nil {
70 | return nil, 0, err
71 | }
72 | return outLnk, sz, nil
73 | default:
74 | return nil, 0, fmt.Errorf("cannot encode non-regular file: %s", root)
75 | }
76 | }
77 |
78 | // estimateDirSize estimates whether a directory is big enough to warrant sharding.
79 | // The estimate is the sum of len(linkName) + bytelen(linkHash) over all entries
80 | // https://github.com/ipfs/go-unixfs/blob/master/io/directory.go#L152-L162
81 | func estimateDirSize(entries []dagpb.PBLink) int {
82 | s := 0
83 | for _, e := range entries {
84 | s += len(e.Name.Must().String())
85 | lnk := e.Hash.Link()
86 | cl, ok := lnk.(cidlink.Link)
87 | if ok {
88 | s += cl.ByteLen()
89 | } else if lnk == nil {
90 | s += 0
91 | } else {
92 | s += len(lnk.Binary())
93 | }
94 | }
95 | return s
96 | }
97 |
98 | // BuildUnixFSDirectory creates a directory link over a collection of entries.
99 | func BuildUnixFSDirectory(entries []dagpb.PBLink, ls *ipld.LinkSystem) (ipld.Link, uint64, error) {
100 | if estimateDirSize(entries) > shardSplitThreshold {
101 | return BuildUnixFSShardedDirectory(defaultShardWidth, multihash.MURMUR3X64_64, entries, ls)
102 | }
103 | ufd, err := BuildUnixFS(func(b *Builder) {
104 | DataType(b, data.Data_Directory)
105 | })
106 | if err != nil {
107 | return nil, 0, err
108 | }
109 | pbb := dagpb.Type.PBNode.NewBuilder()
110 | pbm, err := pbb.BeginMap(2)
111 | if err != nil {
112 | return nil, 0, err
113 | }
114 | if err = pbm.AssembleKey().AssignString("Data"); err != nil {
115 | return nil, 0, err
116 | }
117 | if err = pbm.AssembleValue().AssignBytes(data.EncodeUnixFSData(ufd)); err != nil {
118 | return nil, 0, err
119 | }
120 | if err = pbm.AssembleKey().AssignString("Links"); err != nil {
121 | return nil, 0, err
122 | }
123 | lnks, err := pbm.AssembleValue().BeginList(int64(len(entries)))
124 | if err != nil {
125 | return nil, 0, err
126 | }
127 | // sorting happens in codec-dagpb
128 | var totalSize uint64
129 | for _, e := range entries {
130 | totalSize += uint64(e.Tsize.Must().Int())
131 | if err := lnks.AssembleValue().AssignNode(e); err != nil {
132 | return nil, 0, err
133 | }
134 | }
135 | if err := lnks.Finish(); err != nil {
136 | return nil, 0, err
137 | }
138 | if err := pbm.Finish(); err != nil {
139 | return nil, 0, err
140 | }
141 | node := pbb.Build()
142 | lnk, sz, err := sizedStore(ls, fileLinkProto, node)
143 | if err != nil {
144 | return nil, 0, err
145 | }
146 | return lnk, totalSize + sz, err
147 | }
148 |
--------------------------------------------------------------------------------
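A usage sketch for `BuildUnixFSRecursive` above (assuming an in-memory link system as used in this repo's tests; `./site` is a hypothetical input directory):

```go
ls := cidlink.DefaultLinkSystem()
storage := cidlink.Memory{}
ls.StorageReadOpener = storage.OpenRead
ls.StorageWriteOpener = storage.OpenWrite

// Imports the tree rooted at ./site. Directories whose estimated entry size
// (names + CID bytes) exceeds shardSplitThreshold (262144) are built as
// HAMT-sharded directories automatically.
root, tsize, err := BuildUnixFSRecursive("./site", &ls)
if err != nil {
	// handle error
}
fmt.Println(root, tsize)
```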
/data/builder/dirshard.go:
--------------------------------------------------------------------------------
1 | package builder
2 |
3 | import (
4 | "fmt"
5 | "hash"
6 |
7 | bitfield "github.com/ipfs/go-bitfield"
8 | "github.com/ipfs/go-unixfsnode/data"
9 | "github.com/ipfs/go-unixfsnode/hamt"
10 | dagpb "github.com/ipld/go-codec-dagpb"
11 | "github.com/ipld/go-ipld-prime"
12 | "github.com/multiformats/go-multihash"
13 | "github.com/spaolacci/murmur3"
14 | )
15 |
16 | type shard struct {
17 | // metadata about the shard
18 | hasher uint64
19 | size int
20 | sizeLg2 int
21 | width int
22 | depth int
23 |
24 | children map[int]entry
25 | }
26 |
27 | // a shard entry is either another shard, or a direct link.
28 | type entry struct {
29 | *shard
30 | *hamtLink
31 | }
32 |
33 | // a hamtLink is a member of the hamt - the file/directory pointed to, but
34 | // stored with its hashed key, which is used for addressing.
35 | type hamtLink struct {
36 | hash hashBits
37 | dagpb.PBLink
38 | }
39 |
40 | // BuildUnixFSShardedDirectory builds a HAMT of unixfs shards encoding a directory
41 | // with more entries than typically fit in a standard single-block unixfs directory.
42 | func BuildUnixFSShardedDirectory(size int, hasher uint64, entries []dagpb.PBLink, ls *ipld.LinkSystem) (ipld.Link, uint64, error) {
43 | // hash the entries
44 | var h hash.Hash
45 | var err error
46 | // TODO: use the multihash registry once murmur3 behavior is encoded there.
47 | // https://github.com/multiformats/go-multihash/pull/150
48 | if hasher == hamt.HashMurmur3 {
49 | h = murmur3.New64()
50 | } else {
51 | h, err = multihash.GetHasher(hasher)
52 | if err != nil {
53 | return nil, 0, err
54 | }
55 | }
56 | hamtEntries := make([]hamtLink, 0, len(entries))
57 | for _, e := range entries {
58 | name := e.Name.Must().String()
59 | h.Reset()
60 | h.Write([]byte(name))
61 | sum := h.Sum(nil)
62 | hamtEntries = append(hamtEntries, hamtLink{
63 | sum,
64 | e,
65 | })
66 | }
67 |
68 | sizeLg2, err := logtwo(size)
69 | if err != nil {
70 | return nil, 0, err
71 | }
72 |
73 | sharder := shard{
74 | hasher: hasher,
75 | size: size,
76 | sizeLg2: sizeLg2,
77 | width: len(fmt.Sprintf("%X", size-1)),
78 | depth: 0,
79 |
80 | children: make(map[int]entry),
81 | }
82 |
83 | for _, entry := range hamtEntries {
84 | err := sharder.add(entry)
85 | if err != nil {
86 | return nil, 0, err
87 | }
88 | }
89 |
90 | return sharder.serialize(ls)
91 | }
92 |
93 | func (s *shard) add(lnk hamtLink) error {
94 | // get the bucket for lnk
95 | bucket, err := lnk.hash.Slice(s.depth*s.sizeLg2, s.sizeLg2)
96 | if err != nil {
97 | return err
98 | }
99 |
100 | current, ok := s.children[bucket]
101 | if !ok {
102 | // no bucket, make one with this entry
103 | s.children[bucket] = entry{nil, &lnk}
104 | return nil
105 | } else if current.shard != nil {
106 | // existing shard, add this link to the shard
107 | return current.shard.add(lnk)
108 | }
109 | // make a shard for current and lnk
110 | newShard := entry{
111 | &shard{
112 | hasher: s.hasher,
113 | size: s.size,
114 | sizeLg2: s.sizeLg2,
115 | width: s.width,
116 | depth: s.depth + 1,
117 | children: make(map[int]entry),
118 | },
119 | nil,
120 | }
121 | // add existing link from this bucket to the new shard
122 | if err := newShard.add(*current.hamtLink); err != nil {
123 | return err
124 | }
125 | // replace bucket with shard
126 | s.children[bucket] = newShard
127 | // add new link to the new shard
128 | return newShard.add(lnk)
129 | }
130 |
131 | func (s *shard) formatLinkName(name string, idx int) string {
132 | return fmt.Sprintf("%0*X%s", s.width, idx, name)
133 | }
134 |
135 | // bitmap calculates the bitmap of which links in the shard are set.
136 | func (s *shard) bitmap() ([]byte, error) {
137 | bm, err := bitfield.NewBitfield(s.size)
138 | if err != nil {
139 | return nil, err
140 | }
141 | for i := 0; i < s.size; i++ {
142 | if _, ok := s.children[i]; ok {
143 | bm.SetBit(i)
144 | }
145 | }
146 | return bm.Bytes(), nil
147 | }
148 |
149 | // serialize stores the concrete representation of this shard in the link system and
150 | // returns a link to it.
151 | func (s *shard) serialize(ls *ipld.LinkSystem) (ipld.Link, uint64, error) {
152 | bm, err := s.bitmap()
153 | if err != nil {
154 | return nil, 0, err
155 | }
156 | ufd, err := BuildUnixFS(func(b *Builder) {
157 | DataType(b, data.Data_HAMTShard)
158 | HashType(b, s.hasher)
159 | Data(b, bm)
160 | Fanout(b, uint64(s.size))
161 | })
162 | if err != nil {
163 | return nil, 0, err
164 | }
165 | pbb := dagpb.Type.PBNode.NewBuilder()
166 | pbm, err := pbb.BeginMap(2)
167 | if err != nil {
168 | return nil, 0, err
169 | }
170 | if err = pbm.AssembleKey().AssignString("Data"); err != nil {
171 | return nil, 0, err
172 | }
173 | if err = pbm.AssembleValue().AssignBytes(data.EncodeUnixFSData(ufd)); err != nil {
174 | return nil, 0, err
175 | }
176 | if err = pbm.AssembleKey().AssignString("Links"); err != nil {
177 | return nil, 0, err
178 | }
179 |
180 | lnkBuilder := dagpb.Type.PBLinks.NewBuilder()
181 | lnks, err := lnkBuilder.BeginList(int64(len(s.children)))
182 | if err != nil {
183 | return nil, 0, err
184 | }
185 | // sorting happens in codec-dagpb
186 | var totalSize uint64
187 | for idx, e := range s.children {
188 | var lnk dagpb.PBLink
189 | if e.shard != nil {
190 | ipldLnk, sz, err := e.shard.serialize(ls)
191 | if err != nil {
192 | return nil, 0, err
193 | }
194 | totalSize += sz
195 | fullName := s.formatLinkName("", idx)
196 | lnk, err = BuildUnixFSDirectoryEntry(fullName, int64(sz), ipldLnk)
197 | if err != nil {
198 | return nil, 0, err
199 | }
200 | } else {
201 | fullName := s.formatLinkName(e.Name.Must().String(), idx)
202 | sz := e.Tsize.Must().Int()
203 | totalSize += uint64(sz)
204 | lnk, err = BuildUnixFSDirectoryEntry(fullName, sz, e.Hash.Link())
205 | }
206 | if err != nil {
207 | return nil, 0, err
208 | }
209 | if err := lnks.AssembleValue().AssignNode(lnk); err != nil {
210 | return nil, 0, err
211 | }
212 | }
213 | if err := lnks.Finish(); err != nil {
214 | return nil, 0, err
215 | }
216 | if err := pbm.AssembleValue().AssignNode(lnkBuilder.Build()); err != nil { return nil, 0, err }
217 | if err := pbm.Finish(); err != nil {
218 | return nil, 0, err
219 | }
220 | node := pbb.Build()
221 | lnk, sz, err := sizedStore(ls, fileLinkProto, node)
222 | if err != nil {
223 | return nil, 0, err
224 | }
225 | return lnk, totalSize + sz, nil
226 | }
227 |
--------------------------------------------------------------------------------
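To make the `formatLinkName` scheme above concrete: bucket indexes are rendered as zero-padded uppercase hex, with the digit width derived from the fanout. A self-contained sketch (not part of the package):

```go
package main

import "fmt"

func main() {
	size := 256                             // the default shard fanout
	width := len(fmt.Sprintf("%X", size-1)) // -> 2 hex digits for 256 buckets
	// A direct entry named "foo" hashed into bucket 10 gets the link name
	// "0Afoo"; a child shard in bucket 10 gets the bare prefix "0A".
	fmt.Printf("%0*X%s\n", width, 10, "foo") // prints "0Afoo"
	fmt.Printf("%0*X\n", width, 10)          // prints "0A"
}
```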
/data/builder/file.go:
--------------------------------------------------------------------------------
1 | package builder
2 |
3 | import (
4 | "fmt"
5 | "io"
6 |
7 | chunk "github.com/ipfs/boxo/chunker"
8 | "github.com/ipfs/go-cid"
9 | "github.com/ipfs/go-unixfsnode/data"
10 | dagpb "github.com/ipld/go-codec-dagpb"
11 | "github.com/ipld/go-ipld-prime"
12 | "github.com/ipld/go-ipld-prime/datamodel"
13 | cidlink "github.com/ipld/go-ipld-prime/linking/cid"
14 | basicnode "github.com/ipld/go-ipld-prime/node/basic"
15 | "github.com/multiformats/go-multicodec"
16 | multihash "github.com/multiformats/go-multihash/core"
17 |
18 | // raw needed for opening as bytes
19 | _ "github.com/ipld/go-ipld-prime/codec/raw"
20 | )
21 |
22 | type fileShardMeta struct {
23 | link datamodel.Link
24 | byteSize uint64
25 | storedSize uint64
26 | }
27 |
28 | type fileShards []fileShardMeta
29 |
30 | func (fs fileShards) totalByteSize() uint64 {
31 | var total uint64
32 | for _, f := range fs {
33 | total += f.byteSize
34 | }
35 | return total
36 | }
37 |
38 | func (fs fileShards) totalStoredSize() uint64 {
39 | var total uint64
40 | for _, f := range fs {
41 | total += f.storedSize
42 | }
43 | return total
44 | }
45 |
46 | func (fs fileShards) byteSizes() []uint64 {
47 | sizes := make([]uint64, len(fs))
48 | for i, f := range fs {
49 | sizes[i] = f.byteSize
50 | }
51 | return sizes
52 | }
53 |
54 | // BuildUnixFSFile creates a dag of ipld Nodes representing file data.
55 | // This recreates the functionality previously found in
56 | // github.com/ipfs/go-unixfs/importer/balanced, but tailored to the
57 | // go-unixfsnode & ipld-prime data layout of nodes.
58 | // We make some assumptions in building files with this builder to reduce
59 | // complexity, namely:
60 | // - we assume we are using CIDv1, which implies that the leaf
61 | // data nodes are stored as raw bytes.
62 | // ref: https://github.com/ipfs/go-mfs/blob/1b1fd06cff048caabeddb02d4dbf22d2274c7971/file.go#L50
63 | func BuildUnixFSFile(r io.Reader, chunker string, ls *ipld.LinkSystem) (ipld.Link, uint64, error) {
64 | src, err := chunk.FromString(r, chunker)
65 | if err != nil {
66 | return nil, 0, err
67 | }
68 |
69 | var prev fileShards
70 | depth := 1
71 | for {
72 | next, err := fileTreeRecursive(depth, prev, src, ls)
73 | if err != nil {
74 | return nil, 0, err
75 | }
76 |
77 | if prev != nil && prev[0].link == next.link {
78 | if next.link == nil {
79 | node := basicnode.NewBytes([]byte{})
80 | link, err := ls.Store(ipld.LinkContext{}, leafLinkProto, node)
81 | return link, 0, err
82 | }
83 | return next.link, next.storedSize, nil
84 | }
85 |
86 | prev = []fileShardMeta{next}
87 | depth++
88 | }
89 | }
90 |
91 | var fileLinkProto = cidlink.LinkPrototype{
92 | Prefix: cid.Prefix{
93 | Version: 1,
94 | Codec: uint64(multicodec.DagPb),
95 | MhType: multihash.SHA2_256,
96 | MhLength: 32,
97 | },
98 | }
99 |
100 | var leafLinkProto = cidlink.LinkPrototype{
101 | Prefix: cid.Prefix{
102 | Version: 1,
103 | Codec: uint64(multicodec.Raw),
104 | MhType: multihash.SHA2_256,
105 | MhLength: 32,
106 | },
107 | }
108 |
109 | // fileTreeRecursive packs a file into chunks recursively, returning a root for
110 | // this level of recursion, the number of file bytes consumed at this level of
111 | // recursion, and the number of bytes used to store this level of recursion.
112 | func fileTreeRecursive(
113 | depth int,
114 | children fileShards,
115 | src chunk.Splitter,
116 | ls *ipld.LinkSystem,
117 | ) (fileShardMeta, error) {
118 | if depth == 1 {
119 | // file leaf: consume the next chunk, encode it as raw bytes, store and return
120 | if len(children) > 0 {
121 | return fileShardMeta{}, fmt.Errorf("leaf nodes cannot have children")
122 | }
123 | leaf, err := src.NextBytes()
124 | if err != nil {
125 | if err == io.EOF {
126 | return fileShardMeta{}, nil
127 | }
128 | return fileShardMeta{}, err
129 | }
130 | node := basicnode.NewBytes(leaf)
131 | l, sz, err := sizedStore(ls, leafLinkProto, node)
132 | if err != nil {
133 | return fileShardMeta{}, err
134 | }
135 | return fileShardMeta{link: l, byteSize: uint64(len(leaf)), storedSize: sz}, nil
136 | }
137 |
138 | // depth > 1
139 |
140 | if children == nil {
141 | children = make(fileShards, 0)
142 | }
143 |
144 | // fill up the links for this level, if we need to go beyond
145 | // DefaultLinksPerBlock we'll end up back here making a parallel tree
146 | for len(children) < DefaultLinksPerBlock {
147 | // descend down toward the leaves
148 | next, err := fileTreeRecursive(depth-1, nil, src, ls)
149 | if err != nil {
150 | return fileShardMeta{}, err
151 | } else if next.link == nil { // eof
152 | break
153 | }
154 | children = append(children, next)
155 | }
156 |
157 | if len(children) == 0 {
158 | // empty case
159 | return fileShardMeta{}, nil
160 | } else if len(children) == 1 {
161 | // degenerate case
162 | return children[0], nil
163 | }
164 |
165 | // make the unixfs node
166 | node, err := BuildUnixFS(func(b *Builder) {
167 | FileSize(b, children.totalByteSize())
168 | BlockSizes(b, children.byteSizes())
169 | })
170 | if err != nil {
171 | return fileShardMeta{}, err
172 | }
173 | pbn, err := packFileChildren(node, children)
174 | if err != nil {
175 | return fileShardMeta{}, err
176 | }
177 |
178 | link, sz, err := sizedStore(ls, fileLinkProto, pbn)
179 | if err != nil {
180 | return fileShardMeta{}, err
181 | }
182 | return fileShardMeta{
183 | link: link,
184 | byteSize: children.totalByteSize(),
185 | storedSize: children.totalStoredSize() + sz,
186 | }, nil
187 | }
188 |
189 | func packFileChildren(node data.UnixFSData, children fileShards) (datamodel.Node, error) {
190 | dpbb := dagpb.Type.PBNode.NewBuilder()
191 | pbm, err := dpbb.BeginMap(2)
192 | if err != nil {
193 | return nil, err
194 | }
195 | pblb, err := pbm.AssembleEntry("Links")
196 | if err != nil {
197 | return nil, err
198 | }
199 | pbl, err := pblb.BeginList(int64(len(children)))
200 | if err != nil {
201 | return nil, err
202 | }
203 | for _, c := range children {
204 | pbln, err := BuildUnixFSDirectoryEntry("", int64(c.storedSize), c.link)
205 | if err != nil {
206 | return nil, err
207 | }
208 | if err = pbl.AssembleValue().AssignNode(pbln); err != nil {
209 | return nil, err
210 | }
211 | }
212 | if err = pbl.Finish(); err != nil {
213 | return nil, err
214 | }
215 | if err = pbm.AssembleKey().AssignString("Data"); err != nil {
216 | return nil, err
217 | }
218 | if err = pbm.AssembleValue().AssignBytes(data.EncodeUnixFSData(node)); err != nil {
219 | return nil, err
220 | }
221 | if err = pbm.Finish(); err != nil {
222 | return nil, err
223 | }
224 | return dpbb.Build(), nil
225 | }
226 |
227 | // BuildUnixFSDirectoryEntry creates the link to a file or directory as it appears within a unixfs directory.
228 | func BuildUnixFSDirectoryEntry(name string, size int64, hash ipld.Link) (dagpb.PBLink, error) {
229 | dpbl := dagpb.Type.PBLink.NewBuilder()
230 | lma, err := dpbl.BeginMap(3)
231 | if err != nil {
232 | return nil, err
233 | }
234 | if err = lma.AssembleKey().AssignString("Hash"); err != nil {
235 | return nil, err
236 | }
237 | if err = lma.AssembleValue().AssignLink(hash); err != nil {
238 | return nil, err
239 | }
240 | if err = lma.AssembleKey().AssignString("Name"); err != nil {
241 | return nil, err
242 | }
243 | if err = lma.AssembleValue().AssignString(name); err != nil {
244 | return nil, err
245 | }
246 | if err = lma.AssembleKey().AssignString("Tsize"); err != nil {
247 | return nil, err
248 | }
249 | if err = lma.AssembleValue().AssignInt(size); err != nil {
250 | return nil, err
251 | }
252 | if err = lma.Finish(); err != nil {
253 | return nil, err
254 | }
255 | return dpbl.Build().(dagpb.PBLink), nil
256 | }
257 |
258 | // BuildUnixFSSymlink builds a symlink entry in a unixfs tree
259 | func BuildUnixFSSymlink(content string, ls *ipld.LinkSystem) (ipld.Link, uint64, error) {
260 | // make the unixfs node.
261 | node, err := BuildUnixFS(func(b *Builder) {
262 | DataType(b, data.Data_Symlink)
263 | Data(b, []byte(content))
264 | })
265 | if err != nil {
266 | return nil, 0, err
267 | }
268 |
269 | dpbb := dagpb.Type.PBNode.NewBuilder()
270 | pbm, err := dpbb.BeginMap(2)
271 | if err != nil {
272 | return nil, 0, err
273 | }
274 | pblb, err := pbm.AssembleEntry("Links")
275 | if err != nil {
276 | return nil, 0, err
277 | }
278 | pbl, err := pblb.BeginList(0)
279 | if err != nil {
280 | return nil, 0, err
281 | }
282 | if err = pbl.Finish(); err != nil {
283 | return nil, 0, err
284 | }
285 | if err = pbm.AssembleKey().AssignString("Data"); err != nil {
286 | return nil, 0, err
287 | }
288 | if err = pbm.AssembleValue().AssignBytes(data.EncodeUnixFSData(node)); err != nil {
289 | return nil, 0, err
290 | }
291 | if err = pbm.Finish(); err != nil {
292 | return nil, 0, err
293 | }
294 | pbn := dpbb.Build()
295 |
296 | return sizedStore(ls, fileLinkProto, pbn)
297 | }
298 |
299 | // Constants below are from
300 | // https://github.com/ipfs/go-unixfs/blob/ec6bb5a4c5efdc3a5bce99151b294f663ee9c08d/importer/helpers/helpers.go
301 |
302 | // BlockSizeLimit specifies the maximum size an imported block can have.
303 | var BlockSizeLimit = 1048576 // 1 MB
304 |
305 | // rough estimates on expected sizes
306 | var roughLinkBlockSize = 1 << 13 // 8KB
307 | var roughLinkSize = 34 + 8 + 5 // sha256 multihash + size + no name + protobuf framing
308 |
309 | // DefaultLinksPerBlock governs how the importer decides how many links there
310 | // will be per block. This calculation is based on expected distributions of:
311 | // - the expected distribution of block sizes
312 | // - the expected distribution of link sizes
313 | // - desired access speed
314 | //
315 | // For now, we use:
316 | //
317 | // var roughLinkBlockSize = 1 << 13 // 8KB
318 | // var roughLinkSize = 34 + 8 + 5 // sha256 multihash + size + no name
319 | // // + protobuf framing
320 | // var DefaultLinksPerBlock = (roughLinkBlockSize / roughLinkSize)
321 | // = ( 8192 / 47 )
322 | // = (approximately) 174
323 | var DefaultLinksPerBlock = roughLinkBlockSize / roughLinkSize
324 |
--------------------------------------------------------------------------------
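A usage sketch for `BuildUnixFSFile` above. The chunker argument is parsed by boxo's `chunk.FromString`: `""` selects the default chunker, and a string such as `"size-262144"` selects fixed-size chunks (error handling elided):

```go
ls := cidlink.DefaultLinkSystem()
storage := cidlink.Memory{}
ls.StorageReadOpener = storage.OpenRead
ls.StorageWriteOpener = storage.OpenWrite

payload := []byte("some file content")
root, storedSize, err := BuildUnixFSFile(bytes.NewReader(payload), "size-262144", &ls)
if err != nil {
	// handle error
}
// root is the CID of the file DAG; storedSize totals the stored block bytes.
fmt.Println(root, storedSize)
```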
/data/builder/file_test.go:
--------------------------------------------------------------------------------
1 | package builder
2 |
3 | import (
4 | "bytes"
5 | "context"
6 | "strconv"
7 | "testing"
8 |
9 | "github.com/ipfs/go-cid"
10 | "github.com/ipfs/go-test/random"
11 | "github.com/ipfs/go-unixfsnode/file"
12 | dagpb "github.com/ipld/go-codec-dagpb"
13 | "github.com/ipld/go-ipld-prime"
14 | cidlink "github.com/ipld/go-ipld-prime/linking/cid"
15 | "github.com/stretchr/testify/require"
16 | )
17 |
18 | // referenceTestCases were generated with older IPFS libraries: both bare forms of files
19 | // sharded across raw leaves with CIDv1, and the same files wrapped in a directory under a name equal to the number of bytes.
20 | var referenceTestCases = []struct {
21 | size int
22 | bareExpected cid.Cid
23 | wrappedExpected cid.Cid
24 | }{
25 | {
26 | size: 1024,
27 | bareExpected: cid.MustParse("bafkreigwqvgm5f6vgdv7wjkttdhgnkpbazhvuzvrqzaje4scb4moeinjum"),
28 | wrappedExpected: cid.MustParse("bafybeib7rloaw4vl56brrnsetobsopu23e5ezoqxq4zorxxtljoeafcpca"),
29 | },
30 | {
31 | size: 10 * 1024,
32 | bareExpected: cid.MustParse("bafkreihaxm6boumj2cwzbs3t3mnktfsgcf25ratcvtcf5kqnsymgk2gxqy"),
33 | wrappedExpected: cid.MustParse("bafybeieogamws33kfbtpk5mdhoo2wkxwmd7dwnduyvo7wo65ll75d36xgi"),
34 | },
35 | {
36 | size: 100 * 1024,
37 | bareExpected: cid.MustParse("bafkreia7ockt35s5ki5qzrm37bp57woott6bju6gw64wl7rus7xwjcoemq"),
38 | wrappedExpected: cid.MustParse("bafybeicywdnaqrwj3t7xltqgtaoi3ebk6fi2oyam6gsqle3bl4piucpzua"),
39 | },
40 | {
41 | size: 10 * 1024 * 1024,
42 | // https://github.com/ipfs/go-unixfs/blob/a7243ebfc36eaa89d79a39d3cef3fa1e60f7e49e/importer/importer_test.go#L49C1-L49C1
43 | // QmZN1qquw84zhV4j6vT56tCcmFxaDaySL1ezTXFvMdNmrK, but with --cid-version=1 all the way through the DAG
44 | bareExpected: cid.MustParse("bafybeibxlkafr6oqgflgjcjfbl5db6agozxdknpludvh7ym54oa5qoowbm"),
45 | wrappedExpected: cid.MustParse("bafybeigqbp6jog6fvxbpq4opzcgn5rsp7xqrk7xa4zbgnqo6htjmolt3iy"),
46 | },
47 | }
48 |
49 | func TestBuildUnixFSFile_Reference(t *testing.T) {
50 | for _, tc := range referenceTestCases {
51 | t.Run(strconv.Itoa(tc.size), func(t *testing.T) {
52 | buf := make([]byte, tc.size)
53 | random.NewSeededRand(0xdeadbeef).Read(buf)
54 | r := bytes.NewReader(buf)
55 |
56 | ls := cidlink.DefaultLinkSystem()
57 | storage := cidlink.Memory{}
58 | ls.StorageReadOpener = storage.OpenRead
59 | ls.StorageWriteOpener = storage.OpenWrite
60 |
61 | f, sz, err := BuildUnixFSFile(r, "", &ls)
62 | require.NoError(t, err)
63 | require.Equal(t, tc.bareExpected.String(), f.(cidlink.Link).Cid.String())
64 |
65 | // check sz is the stored size of all blocks in the generated DAG
66 | var totStored int
67 | for _, blk := range storage.Bag {
68 | totStored += len(blk)
69 | }
70 | require.Equal(t, totStored, int(sz))
71 | })
72 | }
73 | }
74 |
75 | func TestUnixFSFileRoundtrip(t *testing.T) {
76 | buf := make([]byte, 10*1024*1024)
77 | random.NewSeededRand(0xdeadbeef).Read(buf)
78 | r := bytes.NewReader(buf)
79 |
80 | ls := cidlink.DefaultLinkSystem()
81 | storage := cidlink.Memory{}
82 | ls.StorageReadOpener = storage.OpenRead
83 | ls.StorageWriteOpener = storage.OpenWrite
84 |
85 | f, _, err := BuildUnixFSFile(r, "", &ls)
86 | if err != nil {
87 | t.Fatal(err)
88 | }
89 |
90 | // get back the root node substrate from the link at the top of the builder.
91 | fr, err := ls.Load(ipld.LinkContext{}, f, dagpb.Type.PBNode)
92 | if err != nil {
93 | t.Fatal(err)
94 | }
95 |
96 | ufn, err := file.NewUnixFSFile(context.Background(), fr, &ls)
97 | if err != nil {
98 | t.Fatal(err)
99 | }
100 | // read back out the file.
101 | out, err := ufn.AsBytes()
102 | if err != nil {
103 | t.Fatal(err)
104 | }
105 | if !bytes.Equal(out, buf) {
106 | t.Fatal("Not equal")
107 | }
108 | }
109 |
--------------------------------------------------------------------------------
/data/builder/quick/quick.go:
--------------------------------------------------------------------------------
1 | // Package quickbuilder is designed as a replacement for the existing ipfs-files
2 | // constructor, providing a simple way to generate synthetic directory trees.
3 | package quickbuilder
4 |
5 | import (
6 | "bytes"
7 |
8 | "github.com/ipfs/go-unixfsnode/data/builder"
9 | dagpb "github.com/ipld/go-codec-dagpb"
10 | "github.com/ipld/go-ipld-prime"
11 | )
12 |
13 | // A Node represents the most basic form of a file or directory
14 | type Node interface {
15 | Size() (int64, error)
16 | Link() ipld.Link
17 | }
18 |
19 | type lnkNode struct {
20 | link ipld.Link
21 | size int64
22 | ls *ipld.LinkSystem
23 | }
24 |
25 | func (ln *lnkNode) Size() (int64, error) {
26 | return ln.size, nil
27 | }
28 |
29 | func (ln *lnkNode) Link() ipld.Link {
30 | return ln.link
31 | }
32 |
33 | // Builder provides the linksystem context for saving files & directories
34 | type Builder struct {
35 | ls *ipld.LinkSystem
36 | }
37 |
38 | // NewMapDirectory creates a unixfs directory from a list of named entries
39 | func (b *Builder) NewMapDirectory(entries map[string]Node) Node {
40 | lnks := make([]dagpb.PBLink, 0, len(entries))
41 | for name, e := range entries {
42 | sz, _ := e.Size()
43 | entry, err := builder.BuildUnixFSDirectoryEntry(name, sz, e.Link())
44 | if err != nil {
45 | return nil
46 | }
47 | lnks = append(lnks, entry)
48 | }
49 | n, size, err := builder.BuildUnixFSDirectory(lnks, b.ls)
50 | if err != nil {
51 | panic(err)
52 | }
53 | return &lnkNode{
54 | n,
55 | int64(size),
56 | b.ls,
57 | }
58 | }
59 |
60 | // NewBytesFile creates a unixfs file from byte contents
61 | func (b *Builder) NewBytesFile(data []byte) Node {
62 | n, size, err := builder.BuildUnixFSFile(bytes.NewReader(data), "", b.ls)
63 | if err != nil {
64 | panic(err)
65 | }
66 | return &lnkNode{
67 | n,
68 | int64(size),
69 | b.ls,
70 | }
71 | }
72 |
73 | // Store provides a builder context for making unixfs files and directories
74 | func Store(ls *ipld.LinkSystem, cb func(b *Builder) error) error {
75 | b := Builder{ls}
76 | return cb(&b)
77 | }
78 |
--------------------------------------------------------------------------------
/data/builder/quick/quick_test.go:
--------------------------------------------------------------------------------
1 | package quickbuilder_test
2 |
3 | import (
4 | "testing"
5 |
6 | quickbuilder "github.com/ipfs/go-unixfsnode/data/builder/quick"
7 | cidlink "github.com/ipld/go-ipld-prime/linking/cid"
8 | "github.com/ipld/go-ipld-prime/storage/memstore"
9 | )
10 |
11 | func TestQuickBuilder(t *testing.T) {
12 | ls := cidlink.DefaultLinkSystem()
13 | store := memstore.Store{Bag: make(map[string][]byte)}
14 | ls.SetReadStorage(&store)
15 | ls.SetWriteStorage(&store)
16 | err := quickbuilder.Store(&ls, func(b *quickbuilder.Builder) error {
17 | b.NewMapDirectory(map[string]quickbuilder.Node{
18 | "file.txt": b.NewBytesFile([]byte("1")),
19 | "foo? #<'": b.NewMapDirectory(map[string]quickbuilder.Node{
20 | "file.txt": b.NewBytesFile([]byte("2")),
21 | "bar": b.NewMapDirectory(map[string]quickbuilder.Node{
22 | "file.txt": b.NewBytesFile([]byte("3")),
23 | }),
24 | }),
25 | })
26 | return nil
27 | })
28 | if err != nil {
29 | t.Fatal(err)
30 | }
31 |
32 | if len(store.Bag) != 6 {
33 | t.Fatal("unexpected number of stored nodes")
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/data/builder/util.go:
--------------------------------------------------------------------------------
1 | package builder
2 |
3 | import (
4 | "fmt"
5 | "io"
6 | "math/bits"
7 |
8 | "github.com/ipld/go-ipld-prime"
9 | "github.com/ipld/go-ipld-prime/codec"
10 | "github.com/ipld/go-ipld-prime/datamodel"
11 | )
12 |
13 | // Common code from go-unixfs/hamt/util.go
14 |
15 | // hashBits is a helper for pulling out sections of a hash
16 | type hashBits []byte
17 |
18 | func mkmask(n int) byte {
19 | return (1 << uint(n)) - 1
20 | }
21 |
22 | // Slice returns the 'width' bits of the hashBits value as an integer, or an
23 | // error if there aren't enough bits.
24 | func (hb hashBits) Slice(offset, width int) (int, error) {
25 | if offset+width > len(hb)*8 {
26 | return 0, fmt.Errorf("sharded directory too deep")
27 | }
28 | return hb.slice(offset, width), nil
29 | }
30 |
31 | func (hb hashBits) slice(offset, width int) int {
32 | curbi := offset / 8
33 | leftb := 8 - (offset % 8)
34 |
35 | curb := hb[curbi]
36 | if width == leftb {
37 | out := int(mkmask(width) & curb)
38 | return out
39 | } else if width < leftb {
40 | a := curb & mkmask(leftb) // mask out the high bits we don't want
41 | b := a & ^mkmask(leftb-width) // mask out the low bits we don't want
42 | c := b >> uint(leftb-width) // shift what's left down
43 | return int(c)
44 | } else {
45 | out := int(mkmask(leftb) & curb)
46 | out <<= uint(width - leftb)
47 | out += hb.slice(offset+leftb, width-leftb)
48 | return out
49 | }
50 | }
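// Worked example: with hb = {0b10110011, 0b01011100}, slice(10, 6) computes
// curbi = 10/8 = 1 and leftb = 8-(10%8) = 6, so exactly the low 6 bits of
// hb[1] are wanted: mkmask(6) & 0b01011100 = 0b011100 = 28. When width
// exceeds leftb, the final branch takes what remains of the current byte and
// recurses into the following byte(s) for the rest.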
51 |
52 | func logtwo(v int) (int, error) {
53 | if v <= 0 {
54 | return 0, fmt.Errorf("hamt size should be a power of two")
55 | }
56 | lg2 := bits.TrailingZeros(uint(v))
57 | if 1<<uint(lg2) != v {
58 | return 0, fmt.Errorf("hamt size should be a power of two")
59 | }
60 | return lg2, nil
61 | }

--------------------------------------------------------------------------------
/data/unmarshal.go:
--------------------------------------------------------------------------------
141 | if mode > math.MaxUint32 {
142 | return errors.New("mode should be a 32 bit value")
143 | }
144 | remaining = remaining[n:]
145 | qp.MapEntry(ma, Field__Mode, qp.Int(int64(mode)))
146 | case Data_MtimeWireNum:
147 | if wireType != protowire.BytesType {
148 | return ErrWrongWireType{"UnixFSData", Field__Mtime, protowire.BytesType, wireType}
149 | }
150 | mTimeBytes, n := protowire.ConsumeBytes(remaining)
151 | if n < 0 {
152 | return protowire.ParseError(n)
153 | }
154 | remaining = remaining[n:]
155 | qp.MapEntry(ma, Field__Mtime, qp.Map(-1, func(ma ipld.MapAssembler) {
156 | err := consumeUnixTime(mTimeBytes, ma)
157 | if err != nil {
158 | panic(err)
159 | }
160 | }))
161 | default:
162 | n := protowire.ConsumeFieldValue(fieldNum, wireType, remaining)
163 | if n < 0 {
164 | return protowire.ParseError(n)
165 | }
166 | remaining = remaining[n:]
167 | }
168 | }
169 | if !packedBlockSizes {
170 | if la == nil {
171 | qp.MapEntry(ma, Field__BlockSizes, qp.List(0, func(ipld.ListAssembler) {}))
172 | } else {
173 | err := la.Finish()
174 | if err != nil {
175 | return err
176 | }
177 | nd := bsa.Build()
178 | qp.MapEntry(ma, Field__BlockSizes, qp.Node(nd))
179 | }
180 | }
181 | return nil
182 | }
183 |
184 | func consumeBlockSizes(remaining []byte, count int64, la ipld.ListAssembler) error {
185 | for i := 0; i < int(count); i++ {
186 | blockSize, n := protowire.ConsumeVarint(remaining)
187 | if n < 0 {
188 | return protowire.ParseError(n)
189 | }
190 | remaining = remaining[n:]
191 | qp.ListEntry(la, qp.Int(int64(blockSize)))
192 | }
193 | if len(remaining) > 0 {
194 | return errors.New("did not consume all block sizes")
195 | }
196 | return nil
197 | }
198 |
199 | func consumeUnixTime(remaining []byte, ma ipld.MapAssembler) error {
200 | for len(remaining) != 0 {
201 | fieldNum, wireType, n := protowire.ConsumeTag(remaining)
202 | if n < 0 {
203 | return protowire.ParseError(n)
204 | }
205 | remaining = remaining[n:]
206 |
207 | switch fieldNum {
208 | case UnixTime_SecondsWireNum:
209 | if wireType != protowire.VarintType {
210 | return ErrWrongWireType{"UnixTime", Field__Seconds, protowire.VarintType, wireType}
211 | }
212 | seconds, n := protowire.ConsumeVarint(remaining)
213 | if n < 0 {
214 | return protowire.ParseError(n)
215 | }
216 | remaining = remaining[n:]
217 | qp.MapEntry(ma, Field__Seconds, qp.Int(int64(seconds)))
218 | case UnixTime_FractionalNanosecondsWireNum:
219 | if wireType != protowire.Fixed32Type {
220 | return ErrWrongWireType{"UnixTime", Field__Nanoseconds, protowire.Fixed32Type, wireType}
221 | }
222 | fractionalNanoseconds, n := protowire.ConsumeFixed32(remaining)
223 | if n < 0 {
224 | return protowire.ParseError(n)
225 | }
226 | remaining = remaining[n:]
227 | qp.MapEntry(ma, Field__Nanoseconds, qp.Int(int64(fractionalNanoseconds)))
228 | default:
229 | n := protowire.ConsumeFieldValue(fieldNum, wireType, remaining)
230 | if n < 0 {
231 | return protowire.ParseError(n)
232 | }
233 | remaining = remaining[n:]
234 | }
235 | }
236 | return nil
237 | }
238 | func DecodeUnixTime(src []byte) (UnixTime, error) {
239 | nd, err := qp.BuildMap(Type.UnixTime, -1, func(ma ipld.MapAssembler) {
240 | err := consumeUnixTime(src, ma)
241 | if err != nil {
242 | panic(err)
243 | }
244 | })
245 | if err != nil {
246 | return nil, err
247 | }
248 | return nd.(UnixTime), nil
249 | }
250 |
251 | func DecodeUnixFSMetadata(src []byte) (UnixFSMetadata, error) {
252 | nd, err := qp.BuildMap(Type.UnixFSMetadata, -1, func(ma ipld.MapAssembler) {
253 | err := consumeUnixFSMetadata(src, ma)
254 | if err != nil {
255 | panic(err)
256 | }
257 | })
258 | if err != nil {
259 | return nil, err
260 | }
261 | return nd.(UnixFSMetadata), nil
262 | }
263 |
264 | func consumeUnixFSMetadata(remaining []byte, ma ipld.MapAssembler) error {
265 | for len(remaining) != 0 {
266 |
267 | fieldNum, wireType, n := protowire.ConsumeTag(remaining)
268 | if n < 0 {
269 | return protowire.ParseError(n)
270 | }
271 | remaining = remaining[n:]
272 |
273 | switch fieldNum {
274 | case Metadata_MimeTypeWireNum:
275 | if wireType != protowire.BytesType {
276 | return ErrWrongWireType{"UnixFSMetadata", Field__MimeType, protowire.BytesType, wireType}
277 | }
278 | mimeTypeBytes, n := protowire.ConsumeBytes(remaining)
279 | if n < 0 {
280 | return protowire.ParseError(n)
281 | }
282 | remaining = remaining[n:]
283 | qp.MapEntry(ma, Field__MimeType, qp.String(string(mimeTypeBytes)))
284 | default:
285 | n := protowire.ConsumeFieldValue(fieldNum, wireType, remaining)
286 | if n < 0 {
287 | return protowire.ParseError(n)
288 | }
289 | remaining = remaining[n:]
290 | }
291 | }
292 | return nil
293 | }
294 |
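A minimal round-trip sketch for the decoders above (not part of the package). It hand-encodes a UnixTime message with protowire, using the field numbers defined in wirenumbers.go below, and decodes it with DecodeUnixTime; the FieldSeconds accessor is assumed to follow the same generated-accessor pattern as FieldDataType used elsewhere in this repository:

package main

import (
	"fmt"

	"github.com/ipfs/go-unixfsnode/data"
	"google.golang.org/protobuf/encoding/protowire"
)

func main() {
	// Seconds (field 1) is a varint; FractionalNanoseconds (field 2) is a fixed32.
	var buf []byte
	buf = protowire.AppendTag(buf, data.UnixTime_SecondsWireNum, protowire.VarintType)
	buf = protowire.AppendVarint(buf, 1700000000)
	buf = protowire.AppendTag(buf, data.UnixTime_FractionalNanosecondsWireNum, protowire.Fixed32Type)
	buf = protowire.AppendFixed32(buf, 500000000)

	ut, err := data.DecodeUnixTime(buf)
	if err != nil {
		panic(err)
	}
	fmt.Println(ut.FieldSeconds().Int()) // 1700000000
}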
--------------------------------------------------------------------------------
/data/wirenumbers.go:
--------------------------------------------------------------------------------
1 | package data
2 |
3 | import "google.golang.org/protobuf/encoding/protowire"
4 |
5 | const (
6 | Data_DataTypeWireNum protowire.Number = 1
7 | Data_DataWireNum protowire.Number = 2
8 | Data_FileSizeWireNum protowire.Number = 3
9 | Data_BlockSizesWireNum protowire.Number = 4
10 | Data_HashTypeWireNum protowire.Number = 5
11 | Data_FanoutWireNum protowire.Number = 6
12 | Data_ModeWireNum protowire.Number = 7
13 | Data_MtimeWireNum protowire.Number = 8
14 | UnixTime_SecondsWireNum protowire.Number = 1
15 | UnixTime_FractionalNanosecondsWireNum protowire.Number = 2
16 | Metadata_MimeTypeWireNum protowire.Number = 1
17 | )
18 |
--------------------------------------------------------------------------------
/directory/basicdir.go:
--------------------------------------------------------------------------------
1 | package directory
2 |
3 | import (
4 | "context"
5 |
6 | "github.com/ipfs/go-unixfsnode/data"
7 | "github.com/ipfs/go-unixfsnode/iter"
8 | "github.com/ipfs/go-unixfsnode/utils"
9 | dagpb "github.com/ipld/go-codec-dagpb"
10 | "github.com/ipld/go-ipld-prime"
11 | "github.com/ipld/go-ipld-prime/schema"
12 | )
13 |
14 | var _ ipld.Node = UnixFSBasicDir(nil)
15 | var _ schema.TypedNode = UnixFSBasicDir(nil)
16 | var _ ipld.ADL = UnixFSBasicDir(nil)
17 |
18 | type UnixFSBasicDir = *_UnixFSBasicDir
19 |
20 | type _UnixFSBasicDir struct {
21 | _substrate dagpb.PBNode
22 | }
23 |
24 | func NewUnixFSBasicDir(ctx context.Context, substrate dagpb.PBNode, nddata data.UnixFSData, _ *ipld.LinkSystem) (ipld.Node, error) {
25 | if nddata.FieldDataType().Int() != data.Data_Directory {
26 | return nil, data.ErrWrongNodeType{Expected: data.Data_Directory, Actual: nddata.FieldDataType().Int()}
27 | }
28 | return &_UnixFSBasicDir{_substrate: substrate}, nil
29 | }
30 |
31 | func (n UnixFSBasicDir) Kind() ipld.Kind {
32 | return n._substrate.Kind()
33 | }
34 |
35 | // LookupByString looks for the key in the list of links with a matching name
36 | func (n UnixFSBasicDir) LookupByString(key string) (ipld.Node, error) {
37 | links := n._substrate.FieldLinks()
38 | link := utils.Lookup(links, key)
39 | if link == nil {
40 | return nil, schema.ErrNoSuchField{Type: nil /*TODO*/, Field: ipld.PathSegmentOfString(key)}
41 | }
42 | return link, nil
43 | }
44 |
45 | func (n UnixFSBasicDir) LookupByNode(key ipld.Node) (ipld.Node, error) {
46 | ks, err := key.AsString()
47 | if err != nil {
48 | return nil, err
49 | }
50 | return n.LookupByString(ks)
51 | }
52 |
53 | func (n UnixFSBasicDir) LookupByIndex(idx int64) (ipld.Node, error) {
54 | return n._substrate.LookupByIndex(idx)
55 | }
56 |
57 | func (n UnixFSBasicDir) LookupBySegment(seg ipld.PathSegment) (ipld.Node, error) {
58 | return n.LookupByString(seg.String())
59 | }
60 |
61 | func (n UnixFSBasicDir) MapIterator() ipld.MapIterator {
62 | return iter.NewUnixFSDirMapIterator(&_UnixFSBasicDir__ListItr{n._substrate.Links.Iterator()}, nil)
63 | }
64 |
65 | // ListIterator returns an iterator which yields key-value pairs
66 | // traversing the node.
67 | // If the node kind is anything other than a list, nil will be returned.
68 | //
69 | // A UnixFSBasicDir is map-like rather than list-like, so this
70 | // implementation always returns nil; use MapIterator to traverse
71 | // the directory entries instead.
72 | func (n UnixFSBasicDir) ListIterator() ipld.ListIterator {
73 | return nil
74 | }
75 |
76 | // Length returns the length of a list, or the number of entries in a map,
77 | // or -1 if the node is not of list nor map kind.
78 | func (n UnixFSBasicDir) Length() int64 {
79 | return n._substrate.FieldLinks().Length()
80 | }
81 |
82 | func (n UnixFSBasicDir) IsAbsent() bool {
83 | return false
84 | }
85 |
86 | func (n UnixFSBasicDir) IsNull() bool {
87 | return false
88 | }
89 |
90 | func (n UnixFSBasicDir) AsBool() (bool, error) {
91 | return n._substrate.AsBool()
92 | }
93 |
94 | func (n UnixFSBasicDir) AsInt() (int64, error) {
95 | return n._substrate.AsInt()
96 | }
97 |
98 | func (n UnixFSBasicDir) AsFloat() (float64, error) {
99 | return n._substrate.AsFloat()
100 | }
101 |
102 | func (n UnixFSBasicDir) AsString() (string, error) {
103 | return n._substrate.AsString()
104 | }
105 |
106 | func (n UnixFSBasicDir) AsBytes() ([]byte, error) {
107 | return n._substrate.AsBytes()
108 | }
109 |
110 | func (n UnixFSBasicDir) AsLink() (ipld.Link, error) {
111 | return n._substrate.AsLink()
112 | }
113 |
114 | func (n UnixFSBasicDir) Prototype() ipld.NodePrototype {
115 | // TODO: should this return something?
116 | // probably not until we write the write interfaces
117 | return nil
118 | }
119 |
120 | // satisfy schema.TypedNode
121 | func (UnixFSBasicDir) Type() schema.Type {
122 | return nil /*TODO:typelit*/
123 | }
124 |
125 | func (n UnixFSBasicDir) Representation() ipld.Node {
126 | return n._substrate.Representation()
127 | }
128 |
129 | // Native map accessors
130 |
131 | func (n UnixFSBasicDir) Iterator() *iter.UnixFSDir__Itr {
132 | return iter.NewUnixFSDirIterator(&_UnixFSBasicDir__ListItr{n._substrate.Links.Iterator()}, nil)
133 | }
134 |
135 | func (n UnixFSBasicDir) Lookup(key dagpb.String) dagpb.Link {
136 | return utils.Lookup(n._substrate.FieldLinks(), key.String())
137 | }
138 |
139 | // direct access to the links and data
140 |
141 | func (n UnixFSBasicDir) FieldLinks() dagpb.PBLinks {
142 | return n._substrate.FieldLinks()
143 | }
144 |
145 | func (n UnixFSBasicDir) FieldData() dagpb.MaybeBytes {
146 | return n._substrate.FieldData()
147 | }
148 |
149 | // Substrate returns the underlying PBNode -- note: only the substrate will encode successfully to protobuf if writing
150 | func (n UnixFSBasicDir) Substrate() ipld.Node {
151 | return n._substrate
152 | }
153 |
154 | type _UnixFSBasicDir__ListItr struct {
155 | _substrate *dagpb.PBLinks__Itr
156 | }
157 |
158 | func (itr *_UnixFSBasicDir__ListItr) Next() (int64, dagpb.PBLink, error) {
159 | idx, v := itr._substrate.Next()
160 | return idx, v, nil
161 | }
162 |
163 | func (itr *_UnixFSBasicDir__ListItr) Done() bool {
164 | return itr._substrate.Done()
165 | }
166 |
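A minimal sketch (not part of the package) of driving this type directly, assuming `substrate` has already been loaded as a dagpb.PBNode known to carry UnixFS directory data:

package direxample

import (
	"context"
	"fmt"

	"github.com/ipfs/go-unixfsnode/data"
	"github.com/ipfs/go-unixfsnode/directory"
	dagpb "github.com/ipld/go-codec-dagpb"
	"github.com/ipld/go-ipld-prime"
)

// lookupEntry resolves one named entry from a basic (non-HAMT) UnixFS directory.
func lookupEntry(ctx context.Context, substrate dagpb.PBNode, name string) (ipld.Node, error) {
	if !substrate.FieldData().Exists() {
		return nil, fmt.Errorf("not a UnixFS node: no Data field")
	}
	ufsData, err := data.DecodeUnixFSData(substrate.FieldData().Must().Bytes())
	if err != nil {
		return nil, err
	}
	// The LinkSystem argument is unused by NewUnixFSBasicDir, so nil is fine here.
	dir, err := directory.NewUnixFSBasicDir(ctx, substrate, ufsData, nil)
	if err != nil {
		return nil, err
	}
	return dir.LookupByString(name) // a dagpb.Link node for the child
}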
--------------------------------------------------------------------------------
/file/deferred.go:
--------------------------------------------------------------------------------
1 | package file
2 |
3 | import (
4 | "context"
5 | "io"
6 |
7 | dagpb "github.com/ipld/go-codec-dagpb"
8 | "github.com/ipld/go-ipld-prime"
9 | )
10 |
11 | func newDeferredFileNode(ctx context.Context, lsys *ipld.LinkSystem, root ipld.Link) LargeBytesNode {
12 | dfn := deferredFileNode{
13 | LargeBytesNode: nil,
14 | root: root,
15 | lsys: lsys,
16 | ctx: ctx,
17 | }
18 | dfn.LargeBytesNode = &deferred{&dfn}
19 | return &dfn
20 | }
21 |
22 | type deferredFileNode struct {
23 | LargeBytesNode
24 |
25 | root ipld.Link
26 | lsys *ipld.LinkSystem
27 | ctx context.Context
28 | }
29 |
30 | func (d *deferredFileNode) resolve() error {
31 | if d.lsys == nil {
32 | return nil
33 | }
34 | target, err := d.lsys.Load(ipld.LinkContext{Ctx: d.ctx}, d.root, protoFor(d.root))
35 | if err != nil {
36 | return err
37 | }
38 |
39 | asFSNode, err := NewUnixFSFile(d.ctx, target, d.lsys)
40 | if err != nil {
41 | return err
42 | }
43 | d.LargeBytesNode = asFSNode
44 | d.root = nil
45 | d.lsys = nil
46 | d.ctx = nil
47 | return nil
48 | }
49 |
50 | type deferred struct {
51 | *deferredFileNode
52 | }
53 |
54 | type deferredReader struct {
55 | io.ReadSeeker
56 | *deferredFileNode
57 | }
58 |
59 | func (d *deferred) AsLargeBytes() (io.ReadSeeker, error) {
60 | return &deferredReader{nil, d.deferredFileNode}, nil
61 | }
62 |
63 | func (d *deferredReader) Read(p []byte) (int, error) {
64 | if d.ReadSeeker == nil {
65 | if err := d.deferredFileNode.resolve(); err != nil {
66 | return 0, err
67 | }
68 | rs, err := d.deferredFileNode.AsLargeBytes()
69 | if err != nil {
70 | return 0, err
71 | }
72 | d.ReadSeeker = rs
73 | }
74 | return d.ReadSeeker.Read(p)
75 | }
76 |
77 | func (d *deferredReader) Seek(offset int64, whence int) (int64, error) {
78 | if d.ReadSeeker == nil {
79 | if err := d.deferredFileNode.resolve(); err != nil {
80 | return 0, err
81 | }
82 | rs, err := d.deferredFileNode.AsLargeBytes()
83 | if err != nil {
84 | return 0, err
85 | }
86 | d.ReadSeeker = rs
87 | }
88 | return d.ReadSeeker.Seek(offset, whence)
89 | }
90 |
91 | func (d *deferred) Kind() ipld.Kind {
92 | return ipld.Kind_Bytes
93 | }
94 |
95 | func (d *deferred) AsBytes() ([]byte, error) {
96 | if err := d.deferredFileNode.resolve(); err != nil {
97 | return []byte{}, err
98 | }
99 |
100 | return d.deferredFileNode.AsBytes()
101 | }
102 |
103 | func (d *deferred) AsBool() (bool, error) {
104 | return false, ipld.ErrWrongKind{TypeName: "bool", MethodName: "AsBool", AppropriateKind: ipld.KindSet_JustBytes}
105 | }
106 |
107 | func (d *deferred) AsInt() (int64, error) {
108 | return 0, ipld.ErrWrongKind{TypeName: "int", MethodName: "AsInt", AppropriateKind: ipld.KindSet_JustBytes}
109 | }
110 |
111 | func (d *deferred) AsFloat() (float64, error) {
112 | return 0, ipld.ErrWrongKind{TypeName: "float", MethodName: "AsFloat", AppropriateKind: ipld.KindSet_JustBytes}
113 | }
114 |
115 | func (d *deferred) AsString() (string, error) {
116 | return "", ipld.ErrWrongKind{TypeName: "string", MethodName: "AsString", AppropriateKind: ipld.KindSet_JustBytes}
117 | }
118 |
119 | func (d *deferred) AsLink() (ipld.Link, error) {
120 | return nil, ipld.ErrWrongKind{TypeName: "link", MethodName: "AsLink", AppropriateKind: ipld.KindSet_JustBytes}
121 | }
122 |
123 | func (d *deferred) AsNode() (ipld.Node, error) {
124 | return nil, nil
125 | }
126 |
127 | func (d *deferred) Size() int {
128 | return 0
129 | }
130 |
131 | func (d *deferred) IsAbsent() bool {
132 | return false
133 | }
134 |
135 | func (d *deferred) IsNull() bool {
136 | if err := d.deferredFileNode.resolve(); err != nil {
137 | return true
138 | }
139 | return d.deferredFileNode.IsNull()
140 | }
141 |
142 | func (d *deferred) Length() int64 {
143 | return 0
144 | }
145 |
146 | func (d *deferred) ListIterator() ipld.ListIterator {
147 | return nil
148 | }
149 |
150 | func (d *deferred) MapIterator() ipld.MapIterator {
151 | return nil
152 | }
153 |
154 | func (d *deferred) LookupByIndex(idx int64) (ipld.Node, error) {
155 | return nil, ipld.ErrWrongKind{}
156 | }
157 |
158 | func (d *deferred) LookupByString(key string) (ipld.Node, error) {
159 | return nil, ipld.ErrWrongKind{}
160 | }
161 |
162 | func (d *deferred) LookupByNode(key ipld.Node) (ipld.Node, error) {
163 | return nil, ipld.ErrWrongKind{}
164 | }
165 |
166 | func (d *deferred) LookupBySegment(seg ipld.PathSegment) (ipld.Node, error) {
167 | return nil, ipld.ErrWrongKind{}
168 | }
169 |
170 | // sharded files / nodes look like dagpb nodes.
171 | func (d *deferred) Prototype() ipld.NodePrototype {
172 | return dagpb.Type.PBNode
173 | }
174 |
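// Note on the pattern above: a deferredFileNode starts life as a stub holding
// only (root, lsys, ctx). The first Read, Seek, AsBytes or IsNull call
// triggers resolve(), which loads the root block exactly once, swaps the real
// file node in behind the embedded LargeBytesNode, and nils out the loading
// state so it can be garbage collected.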
--------------------------------------------------------------------------------
/file/file.go:
--------------------------------------------------------------------------------
1 | package file
2 |
3 | import (
4 | "context"
5 | "io"
6 |
7 | "github.com/ipld/go-ipld-prime"
8 | "github.com/ipld/go-ipld-prime/adl"
9 | "github.com/ipld/go-ipld-prime/datamodel"
10 | )
11 |
12 | // NewUnixFSFile attempts to construct an ipld node from the base protobuf node representing the
13 | // root of a unixfs File.
14 | // It provides a `bytes` view over the file, along with io.ReadSeeker streaming access
15 | // to the file data.
16 | func NewUnixFSFile(ctx context.Context, substrate ipld.Node, lsys *ipld.LinkSystem) (LargeBytesNode, error) {
17 | if substrate.Kind() == ipld.Kind_Bytes {
18 | // A raw / single-node file.
19 | return &singleNodeFile{substrate}, nil
20 | }
21 | // see if it's got children.
22 | links, err := substrate.LookupByString("Links")
23 | if err != nil {
24 | return nil, err
25 | }
26 | if links.Length() == 0 {
27 | // no children.
28 | return newWrappedNode(substrate)
29 | }
30 |
31 | return &shardNodeFile{
32 | ctx: ctx,
33 | lsys: lsys,
34 | substrate: substrate,
35 | }, nil
36 | }
37 |
38 | // NewUnixFSFileWithPreload is the same as NewUnixFSFile but it performs a full load of constituent
39 | // blocks where the file spans multiple blocks. This is useful where a system needs to watch the
40 | // LinkSystem for block loads to determine which blocks make up this file.
41 | // NewUnixFSFileWithPreload is used by the "unixfs-preload" reifier.
42 | func NewUnixFSFileWithPreload(ctx context.Context, substrate ipld.Node, lsys *ipld.LinkSystem) (LargeBytesNode, error) {
43 | f, err := NewUnixFSFile(ctx, substrate, lsys)
44 | if err != nil {
45 | return nil, err
46 | }
47 | r, err := f.AsLargeBytes()
48 | if err != nil {
49 | return nil, err
50 | }
51 | if _, err := io.Copy(io.Discard, r); err != nil {
52 | return nil, err
53 | }
54 | return f, nil
55 | }
56 |
57 | // A LargeBytesNode is an ipld.Node that can be streamed over. It is guaranteed to have a Bytes type.
58 | type LargeBytesNode interface {
59 | adl.ADL
60 | AsLargeBytes() (io.ReadSeeker, error)
61 | }
62 |
63 | type singleNodeFile struct {
64 | ipld.Node
65 | }
66 |
67 | func (f *singleNodeFile) AsLargeBytes() (io.ReadSeeker, error) {
68 | return &singleNodeReader{f, 0}, nil
69 | }
70 |
71 | func (f *singleNodeFile) Substrate() datamodel.Node {
72 | return f.Node
73 | }
74 |
75 | type singleNodeReader struct {
76 | ipld.Node
77 | offset int
78 | }
79 |
80 | func (f *singleNodeReader) Read(p []byte) (int, error) {
81 | buf, err := f.Node.AsBytes()
82 | if err != nil {
83 | return 0, err
84 | }
85 | if f.offset >= len(buf) {
86 | return 0, io.EOF
87 | }
88 | n := copy(p, buf[f.offset:])
89 | f.offset += n
90 | return n, nil
91 | }
92 |
93 | func (f *singleNodeReader) Seek(offset int64, whence int) (int64, error) {
94 | buf, err := f.Node.AsBytes()
95 | if err != nil {
96 | return 0, err
97 | }
98 |
99 | switch whence {
100 | case io.SeekStart:
101 | f.offset = int(offset)
102 | case io.SeekCurrent:
103 | f.offset += int(offset)
104 | case io.SeekEnd:
105 | f.offset = len(buf) + int(offset)
106 | }
107 | if f.offset < 0 {
108 | return 0, io.EOF
109 | }
110 | return int64(f.offset), nil
111 | }
112 |
--------------------------------------------------------------------------------
/file/file_test.go:
--------------------------------------------------------------------------------
1 | package file_test
2 |
3 | import (
4 | "bytes"
5 | "context"
6 | "fmt"
7 | "io"
8 | "testing"
9 |
10 | "github.com/ipfs/go-test/random"
11 | "github.com/ipfs/go-unixfsnode"
12 | "github.com/ipfs/go-unixfsnode/data/builder"
13 | "github.com/ipfs/go-unixfsnode/directory"
14 | "github.com/ipfs/go-unixfsnode/file"
15 | "github.com/ipld/go-car/v2/blockstore"
16 | dagpb "github.com/ipld/go-codec-dagpb"
17 | "github.com/ipld/go-ipld-prime"
18 | cidlink "github.com/ipld/go-ipld-prime/linking/cid"
19 | "github.com/ipld/go-ipld-prime/node/basicnode"
20 | )
21 |
22 | func TestRootV0File(t *testing.T) {
23 | baseFile := "./fixtures/QmT78zSuBmuS4z925WZfrqQ1qHaJ56DQaTfyMUF7F8ff5o.car"
24 | root, ls := open(baseFile, t)
25 | file, err := file.NewUnixFSFile(context.Background(), root, ls)
26 | if err != nil {
27 | t.Fatal(err)
28 | }
29 | fc, err := file.AsBytes()
30 | if err != nil {
31 | t.Fatal(err)
32 | }
33 | if !bytes.Equal(fc, []byte("hello world\n")) {
34 | t.Errorf("file content does not match: %s", string(fc))
35 | }
36 | }
37 |
38 | func TestNamedV0File(t *testing.T) {
39 | baseFile := "./fixtures/QmT8EC9sJq63SkDZ1mWLbWWyVV66PuqyHWpKkH4pcVyY4H.car"
40 | root, ls := open(baseFile, t)
41 | dir, err := unixfsnode.Reify(ipld.LinkContext{}, root, ls)
42 | if err != nil {
43 | t.Fatal(err)
44 | }
45 | dpbn := dir.(directory.UnixFSBasicDir)
46 | name, link := dpbn.Iterator().Next()
47 | if name.String() != "b.txt" {
48 | t.Fatal("unexpected filename")
49 | }
50 | fileNode, err := ls.Load(ipld.LinkContext{}, link.Link(), dagpb.Type.PBNode)
51 | if err != nil {
52 | t.Fatal(err)
53 | }
54 | file, err := file.NewUnixFSFile(context.Background(), fileNode, ls)
55 | if err != nil {
56 | t.Fatal(err)
57 | }
58 | fc, err := file.AsBytes()
59 | if err != nil {
60 | t.Fatal(err)
61 | }
62 | if !bytes.Equal(fc, []byte("hello world\n")) {
63 | t.Errorf("file content does not match: %s", string(fc))
64 | }
65 | }
66 |
67 | func TestFileSeeker(t *testing.T) {
68 | ls := cidlink.DefaultLinkSystem()
69 | storage := cidlink.Memory{}
70 | ls.StorageReadOpener = storage.OpenRead
71 | ls.StorageWriteOpener = storage.OpenWrite
72 |
73 | // Make random file with 1024 bytes.
74 | buf := make([]byte, 1024)
75 | random.NewSeededRand(0xdeadbeef).Read(buf)
76 | r := bytes.NewReader(buf)
77 |
78 | // Build UnixFS File as a single chunk
79 | f, _, err := builder.BuildUnixFSFile(r, "size-1024", &ls)
80 | if err != nil {
81 | t.Fatal(err)
82 | }
83 |
84 | // Load the file.
85 | fr, err := ls.Load(ipld.LinkContext{}, f, basicnode.Prototype.Bytes)
86 | if err != nil {
87 | t.Fatal(err)
88 | }
89 |
90 | // Create it.
91 | ufn, err := file.NewUnixFSFile(context.Background(), fr, &ls)
92 | if err != nil {
93 | t.Fatal(err)
94 | }
95 |
96 | rs, err := ufn.AsLargeBytes()
97 | if err != nil {
98 | t.Fatal(err)
99 | }
100 |
101 | testSeekIn1024ByteFile(t, rs)
102 | }
103 |
104 | func open(car string, t *testing.T) (ipld.Node, *ipld.LinkSystem) {
105 | baseStore, err := blockstore.OpenReadOnly(car)
106 | if err != nil {
107 | t.Fatal(err)
108 | }
109 | ls := cidlink.DefaultLinkSystem()
110 | ls.StorageReadOpener = func(lctx ipld.LinkContext, l ipld.Link) (io.Reader, error) {
111 | cl, ok := l.(cidlink.Link)
112 | if !ok {
113 | return nil, fmt.Errorf("couldn't load link")
114 | }
115 | blk, err := baseStore.Get(lctx.Ctx, cl.Cid)
116 | if err != nil {
117 | return nil, err
118 | }
119 | return bytes.NewBuffer(blk.RawData()), nil
120 | }
121 | carRoots, err := baseStore.Roots()
122 | if err != nil {
123 | t.Fatal(err)
124 | }
125 | root, err := ls.Load(ipld.LinkContext{}, cidlink.Link{Cid: carRoots[0]}, dagpb.Type.PBNode)
126 | if err != nil {
127 | t.Fatal(err)
128 | }
129 | return root, &ls
130 | }
131 |
132 | func testSeekIn1024ByteFile(t *testing.T, rs io.ReadSeeker) {
133 | // Seek from the start and try reading
134 | offset, err := rs.Seek(128, io.SeekStart)
135 | if err != nil {
136 | t.Fatal(err)
137 | }
138 |
139 | if offset != 128 {
140 | t.Fatalf("expected offset %d, got %d", 128, offset)
141 | }
142 |
143 | readBuf := make([]byte, 256)
144 | _, err = io.ReadFull(rs, readBuf)
145 | if err != nil {
146 | t.Fatal(err)
147 | }
148 |
149 | // Validate we can detect the offset with SeekCurrent
150 | offset, err = rs.Seek(0, io.SeekCurrent)
151 | if err != nil {
152 | t.Fatal(err)
153 | }
154 |
155 | if offset != 384 {
156 | t.Fatalf("expected offset %d, got %d", 384, offset)
157 | }
158 |
159 | // Validate we can read after moving with SeekCurrent
160 | offset, err = rs.Seek(100, io.SeekCurrent)
161 | if err != nil {
162 | t.Fatal(err)
163 | }
164 | if offset != 484 {
165 | t.Fatalf("expected offset %d, got %d", 484, offset)
166 | }
167 |
168 | _, err = io.ReadFull(rs, readBuf)
169 | if err != nil {
170 | t.Fatal(err)
171 | }
172 |
173 | offset, err = rs.Seek(0, io.SeekCurrent)
174 | if err != nil {
175 | t.Fatal(err)
176 | }
177 |
178 | if offset != 740 {
179 | t.Fatalf("expected offset %d, got %d", 740, offset)
180 | }
181 |
182 | // Validate we can read after moving with SeekEnd
183 | offset, err = rs.Seek(-400, io.SeekEnd)
184 | if err != nil {
185 | t.Fatal(err)
186 | }
187 |
188 | if offset != 624 {
189 | t.Fatalf("expected offset %d, got %d", 624, offset)
190 | }
191 |
192 | _, err = io.ReadFull(rs, readBuf)
193 | if err != nil {
194 | t.Fatal(err)
195 | }
196 |
197 | offset, err = rs.Seek(0, io.SeekCurrent)
198 | if err != nil {
199 | t.Fatal(err)
200 | }
201 |
202 | if offset != 880 {
203 | t.Fatalf("expected offset %d, got %d", 880, offset)
204 | }
205 | }
206 |
--------------------------------------------------------------------------------
/file/fixtures/QmT78zSuBmuS4z925WZfrqQ1qHaJ56DQaTfyMUF7F8ff5o.car:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipfs/go-unixfsnode/55bf436685936f51a82e553bab4776b54dda7932/file/fixtures/QmT78zSuBmuS4z925WZfrqQ1qHaJ56DQaTfyMUF7F8ff5o.car
--------------------------------------------------------------------------------
/file/fixtures/QmT8EC9sJq63SkDZ1mWLbWWyVV66PuqyHWpKkH4pcVyY4H.car:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipfs/go-unixfsnode/55bf436685936f51a82e553bab4776b54dda7932/file/fixtures/QmT8EC9sJq63SkDZ1mWLbWWyVV66PuqyHWpKkH4pcVyY4H.car
--------------------------------------------------------------------------------
/file/large_file_test.go:
--------------------------------------------------------------------------------
1 | //go:build !race
2 |
3 | package file_test
4 |
5 | import (
6 | "bytes"
7 | "context"
8 | "fmt"
9 | "io"
10 | "strconv"
11 | "sync"
12 | "testing"
13 |
14 | "github.com/ipfs/go-cid"
15 | "github.com/ipfs/go-test/random"
16 | "github.com/ipfs/go-unixfsnode/data/builder"
17 | "github.com/ipfs/go-unixfsnode/file"
18 | dagpb "github.com/ipld/go-codec-dagpb"
19 | "github.com/ipld/go-ipld-prime"
20 | cidlink "github.com/ipld/go-ipld-prime/linking/cid"
21 | )
22 |
23 | func TestLargeFileReader(t *testing.T) {
24 | if testing.Short() || strconv.IntSize == 32 {
25 | t.Skip()
26 | }
27 | buf := make([]byte, 512*1024*1024)
28 | random.NewSeededRand(0xdeadbeef).Read(buf)
29 | r := bytes.NewReader(buf)
30 |
31 | ls := cidlink.DefaultLinkSystem()
32 | storage := cidlink.Memory{}
33 | ls.StorageReadOpener = storage.OpenRead
34 | ls.StorageWriteOpener = storage.OpenWrite
35 |
36 | f, _, err := builder.BuildUnixFSFile(r, "", &ls)
37 | if err != nil {
38 | t.Fatal(err)
39 | }
40 |
41 | // get back the root node substrate from the link at the top of the builder.
42 | fr, err := ls.Load(ipld.LinkContext{}, f, dagpb.Type.PBNode)
43 | if err != nil {
44 | t.Fatal(err)
45 | }
46 |
47 | ufn, err := file.NewUnixFSFile(context.Background(), fr, &ls)
48 | if err != nil {
49 | t.Fatal(err)
50 | }
51 | // read back out the file.
52 | for i := 0; i < len(buf); i += 100 * 1024 * 1024 {
53 | rs, err := ufn.AsLargeBytes()
54 | if err != nil {
55 | t.Fatal(err)
56 | }
57 | _, err = rs.Seek(int64(i), io.SeekStart)
58 | if err != nil {
59 | t.Fatal(err)
60 | }
61 | ob, err := io.ReadAll(rs)
62 | if err != nil {
63 | t.Fatal(err)
64 | }
65 | if !bytes.Equal(ob, buf[i:]) {
66 | t.Fatal("Not equal at offset", i, "expected", len(buf[i:]), "got", len(ob))
67 | }
68 | }
69 | }
70 |
71 | func TestLargeFileSeeker(t *testing.T) {
72 | ls := cidlink.DefaultLinkSystem()
73 | storage := cidlink.Memory{}
74 | ls.StorageReadOpener = storage.OpenRead
75 | ls.StorageWriteOpener = storage.OpenWrite
76 |
77 | // Make random file with 1024 bytes.
78 | buf := make([]byte, 1024)
79 | random.NewSeededRand(0xdeadbeef).Read(buf)
80 | r := bytes.NewReader(buf)
81 |
82 | // Build UnixFS File chunked in 256 byte parts.
83 | f, _, err := builder.BuildUnixFSFile(r, "size-256", &ls)
84 | if err != nil {
85 | t.Fatal(err)
86 | }
87 |
88 | // Load the file.
89 | fr, err := ls.Load(ipld.LinkContext{}, f, dagpb.Type.PBNode)
90 | if err != nil {
91 | t.Fatal(err)
92 | }
93 |
94 | // Create it.
95 | ufn, err := file.NewUnixFSFile(context.Background(), fr, &ls)
96 | if err != nil {
97 | t.Fatal(err)
98 | }
99 |
100 | rs, err := ufn.AsLargeBytes()
101 | if err != nil {
102 | t.Fatal(err)
103 | }
104 |
105 | testSeekIn1024ByteFile(t, rs)
106 | }
107 |
108 | func TestLargeFileReaderReadsOnlyNecessaryBlocks(t *testing.T) {
109 | tracker, ls := mockTrackingLinkSystem()
110 |
111 | // Make random file with 1024 bytes.
112 | buf := make([]byte, 1024)
113 | random.NewSeededRand(0xdeadbeef).Read(buf)
114 | r := bytes.NewReader(buf)
115 |
116 | // Build UnixFS File chunked in 256 byte parts.
117 | f, _, err := builder.BuildUnixFSFile(r, "size-256", ls)
118 | if err != nil {
119 | t.Fatal(err)
120 | }
121 |
122 | // Load the file.
123 | fr, err := ls.Load(ipld.LinkContext{}, f, dagpb.Type.PBNode)
124 | if err != nil {
125 | t.Fatal(err)
126 | }
127 |
128 | // Create it.
129 | ufn, err := file.NewUnixFSFile(context.Background(), fr, ls)
130 | if err != nil {
131 | t.Fatal(err)
132 | }
133 |
134 | // Prepare tracker for read.
135 | tracker.resetTracker()
136 |
137 | rs, err := ufn.AsLargeBytes()
138 | if err != nil {
139 | t.Fatal(err)
140 | }
141 |
142 | // Move the pointer to the 2nd block of the file.
143 | _, err = rs.Seek(256, io.SeekStart)
144 | if err != nil {
145 | t.Fatal(err)
146 | }
147 |
148 | // Read the 3rd and 4th blocks of the file.
149 | portion := make([]byte, 512)
150 | _, err = io.ReadAtLeast(rs, portion, 512)
151 | if err != nil {
152 | t.Fatal(err)
153 | }
154 |
155 | // Just be sure we read the right bytes.
156 | if !bytes.Equal(portion, buf[256:768]) {
157 | t.Fatal(fmt.Errorf("did not read correct bytes"))
158 | }
159 |
160 | // We must have read exactly 2 CIDs, one for each of the 2 blocks read.
161 | if l := len(tracker.cids); l != 2 {
162 | t.Fatal(fmt.Errorf("expected to have read 2 blocks, read %d", l))
163 | }
164 | }
165 |
166 | type trackingReadOpener struct {
167 | cidlink.Memory
168 | mu sync.Mutex
169 | cids []cid.Cid
170 | }
171 |
172 | func (ro *trackingReadOpener) resetTracker() {
173 | ro.mu.Lock()
174 | ro.cids = nil
175 | ro.mu.Unlock()
176 | }
177 |
178 | func (ro *trackingReadOpener) OpenRead(lnkCtx ipld.LinkContext, lnk ipld.Link) (io.Reader, error) {
179 | cidLink, ok := lnk.(cidlink.Link)
180 | if !ok {
181 | return nil, fmt.Errorf("invalid link type for loading: %v", lnk)
182 | }
183 |
184 | ro.mu.Lock()
185 | ro.cids = append(ro.cids, cidLink.Cid)
186 | ro.mu.Unlock()
187 |
188 | return ro.Memory.OpenRead(lnkCtx, lnk)
189 | }
190 |
191 | func mockTrackingLinkSystem() (*trackingReadOpener, *ipld.LinkSystem) {
192 | ls := cidlink.DefaultLinkSystem()
193 | storage := &trackingReadOpener{Memory: cidlink.Memory{}}
194 |
195 | ls.StorageWriteOpener = storage.OpenWrite
196 | ls.StorageReadOpener = storage.OpenRead
197 | ls.TrustedStorage = true
198 |
199 | return storage, &ls
200 | }
201 |
--------------------------------------------------------------------------------
/file/shard.go:
--------------------------------------------------------------------------------
1 | package file
2 |
3 | import (
4 | "context"
5 | "io"
6 | "sync"
7 |
8 | "github.com/ipfs/go-cid"
9 | "github.com/ipfs/go-unixfsnode/data"
10 | dagpb "github.com/ipld/go-codec-dagpb"
11 | "github.com/ipld/go-ipld-prime"
12 | "github.com/ipld/go-ipld-prime/adl"
13 | cidlink "github.com/ipld/go-ipld-prime/linking/cid"
14 | "github.com/ipld/go-ipld-prime/node/basicnode"
15 | "github.com/multiformats/go-multicodec"
16 | )
17 |
18 | type shardNodeFile struct {
19 | ctx context.Context
20 | lsys *ipld.LinkSystem
21 | substrate ipld.Node
22 |
23 | // unixfs data unpacked from the substrate. access via .unpack()
24 | metadata data.UnixFSData
25 | unpackLk sync.Once
26 | }
27 |
28 | var _ adl.ADL = (*shardNodeFile)(nil)
29 |
30 | type shardNodeReader struct {
31 | *shardNodeFile
32 | rdr io.Reader
33 | offset int64
34 | len int64
35 | }
36 |
37 | func (s *shardNodeReader) makeReader() (io.Reader, error) {
38 | links, err := s.shardNodeFile.substrate.LookupByString("Links")
39 | if err != nil {
40 | return nil, err
41 | }
42 | readers := make([]io.Reader, 0)
43 | lnkIter := links.ListIterator()
44 | at := int64(0)
45 | for !lnkIter.Done() {
46 | lnkIdx, lnk, err := lnkIter.Next()
47 | if err != nil {
48 | return nil, err
49 | }
50 | childSize, tr, err := s.linkSize(lnk, int(lnkIdx))
51 | if err != nil {
52 | return nil, err
53 | }
54 | if s.offset >= at+childSize {
55 | at += childSize
56 | continue
57 | }
58 | if tr == nil {
59 | lnkhash, err := lnk.LookupByString("Hash")
60 | if err != nil {
61 | return nil, err
62 | }
63 | lnklnk, err := lnkhash.AsLink()
64 | if err != nil {
65 | return nil, err
66 | }
67 | target := newDeferredFileNode(s.ctx, s.lsys, lnklnk)
68 | tr, err = target.AsLargeBytes()
69 | if err != nil {
70 | return nil, err
71 | }
72 | }
73 | // fastforward the first one if needed.
74 | if at < s.offset {
75 | _, err := tr.Seek(s.offset-at, io.SeekStart)
76 | if err != nil {
77 | return nil, err
78 | }
79 | }
80 | at += childSize
81 | readers = append(readers, tr)
82 | }
83 | if len(readers) == 0 {
84 | return nil, io.EOF
85 | }
86 | s.len = at
87 | return io.MultiReader(readers...), nil
88 | }
89 |
90 | func (s *shardNodeFile) unpack() (data.UnixFSData, error) {
91 | var retErr error
92 | s.unpackLk.Do(func() {
93 | nodeData, err := s.substrate.LookupByString("Data")
94 | if err != nil {
95 | retErr = err
96 | return
97 | }
98 | nodeDataBytes, err := nodeData.AsBytes()
99 | if err != nil {
100 | retErr = err
101 | return
102 | }
103 | ud, err := data.DecodeUnixFSData(nodeDataBytes)
104 | if err != nil {
105 | retErr = err
106 | return
107 | }
108 | s.metadata = ud
109 | })
110 | return s.metadata, retErr
111 | }
112 |
113 | // linkSize returns the size of the n'th link from this shard.
114 | // the io.ReadSeeker of the child will also be returned if the child was loaded as part of the size calculation.
115 | func (s *shardNodeFile) linkSize(lnk ipld.Node, position int) (int64, io.ReadSeeker, error) {
116 | lnkhash, err := lnk.LookupByString("Hash")
117 | if err != nil {
118 | return 0, nil, err
119 | }
120 | lnklnk, err := lnkhash.AsLink()
121 | if err != nil {
122 | return 0, nil, err
123 | }
124 | _, c, err := cid.CidFromBytes([]byte(lnklnk.Binary()))
125 | if err != nil {
126 | return 0, nil, err
127 | }
128 |
129 | // efficiency shortcut: for raw blocks, the size will match the bytes of content
130 | if c.Prefix().Codec == cid.Raw {
131 | size, err := lnk.LookupByString("Tsize")
132 | if err != nil {
133 | return 0, nil, err
134 | }
135 | sz, err := size.AsInt()
136 | return sz, nil, err
137 | }
138 |
139 | // check if there are blocksizes written, use them if there are.
140 | // both err and md can be nil if this was not the first time unpack()
141 | // was called but there was an error on the first call.
142 | md, err := s.unpack()
143 | if err == nil && md != nil {
144 | pn, err := md.BlockSizes.LookupByIndex(int64(position))
145 | if err == nil {
146 | innerNum, err := pn.AsInt()
147 | if err == nil {
148 | return innerNum, nil, nil
149 | }
150 | }
151 | }
152 |
153 | // open the link and get its size.
154 | target := newDeferredFileNode(s.ctx, s.lsys, lnklnk)
155 | tr, err := target.AsLargeBytes()
156 | if err != nil {
157 | return 0, nil, err
158 | }
159 |
160 | end, err := tr.Seek(0, io.SeekEnd)
161 | if err != nil {
162 | return end, nil, err
163 | }
164 | _, err = tr.Seek(0, io.SeekStart)
165 | return end, tr, err
166 | }
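// Note: the three strategies above are ordered by cost. Raw leaves carry
// their exact payload length in the dag-pb "Tsize" field; other children can
// usually be sized from the parent's UnixFS blocksizes array without loading
// anything; only as a last resort is the child fetched and seeked to its end,
// in which case the opened reader is handed back so makeReader can reuse it
// instead of loading the block a second time.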
167 |
168 | func (s *shardNodeReader) Read(p []byte) (int, error) {
169 | // build reader
170 | if s.rdr == nil {
171 | rdr, err := s.makeReader()
172 | if err != nil {
173 | return 0, err
174 | }
175 | s.rdr = rdr
176 | }
177 | n, err := s.rdr.Read(p)
178 | s.offset += int64(n)
179 | return n, err
180 | }
181 |
182 | func (s *shardNodeReader) Seek(offset int64, whence int) (int64, error) {
183 | if s.rdr != nil {
184 | s.rdr = nil
185 | }
186 | switch whence {
187 | case io.SeekStart:
188 | s.offset = offset
189 | case io.SeekCurrent:
190 | s.offset += offset
191 | case io.SeekEnd:
192 | s.offset = s.length() + offset
193 | }
194 | return s.offset, nil
195 | }
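// Note: Seek only records the new offset and discards any existing reader; no
// blocks are fetched until the next Read, at which point makeReader skips the
// children that fall entirely before the offset and fast-forwards into the
// first child it actually needs.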
196 |
197 | func (s *shardNodeFile) length() int64 {
198 | // see if we have size specified in the unixfs data. errors fall back to length from links
199 | nodeData, err := s.unpack()
200 | if err != nil || nodeData == nil {
201 | return s.lengthFromLinks()
202 | }
203 | if nodeData.FileSize.Exists() {
204 | if fs, err := nodeData.FileSize.Must().AsInt(); err == nil {
205 | return int64(fs)
206 | }
207 | }
208 |
209 | return s.lengthFromLinks()
210 | }
211 |
212 | func (s *shardNodeFile) lengthFromLinks() int64 {
213 | links, err := s.substrate.LookupByString("Links")
214 | if err != nil {
215 | return 0
216 | }
217 | size := int64(0)
218 | li := links.ListIterator()
219 | for !li.Done() {
220 | idx, l, err := li.Next()
221 | if err != nil {
222 | return 0
223 | }
224 | ll, _, err := s.linkSize(l, int(idx))
225 | if err != nil {
226 | return 0
227 | }
228 | size += ll
229 | }
230 | return size
231 | }
232 |
233 | func (s *shardNodeFile) AsLargeBytes() (io.ReadSeeker, error) {
234 | return &shardNodeReader{s, nil, 0, 0}, nil
235 | }
236 |
237 | func (s *shardNodeFile) Substrate() ipld.Node {
238 | return s.substrate
239 | }
240 |
241 | func protoFor(link ipld.Link) ipld.NodePrototype {
242 | if lc, ok := link.(cidlink.Link); ok {
243 | if lc.Cid.Prefix().Codec == uint64(multicodec.DagPb) {
244 | return dagpb.Type.PBNode
245 | }
246 | }
247 | return basicnode.Prototype.Any
248 | }
249 |
250 | func (s *shardNodeFile) Kind() ipld.Kind {
251 | return ipld.Kind_Bytes
252 | }
253 |
254 | func (s *shardNodeFile) AsBytes() ([]byte, error) {
255 | rdr, err := s.AsLargeBytes()
256 | if err != nil {
257 | return nil, err
258 | }
259 | return io.ReadAll(rdr)
260 | }
261 |
262 | func (s *shardNodeFile) AsBool() (bool, error) {
263 | return false, ipld.ErrWrongKind{TypeName: "bool", MethodName: "AsBool", AppropriateKind: ipld.KindSet_JustBytes}
264 | }
265 |
266 | func (s *shardNodeFile) AsInt() (int64, error) {
267 | return 0, ipld.ErrWrongKind{TypeName: "int", MethodName: "AsInt", AppropriateKind: ipld.KindSet_JustBytes}
268 | }
269 |
270 | func (s *shardNodeFile) AsFloat() (float64, error) {
271 | return 0, ipld.ErrWrongKind{TypeName: "float", MethodName: "AsFloat", AppropriateKind: ipld.KindSet_JustBytes}
272 | }
273 |
274 | func (s *shardNodeFile) AsString() (string, error) {
275 | return "", ipld.ErrWrongKind{TypeName: "string", MethodName: "AsString", AppropriateKind: ipld.KindSet_JustBytes}
276 | }
277 |
278 | func (s *shardNodeFile) AsLink() (ipld.Link, error) {
279 | return nil, ipld.ErrWrongKind{TypeName: "link", MethodName: "AsLink", AppropriateKind: ipld.KindSet_JustBytes}
280 | }
281 |
282 | func (s *shardNodeFile) AsNode() (ipld.Node, error) {
283 | return nil, nil
284 | }
285 |
286 | func (s *shardNodeFile) Size() int {
287 | return 0
288 | }
289 |
290 | func (s *shardNodeFile) IsAbsent() bool {
291 | return false
292 | }
293 |
294 | func (s *shardNodeFile) IsNull() bool {
295 | return s.substrate.IsNull()
296 | }
297 |
298 | func (s *shardNodeFile) Length() int64 {
299 | return 0
300 | }
301 |
302 | func (s *shardNodeFile) ListIterator() ipld.ListIterator {
303 | return nil
304 | }
305 |
306 | func (s *shardNodeFile) MapIterator() ipld.MapIterator {
307 | return nil
308 | }
309 |
310 | func (s *shardNodeFile) LookupByIndex(idx int64) (ipld.Node, error) {
311 | return nil, ipld.ErrWrongKind{}
312 | }
313 |
314 | func (s *shardNodeFile) LookupByString(key string) (ipld.Node, error) {
315 | return nil, ipld.ErrWrongKind{}
316 | }
317 |
318 | func (s *shardNodeFile) LookupByNode(key ipld.Node) (ipld.Node, error) {
319 | return nil, ipld.ErrWrongKind{}
320 | }
321 |
322 | func (s *shardNodeFile) LookupBySegment(seg ipld.PathSegment) (ipld.Node, error) {
323 | return nil, ipld.ErrWrongKind{}
324 | }
325 |
326 | // sharded files / nodes look like dagpb nodes.
327 | func (s *shardNodeFile) Prototype() ipld.NodePrototype {
328 | return dagpb.Type.PBNode
329 | }
330 |
--------------------------------------------------------------------------------
/file/wrapped.go:
--------------------------------------------------------------------------------
1 | package file
2 |
3 | import (
4 | "github.com/ipfs/go-unixfsnode/data"
5 | "github.com/ipld/go-ipld-prime"
6 | "github.com/ipld/go-ipld-prime/node/basicnode"
7 | )
8 |
9 | func newWrappedNode(substrate ipld.Node) (LargeBytesNode, error) {
10 | dataField, err := substrate.LookupByString("Data")
11 | if err != nil {
12 | return nil, err
13 | }
14 | // unpack as unixfs proto.
15 | dfb, err := dataField.AsBytes()
16 | if err != nil {
17 | return nil, err
18 | }
19 | ufd, err := data.DecodeUnixFSData(dfb)
20 | if err != nil {
21 | return nil, err
22 | }
23 |
24 | if ufd.Data.Exists() {
25 | return &singleNodeFile{
26 | Node: ufd.Data.Must(),
27 | }, nil
28 | }
29 |
30 | // an empty degenerate one.
31 | return &singleNodeFile{
32 | Node: basicnode.NewBytes(nil),
33 | }, nil
34 | }
35 |
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/ipfs/go-unixfsnode
2 |
3 | go 1.23.8
4 |
5 | require (
6 | github.com/ipfs/boxo v0.30.0
7 | github.com/ipfs/go-bitfield v1.1.0
8 | github.com/ipfs/go-cid v0.5.0
9 | github.com/ipfs/go-ipld-format v0.6.1
10 | github.com/ipfs/go-test v0.2.2
11 | github.com/ipld/go-car/v2 v2.14.3
12 | github.com/ipld/go-codec-dagpb v1.7.0
13 | github.com/ipld/go-ipld-prime v0.21.0
14 | github.com/multiformats/go-multicodec v0.9.0
15 | github.com/multiformats/go-multihash v0.2.3
16 | github.com/spaolacci/murmur3 v1.1.0
17 | github.com/stretchr/testify v1.10.0
18 | google.golang.org/protobuf v1.36.6
19 | )
20 |
21 | require (
22 | github.com/crackcomm/go-gitignore v0.0.0-20241020182519-7843d2ba8fdf // indirect
23 | github.com/davecgh/go-spew v1.1.1 // indirect
24 | github.com/decred/dcrd/dcrec/secp256k1/v4 v4.4.0 // indirect
25 | github.com/gammazero/deque v1.0.0 // indirect
26 | github.com/go-logr/logr v1.4.2 // indirect
27 | github.com/go-logr/stdr v1.2.2 // indirect
28 | github.com/google/uuid v1.6.0 // indirect
29 | github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
30 | github.com/ipfs/bbloom v0.0.4 // indirect
31 | github.com/ipfs/go-block-format v0.2.1 // indirect
32 | github.com/ipfs/go-datastore v0.8.2 // indirect
33 | github.com/ipfs/go-ipld-cbor v0.2.0 // indirect
34 | github.com/ipfs/go-ipld-legacy v0.2.1 // indirect
35 | github.com/ipfs/go-log/v2 v2.5.1 // indirect
36 | github.com/ipfs/go-metrics-interface v0.3.0 // indirect
37 | github.com/klauspost/cpuid/v2 v2.2.10 // indirect
38 | github.com/libp2p/go-buffer-pool v0.1.0 // indirect
39 | github.com/libp2p/go-libp2p v0.41.1 // indirect
40 | github.com/mattn/go-isatty v0.0.20 // indirect
41 | github.com/minio/sha256-simd v1.0.1 // indirect
42 | github.com/mr-tron/base58 v1.2.0 // indirect
43 | github.com/multiformats/go-base32 v0.1.0 // indirect
44 | github.com/multiformats/go-base36 v0.2.0 // indirect
45 | github.com/multiformats/go-multiaddr v0.15.0 // indirect
46 | github.com/multiformats/go-multibase v0.2.0 // indirect
47 | github.com/multiformats/go-varint v0.0.7 // indirect
48 | github.com/petar/GoLLRB v0.0.0-20210522233825-ae3b015fd3e9 // indirect
49 | github.com/pmezard/go-difflib v1.0.0 // indirect
50 | github.com/polydawn/refmt v0.89.0 // indirect
51 | github.com/whyrusleeping/cbor v0.0.0-20171005072247-63513f603b11 // indirect
52 | github.com/whyrusleeping/cbor-gen v0.1.2 // indirect
53 | github.com/whyrusleeping/chunker v0.0.0-20181014151217-fe64bd25879f // indirect
54 | go.opentelemetry.io/auto/sdk v1.1.0 // indirect
55 | go.opentelemetry.io/otel v1.35.0 // indirect
56 | go.opentelemetry.io/otel/metric v1.35.0 // indirect
57 | go.opentelemetry.io/otel/trace v1.35.0 // indirect
58 | go.uber.org/multierr v1.11.0 // indirect
59 | go.uber.org/zap v1.27.0 // indirect
60 | golang.org/x/crypto v0.38.0 // indirect
61 | golang.org/x/exp v0.0.0-20250506013437-ce4c2cf36ca6 // indirect
62 | golang.org/x/sync v0.14.0 // indirect
63 | golang.org/x/sys v0.33.0 // indirect
64 | golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect
65 | gopkg.in/yaml.v3 v3.0.1 // indirect
66 | lukechampine.com/blake3 v1.4.1 // indirect
67 | )
68 |
--------------------------------------------------------------------------------
/hamt/errors.go:
--------------------------------------------------------------------------------
1 | package hamt
2 |
3 | import "fmt"
4 |
5 | type errorType string
6 |
7 | func (e errorType) Error() string {
8 | return string(e)
9 | }
10 |
11 | const (
12 | // ErrNotProtobuf indicates an error attempting to load a HAMT from a non-protobuf node
13 | ErrNotProtobuf errorType = "node was not a protobuf node"
14 | // ErrNotUnixFSNode indicates an error attempting to load a HAMT from a generic protobuf node
15 | ErrNotUnixFSNode errorType = "node was not a UnixFS node"
16 | // ErrInvalidChildIndex indicates there is no link to load for the given child index
17 | ErrInvalidChildIndex errorType = "invalid index passed when operating on children (likely corrupt bitfield)"
18 | // ErrHAMTTooDeep indicates we attempted to load from a HAMT node that went past the depth of the tree
19 | ErrHAMTTooDeep errorType = "sharded directory too deep"
20 | // ErrInvalidHashType indicates the HAMT node's hash function is unsupported (must be Murmur3)
21 | ErrInvalidHashType errorType = "only murmur3 supported as hash function"
22 | // ErrNoDataField indicates the HAMT node's UnixFS structure lacked a data field, which is
23 | // where a bit mask is stored
24 | ErrNoDataField errorType = "'Data' field not present"
25 | // ErrNoFanoutField indicates the HAMT node's UnixFS structure lacked a fanout field, which is required
26 | ErrNoFanoutField errorType = "'Fanout' field not present"
27 | // ErrHAMTSizeInvalid indicates the HAMT's size property was not an exact power of 2
28 | ErrHAMTSizeInvalid errorType = "hamt size should be a power of two"
29 | // ErrMissingLinkName indicates a link in a HAMT had no Name property (required for all HAMTs)
30 | ErrMissingLinkName errorType = "missing link name"
31 | )
32 |
33 | // ErrInvalidLinkName indicates a link's name was too short for a HAMT
34 | type ErrInvalidLinkName struct {
35 | Name string
36 | }
37 |
38 | func (e ErrInvalidLinkName) Error() string {
39 | return fmt.Sprintf("invalid link name '%s'", e.Name)
40 | }
41 |
--------------------------------------------------------------------------------
/hamt/fixtures/wikipedia-cryptographic-hash-function.car:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipfs/go-unixfsnode/55bf436685936f51a82e553bab4776b54dda7932/hamt/fixtures/wikipedia-cryptographic-hash-function.car
--------------------------------------------------------------------------------
/hamt/shardeddir.go:
--------------------------------------------------------------------------------
1 | package hamt
2 |
3 | import (
4 | "context"
5 | "fmt"
6 |
7 | bitfield "github.com/ipfs/go-bitfield"
8 | "github.com/ipfs/go-unixfsnode/data"
9 | "github.com/ipfs/go-unixfsnode/iter"
10 | dagpb "github.com/ipld/go-codec-dagpb"
11 | "github.com/ipld/go-ipld-prime"
12 | "github.com/ipld/go-ipld-prime/schema"
13 | )
14 |
15 | const (
16 | // HashMurmur3 is the multiformats identifier for Murmur3
17 | HashMurmur3 uint64 = 0x22
18 | )
19 |
20 | var _ ipld.Node = UnixFSHAMTShard(nil)
21 | var _ schema.TypedNode = UnixFSHAMTShard(nil)
22 | var _ ipld.ADL = UnixFSHAMTShard(nil)
23 |
24 | // UnixFSHAMTShard is an IPLD Prime Node that provides a read interface
25 | // to a UnixFS HAMT
26 | type UnixFSHAMTShard = *_UnixFSHAMTShard
27 |
28 | type _UnixFSHAMTShard struct {
29 | ctx context.Context
30 | _substrate dagpb.PBNode
31 | data data.UnixFSData
32 | lsys *ipld.LinkSystem
33 | bitfield bitfield.Bitfield
34 | shardCache map[ipld.Link]*_UnixFSHAMTShard
35 | cachedLength int64
36 | }
37 |
38 | // NewUnixFSHAMTShard attempts to construct a UnixFSHAMTShard node from the base protobuf node plus
39 | // a decoded UnixFSData structure
40 | func NewUnixFSHAMTShard(ctx context.Context, substrate dagpb.PBNode, data data.UnixFSData, lsys *ipld.LinkSystem) (ipld.Node, error) {
41 | if err := validateHAMTData(data); err != nil {
42 | return nil, err
43 | }
44 | shardCache := make(map[ipld.Link]*_UnixFSHAMTShard, substrate.FieldLinks().Length())
45 | bf, err := bitField(data)
46 | if err != nil {
47 | return nil, err
48 | }
49 | return &_UnixFSHAMTShard{
50 | ctx: ctx,
51 | _substrate: substrate,
52 | data: data,
53 | lsys: lsys,
54 | shardCache: shardCache,
55 | bitfield: bf,
56 | cachedLength: -1,
57 | }, nil
58 | }
59 |
60 | // NewUnixFSHAMTShardWithPreload attempts to construct a UnixFSHAMTShard node from the base protobuf node plus
61 | // a decoded UnixFSData structure, and then iterates through and loads the full set of HAMT shards.
62 | func NewUnixFSHAMTShardWithPreload(ctx context.Context, substrate dagpb.PBNode, data data.UnixFSData, lsys *ipld.LinkSystem) (ipld.Node, error) {
63 | n, err := NewUnixFSHAMTShard(ctx, substrate, data, lsys)
64 | if err != nil {
65 | return n, err
66 | }
67 |
68 | traverse, err := n.(*_UnixFSHAMTShard).length()
69 | if err != nil {
70 | return n, err
71 | }
72 | if traverse == -1 {
73 | return n, fmt.Errorf("could not fully explore hamt during preload")
74 | }
75 |
76 | return n, nil
77 | }
78 |
79 | func (n UnixFSHAMTShard) Substrate() ipld.Node {
80 | return n._substrate
81 | }
82 |
83 | func (n UnixFSHAMTShard) Kind() ipld.Kind {
84 | return n._substrate.Kind()
85 | }
86 |
87 | // LookupByString looks for the key in the list of links with a matching name
88 | func (n *_UnixFSHAMTShard) LookupByString(key string) (ipld.Node, error) {
89 | hv := &hashBits{b: hash([]byte(key))}
90 | return n.lookup(key, hv)
91 | }
92 |
93 | func (n UnixFSHAMTShard) lookup(key string, hv *hashBits) (dagpb.Link, error) {
94 | log2 := log2Size(n.data)
95 | maxPadLen := maxPadLength(n.data)
96 | childIndex, err := hv.Next(log2)
97 | if err != nil {
98 | return nil, err
99 | }
100 |
101 | if n.hasChild(childIndex) {
102 | pbLink, err := n.getChildLink(childIndex)
103 | if err != nil {
104 | return nil, err
105 | }
106 | isValue, err := isValueLink(pbLink, maxPadLen)
107 | if err != nil {
108 | return nil, err
109 | }
110 | if isValue {
111 | if MatchKey(pbLink, key, maxPadLen) {
112 | return pbLink.FieldHash(), nil
113 | }
114 | } else {
115 | childNd, err := n.loadChild(pbLink)
116 | if err != nil {
117 | return nil, err
118 | }
119 | return childNd.lookup(key, hv)
120 | }
121 | }
122 | return nil, schema.ErrNoSuchField{Type: nil /*TODO*/, Field: ipld.PathSegmentOfString(key)}
123 | }
124 |
125 | // AttemptHAMTShardFromNode attempts to read a HAMT shard from a general protobuf node
126 | func AttemptHAMTShardFromNode(ctx context.Context, nd ipld.Node, lsys *ipld.LinkSystem) (UnixFSHAMTShard, error) {
127 | // shortcut if node is already a hamt
128 | hnd, ok := nd.(UnixFSHAMTShard)
129 | if ok {
130 | return hnd, nil
131 | }
132 | pbnd, ok := nd.(dagpb.PBNode)
133 | if !ok {
134 | return nil, fmt.Errorf("hamt.AttemptHAMTShardFromNode: %w", ErrNotProtobuf)
135 | }
136 | if !pbnd.FieldData().Exists() {
137 | return nil, fmt.Errorf("hamt.AttemptHAMTShardFromNode: %w", ErrNotUnixFSNode)
138 | }
139 | data, err := data.DecodeUnixFSData(pbnd.FieldData().Must().Bytes())
140 | if err != nil {
141 | return nil, err
142 | }
143 | und, err := NewUnixFSHAMTShard(ctx, pbnd, data, lsys)
144 | if err != nil {
145 | return nil, err
146 | }
147 | return und.(UnixFSHAMTShard), nil
148 | }
149 |
150 | func (n UnixFSHAMTShard) loadChild(pbLink dagpb.PBLink) (UnixFSHAMTShard, error) {
151 | cached, ok := n.shardCache[pbLink.FieldHash().Link()]
152 | if ok {
153 | return cached, nil
154 | }
155 | nd, err := n.lsys.Load(ipld.LinkContext{Ctx: n.ctx}, pbLink.FieldHash().Link(), dagpb.Type.PBNode)
156 | if err != nil {
157 | return nil, err
158 | }
159 | und, err := AttemptHAMTShardFromNode(n.ctx, nd, n.lsys)
160 | if err != nil {
161 | return nil, err
162 | }
163 | n.shardCache[pbLink.FieldHash().Link()] = und
164 | return und, nil
165 | }
166 |
167 | func (n UnixFSHAMTShard) LookupByNode(key ipld.Node) (ipld.Node, error) {
168 | ks, err := key.AsString()
169 | if err != nil {
170 | return nil, err
171 | }
172 | return n.LookupByString(ks)
173 | }
174 |
175 | func (n UnixFSHAMTShard) LookupByIndex(idx int64) (ipld.Node, error) {
176 | return n._substrate.LookupByIndex(idx)
177 | }
178 |
179 | func (n UnixFSHAMTShard) LookupBySegment(seg ipld.PathSegment) (ipld.Node, error) {
180 | return n.LookupByString(seg.String())
181 | }
182 |
183 | func (n UnixFSHAMTShard) MapIterator() ipld.MapIterator {
184 | maxPadLen := maxPadLength(n.data)
185 | listItr := &_UnixFSShardedDir__ListItr{
186 | _substrate: n.FieldLinks().Iterator(),
187 | maxPadLen: maxPadLen,
188 | nd: n,
189 | }
190 | st := stringTransformer{maxPadLen: maxPadLen}
191 | return iter.NewUnixFSDirMapIterator(listItr, st.transformNameNode)
192 | }
193 |
194 | type _UnixFSShardedDir__ListItr struct {
195 | _substrate *dagpb.PBLinks__Itr
196 | childIter *_UnixFSShardedDir__ListItr
197 | nd UnixFSHAMTShard
198 | maxPadLen int
199 | total int64
200 | }
201 |
202 | func (itr *_UnixFSShardedDir__ListItr) Next() (int64, dagpb.PBLink, error) {
203 | total := itr.total
204 | itr.total++
205 | next, err := itr.next()
206 | if err != nil {
207 | return -1, nil, err
208 | }
209 | if next == nil {
210 | return -1, nil, nil
211 | }
212 | return total, next, nil
213 | }
214 |
215 | func (itr *_UnixFSShardedDir__ListItr) next() (dagpb.PBLink, error) {
216 | if itr.childIter == nil {
217 | if itr._substrate.Done() {
218 | return nil, nil
219 | }
220 | _, next := itr._substrate.Next()
221 | isValue, err := isValueLink(next, itr.maxPadLen)
222 | if err != nil {
223 | return nil, err
224 | }
225 | if isValue {
226 | return next, nil
227 | }
228 | child, err := itr.nd.loadChild(next)
229 | if err != nil {
230 | return nil, err
231 | }
232 | itr.childIter = &_UnixFSShardedDir__ListItr{
233 | _substrate: child._substrate.FieldLinks().Iterator(),
234 | nd: child,
235 | maxPadLen: maxPadLength(child.data),
236 | }
237 | }
238 | _, next, err := itr.childIter.Next()
239 | if itr.childIter.Done() {
240 | // do this even on error to make sure we don't overrun a shard where the
241 | // end is missing and the user is ignoring NotFound errors
242 | itr.childIter = nil
243 | }
244 | if err != nil {
245 | return nil, err
246 | }
247 | return next, nil
248 | }
249 |
250 | func (itr *_UnixFSShardedDir__ListItr) Done() bool {
251 | return itr.childIter == nil && itr._substrate.Done()
252 | }
253 |
254 | // ListIterator returns an iterator which yields key-value pairs
255 | // traversing the node.
256 | // If the node kind is anything other than a list, nil will be returned.
257 | //
258 | // A UnixFS HAMT shard is map-like rather than list-like, so this
259 | // implementation always returns nil. Callers should use MapIterator
260 | // to enumerate entries instead.
261 | func (n UnixFSHAMTShard) ListIterator() ipld.ListIterator {
262 | return nil
263 | }
264 |
265 | // length computes the number of entries in this HAMT by walking it,
266 | // loading child shards as needed; the computed total is cached.
267 | func (n UnixFSHAMTShard) length() (int64, error) {
268 | if n.cachedLength != -1 {
269 | return n.cachedLength, nil
270 | }
271 | maxPadLen := maxPadLength(n.data)
272 | total := int64(0)
273 | itr := n.FieldLinks().Iterator()
274 | for !itr.Done() {
275 | _, pbLink := itr.Next()
276 | isValue, err := isValueLink(pbLink, maxPadLen)
277 | if err != nil {
278 | return 0, err
279 | }
280 | if isValue {
281 | total++
282 | } else {
283 | child, err := n.loadChild(pbLink)
284 | if err != nil {
285 | return 0, err
286 | }
287 | cl, err := child.length()
288 | if err != nil {
289 | return 0, err
290 | }
291 | total += cl
292 | }
293 | }
294 | n.cachedLength = total
295 | return total, nil
296 | }
297 |
298 | // Length returns the number of entries in the map, or 0 if the
299 | // entries cannot be fully enumerated (e.g. a child shard fails to load).
300 | func (n UnixFSHAMTShard) Length() int64 {
301 | count, err := n.length()
302 | if err != nil {
303 | return 0
304 | }
305 | return count
306 | }
307 |
308 | func (n UnixFSHAMTShard) IsAbsent() bool {
309 | return false
310 | }
311 |
312 | func (n UnixFSHAMTShard) IsNull() bool {
313 | return false
314 | }
315 |
316 | func (n UnixFSHAMTShard) AsBool() (bool, error) {
317 | return n._substrate.AsBool()
318 | }
319 |
320 | func (n UnixFSHAMTShard) AsInt() (int64, error) {
321 | return n._substrate.AsInt()
322 | }
323 |
324 | func (n UnixFSHAMTShard) AsFloat() (float64, error) {
325 | return n._substrate.AsFloat()
326 | }
327 |
328 | func (n UnixFSHAMTShard) AsString() (string, error) {
329 | return n._substrate.AsString()
330 | }
331 |
332 | func (n UnixFSHAMTShard) AsBytes() ([]byte, error) {
333 | return n._substrate.AsBytes()
334 | }
335 |
336 | func (n UnixFSHAMTShard) AsLink() (ipld.Link, error) {
337 | return n._substrate.AsLink()
338 | }
339 |
340 | func (n UnixFSHAMTShard) Prototype() ipld.NodePrototype {
341 | // TODO: should this return something?
342 | // probably not until we write the write interfaces
343 | return nil
344 | }
345 |
346 | // satisfy schema.TypedNode
347 | func (UnixFSHAMTShard) Type() schema.Type {
348 | return nil /*TODO:typelit*/
349 | }
350 |
351 | func (n UnixFSHAMTShard) Representation() ipld.Node {
352 | return n._substrate.Representation()
353 | }
354 |
355 | // Native map accessors
356 |
357 | func (n UnixFSHAMTShard) Iterator() *iter.UnixFSDir__Itr {
358 | maxPadLen := maxPadLength(n.data)
359 | listItr := &_UnixFSShardedDir__ListItr{
360 | _substrate: n.FieldLinks().Iterator(),
361 | maxPadLen: maxPadLen,
362 | nd: n,
363 | }
364 | st := stringTransformer{maxPadLen: maxPadLen}
365 | return iter.NewUnixFSDirIterator(listItr, st.transformNameNode)
366 | }
367 |
368 | func (n UnixFSHAMTShard) Lookup(key dagpb.String) dagpb.Link {
369 | hv := &hashBits{b: hash([]byte(key.String()))}
370 | link, err := n.lookup(key.String(), hv)
371 | if err != nil {
372 | return nil
373 | }
374 | return link
375 | }
376 |
377 | // direct access to the links and data
378 |
379 | func (n UnixFSHAMTShard) FieldLinks() dagpb.PBLinks {
380 | return n._substrate.FieldLinks()
381 | }
382 |
383 | func (n UnixFSHAMTShard) FieldData() dagpb.MaybeBytes {
384 | return n._substrate.FieldData()
385 | }
386 |
387 | func (n UnixFSHAMTShard) getChildLink(childIndex int) (dagpb.PBLink, error) {
388 | linkIndex := n.bitfield.OnesBefore(childIndex)
389 | if linkIndex >= int(n.FieldLinks().Length()) || linkIndex < 0 {
390 | return nil, ErrInvalidChildIndex
391 | }
392 | return n.FieldLinks().Lookup(int64(linkIndex)), nil
393 | }
394 |
395 | func (n UnixFSHAMTShard) hasChild(childIndex int) bool {
396 | return n.bitfield.Bit(childIndex)
397 | }
398 |
399 | type stringTransformer struct {
400 | maxPadLen int
401 | }
402 |
403 | func (s stringTransformer) transformNameNode(nd dagpb.String) dagpb.String {
404 | nb := dagpb.Type.String.NewBuilder()
405 | err := nb.AssignString(nd.String()[s.maxPadLen:])
406 | if err != nil {
407 | return nil
408 | }
409 | return nb.Build().(dagpb.String)
410 | }
411 |
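Tying the above together, a minimal usage sketch (mirroring the tests in shardeddir_test.go below; rootCid is an assumed CID of a sharded directory's root block reachable through lsys). NewUnixFSHAMTShardWithPreload eagerly walks every child shard up front, failing fast on incomplete DAGs, while the plain constructor used via AttemptHAMTShardFromNode defers loading until lookup or iteration:

    nd, err := lsys.Load(ipld.LinkContext{Ctx: ctx}, cidlink.Link{Cid: rootCid}, dagpb.Type.PBNode)
    if err != nil {
        return err
    }
    shard, err := hamt.AttemptHAMTShardFromNode(ctx, nd, lsys)
    if err != nil {
        return err
    }
    // point lookup: consumes hash bits level by level, loading only the
    // shards along the key's path
    if _, err := shard.LookupByString("file.txt"); err != nil {
        return err
    }
    // full listing: child shards are loaded lazily as the iterator advances
    it := shard.Iterator()
    for !it.Done() {
        name, link := it.Next()
        fmt.Println(name.String(), link.Link())
    }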
--------------------------------------------------------------------------------
/hamt/shardeddir_test.go:
--------------------------------------------------------------------------------
1 | package hamt_test
2 |
3 | import (
4 | "bytes"
5 | "context"
6 | "fmt"
7 | "io"
8 | "math/rand"
9 | "os"
10 | "slices"
11 | "strings"
12 | "testing"
13 | "time"
14 |
15 | dag "github.com/ipfs/boxo/ipld/merkledag"
16 | mdtest "github.com/ipfs/boxo/ipld/merkledag/test"
17 | ft "github.com/ipfs/boxo/ipld/unixfs"
18 | legacy "github.com/ipfs/boxo/ipld/unixfs/hamt"
19 | format "github.com/ipfs/go-ipld-format"
20 | "github.com/ipfs/go-unixfsnode/hamt"
21 | "github.com/ipld/go-car/v2/storage"
22 | dagpb "github.com/ipld/go-codec-dagpb"
23 | "github.com/ipld/go-ipld-prime"
24 | "github.com/ipld/go-ipld-prime/fluent/qp"
25 | cidlink "github.com/ipld/go-ipld-prime/linking/cid"
26 | basicnode "github.com/ipld/go-ipld-prime/node/basic"
27 | "github.com/ipld/go-ipld-prime/schema"
28 | "github.com/stretchr/testify/require"
29 | )
30 |
31 | // For now these tests use legacy UnixFS HAMT builders until we finish a builder
32 | // in go-ipld-prime
33 | func shuffle(seed int64, arr []string) {
34 | r := rand.New(rand.NewSource(seed))
35 | for i := 0; i < len(arr); i++ {
36 | a := r.Intn(len(arr))
37 | b := r.Intn(len(arr))
38 | arr[a], arr[b] = arr[b], arr[a]
39 | }
40 | }
41 |
42 | func makeDir(ds format.DAGService, size int) ([]string, *legacy.Shard, error) {
43 | return makeDirWidth(ds, size, 256)
44 | }
45 |
46 | func makeDirWidth(ds format.DAGService, size, width int) ([]string, *legacy.Shard, error) {
47 | ctx := context.Background()
48 |
49 | s, err := legacy.NewShard(ds, width)
50 | if err != nil {
51 | return nil, nil, err
52 | }
53 |
54 | var dirs []string
55 | for i := 0; i < size; i++ {
56 | dirs = append(dirs, fmt.Sprintf("DIRNAME%d", i))
57 | }
58 |
59 | shuffle(time.Now().UnixNano(), dirs)
60 |
61 | for i := 0; i < len(dirs); i++ {
62 | nd := ft.EmptyDirNode()
63 | err := ds.Add(ctx, nd)
64 | if err != nil {
65 | return nil, nil, err
66 | }
67 | err = s.Set(ctx, dirs[i], nd)
68 | if err != nil {
69 | return nil, nil, err
70 | }
71 | }
72 |
73 | return dirs, s, nil
74 | }
75 |
76 | func assertLinksEqual(linksA []*format.Link, linksB []*format.Link) error {
77 | if len(linksA) != len(linksB) {
78 | return fmt.Errorf("links arrays are different sizes")
79 | }
80 |
81 | sortLinks(linksA)
82 | sortLinks(linksB)
83 | for i, a := range linksA {
84 | b := linksB[i]
85 | if a.Name != b.Name {
86 | return fmt.Errorf("links names mismatch")
87 | }
88 |
89 | if a.Cid.String() != b.Cid.String() {
90 | return fmt.Errorf("link hashes dont match")
91 | }
92 | }
93 |
94 | return nil
95 | }
96 |
97 | func sortLinks(links []*format.Link) {
98 | slices.SortStableFunc(links, func(a, b *format.Link) int {
99 | return strings.Compare(a.Name, b.Name)
100 | })
101 | }
102 |
103 | func mockDag() (format.DAGService, *ipld.LinkSystem) {
104 | bsrv := mdtest.Bserv()
105 | dsrv := dag.NewDAGService(bsrv)
106 | lsys := cidlink.DefaultLinkSystem()
107 | lsys.StorageReadOpener = func(lnkCtx ipld.LinkContext, lnk ipld.Link) (io.Reader, error) {
108 | cidLink, ok := lnk.(cidlink.Link)
109 | if !ok {
110 | return nil, fmt.Errorf("invalid link type for loading: %v", lnk)
111 | }
112 |
113 | blk, err := bsrv.GetBlock(lnkCtx.Ctx, cidLink.Cid)
114 | if err != nil {
115 | return nil, err
116 | }
117 |
118 | return bytes.NewReader(blk.RawData()), nil
119 | }
120 | lsys.TrustedStorage = true
121 | return dsrv, &lsys
122 | }
123 |
124 | func TestBasicSet(t *testing.T) {
125 | ds, lsys := mockDag()
126 | for _, w := range []int{128, 256, 512, 1024} {
127 | t.Run(fmt.Sprintf("BasicSet%d", w), func(t *testing.T) {
128 | names, s, err := makeDirWidth(ds, 1000, w)
129 | require.NoError(t, err)
130 | ctx := context.Background()
131 | legacyNode, err := s.Node()
132 | require.NoError(t, err)
133 | nd, err := lsys.Load(ipld.LinkContext{Ctx: ctx}, cidlink.Link{Cid: legacyNode.Cid()}, dagpb.Type.PBNode)
134 | require.NoError(t, err)
135 | hamtShard, err := hamt.AttemptHAMTShardFromNode(ctx, nd, lsys)
136 | require.NoError(t, err)
137 | for _, d := range names {
138 | _, err := hamtShard.LookupByString(d)
139 | require.NoError(t, err)
140 | }
141 | })
142 | }
143 | }
144 |
145 | func TestIterator(t *testing.T) {
146 | ds, lsys := mockDag()
147 | _, s, err := makeDir(ds, 300)
148 | if err != nil {
149 | t.Fatal(err)
150 | }
151 | ctx := context.Background()
152 |
153 | legacyNode, err := s.Node()
154 | require.NoError(t, err)
155 | nds, err := legacy.NewHamtFromDag(ds, legacyNode)
156 | require.NoError(t, err)
157 | nd, err := lsys.Load(ipld.LinkContext{Ctx: ctx}, cidlink.Link{Cid: legacyNode.Cid()}, dagpb.Type.PBNode)
158 | require.NoError(t, err)
159 | hamtShard, err := hamt.AttemptHAMTShardFromNode(ctx, nd, lsys)
160 | require.NoError(t, err)
161 |
162 | linksA, err := nds.EnumLinks(ctx)
163 | require.NoError(t, err)
164 |
165 | require.Equal(t, int64(len(linksA)), hamtShard.Length())
166 |
167 | linksB := make([]*format.Link, 0, len(linksA))
168 | iter := hamtShard.Iterator()
169 | for !iter.Done() {
170 | name, link := iter.Next()
171 | linksB = append(linksB, &format.Link{
172 | Name: name.String(),
173 | Cid: link.Link().(cidlink.Link).Cid,
174 | })
175 | }
176 | require.NoError(t, assertLinksEqual(linksA, linksB))
177 | }
178 |
179 | func TestLoadFailsFromNonShard(t *testing.T) {
180 | ds, lsys := mockDag()
181 | ctx := context.Background()
182 | legacyNode := ft.EmptyDirNode()
183 | ds.Add(ctx, legacyNode)
184 | nd, err := lsys.Load(ipld.LinkContext{Ctx: ctx}, cidlink.Link{Cid: legacyNode.Cid()}, dagpb.Type.PBNode)
185 | require.NoError(t, err)
186 | _, err = hamt.AttemptHAMTShardFromNode(ctx, nd, lsys)
187 | require.Error(t, err)
188 |
189 | // empty protobuf w/o data
190 | nd, err = qp.BuildMap(dagpb.Type.PBNode, -1, func(ma ipld.MapAssembler) {
191 | qp.MapEntry(ma, "Links", qp.List(-1, func(ipld.ListAssembler) {}))
192 | })
193 | require.NoError(t, err)
194 |
195 | _, err = hamt.AttemptHAMTShardFromNode(ctx, nd, lsys)
196 | require.Error(t, err)
197 | }
198 |
199 | func TestFindNonExisting(t *testing.T) {
200 | ds, lsys := mockDag()
201 | _, s, err := makeDir(ds, 100)
202 | if err != nil {
203 | t.Fatal(err)
204 | }
205 | ctx := context.Background()
206 | legacyNode, err := s.Node()
207 | require.NoError(t, err)
208 | nd, err := lsys.Load(ipld.LinkContext{Ctx: ctx}, cidlink.Link{Cid: legacyNode.Cid()}, dagpb.Type.PBNode)
209 | require.NoError(t, err)
210 | hamtShard, err := hamt.AttemptHAMTShardFromNode(ctx, nd, lsys)
211 | require.NoError(t, err)
212 | for i := 0; i < 200; i++ {
213 | key := fmt.Sprintf("notfound%d", i)
214 | _, err := hamtShard.LookupByString(key)
215 | require.EqualError(t, err, schema.ErrNoSuchField{Field: ipld.PathSegmentOfString(key)}.Error())
216 | }
217 | }
218 |
219 | func TestIncompleteShardedIteration(t *testing.T) {
220 | ctx := context.Background()
221 | req := require.New(t)
222 |
223 | fixture := "./fixtures/wikipedia-cryptographic-hash-function.car"
224 | f, err := os.Open(fixture)
225 | req.NoError(err)
226 | defer f.Close()
227 | carstore, err := storage.OpenReadable(f)
228 | req.NoError(err)
229 | lsys := cidlink.DefaultLinkSystem()
230 | lsys.TrustedStorage = true
231 | lsys.SetReadStorage(carstore)
232 |
233 | // classic recursive go-ipld-prime map iteration, being forgiving about
234 | // NotFound block loads to see what we end up with
235 |
236 | kvs := make(map[string]string)
237 | var iterNotFound int
238 | blockNotFound := make(map[string]struct{})
239 |
240 | var iter func(string, ipld.Link)
241 | iter = func(dir string, lnk ipld.Link) {
242 | nd, err := lsys.Load(ipld.LinkContext{Ctx: ctx}, lnk, basicnode.Prototype.Any)
243 | if nf, ok := err.(interface{ NotFound() bool }); ok && nf.NotFound() {
244 | // got a named link that we can't load
245 | blockNotFound[dir] = struct{}{}
246 | return
247 | }
248 | req.NoError(err)
249 | if nd.Kind() == ipld.Kind_Bytes {
250 | bv, err := nd.AsBytes()
251 | req.NoError(err)
252 | kvs[dir] = string(bv)
253 | return
254 | }
255 |
256 | nb := dagpb.Type.PBNode.NewBuilder()
257 | req.NoError(nb.AssignNode(nd))
258 | pbn := nb.Build()
259 | hamtShard, err := hamt.AttemptHAMTShardFromNode(ctx, pbn, &lsys)
260 | req.NoError(err)
261 |
262 | mi := hamtShard.MapIterator()
263 | for !mi.Done() {
264 | k, v, err := mi.Next()
265 | if nf, ok := err.(interface{ NotFound() bool }); ok && nf.NotFound() {
266 | // internal shard link that won't load, we don't know what it might
267 | // point to
268 | iterNotFound++
269 | continue
270 | }
271 | req.NoError(err)
272 | ks, err := k.AsString()
273 | req.NoError(err)
274 | req.Equal(ipld.Kind_Link, v.Kind())
275 | lv, err := v.AsLink()
276 | req.NoError(err)
277 | iter(dir+"/"+ks, lv)
278 | }
279 | }
280 | // walk the tree
281 | iter("", cidlink.Link{Cid: carstore.Roots()[0]})
282 |
283 | req.Len(kvs, 1)
284 | req.Contains(kvs, "/wiki/Cryptographic_hash_function")
285 | req.Contains(kvs["/wiki/Cryptographic_hash_function"], "Cryptographic hash function\n")
286 | req.Equal(iterNotFound, 570) // tried to load 570 blocks that were not in the CAR
287 | req.Len(blockNotFound, 110) // 110 blocks, for named links, were not found in the CAR
288 | // some of the root block links
289 | req.Contains(blockNotFound, "/favicon.ico")
290 | req.Contains(blockNotFound, "/index.html")
291 | req.Contains(blockNotFound, "/zimdump_version")
292 | // some of the shard links
293 | req.Contains(blockNotFound, "/wiki/UK_railway_Signal")
294 | req.Contains(blockNotFound, "/wiki/Australian_House")
295 | req.Contains(blockNotFound, "/wiki/ICloud_Drive")
296 | req.Contains(blockNotFound, "/wiki/Édouard_Bamberger")
297 | }
298 |
--------------------------------------------------------------------------------
/hamt/util.go:
--------------------------------------------------------------------------------
1 | package hamt
2 |
3 | // adapted from https://github.com/ipfs/go-unixfs/blob/master/hamt/util.go
4 |
5 | import (
6 | "fmt"
7 |
8 | "math/bits"
9 |
10 | bitfield "github.com/ipfs/go-bitfield"
11 | "github.com/ipfs/go-unixfsnode/data"
12 | dagpb "github.com/ipld/go-codec-dagpb"
13 | "github.com/spaolacci/murmur3"
14 | )
15 |
16 | // hashBits is a helper that allows the reading of the 'next n bits' as an integer.
17 | type hashBits struct {
18 | b []byte
19 | consumed int
20 | }
21 |
22 | func mkmask(n int) byte {
23 | return (1 << uint(n)) - 1
24 | }
25 |
26 | // Next returns the next 'i' bits of the hashBits value as an integer, or an
27 | // error if there aren't enough bits.
28 | func (hb *hashBits) Next(i int) (int, error) {
29 | if hb.consumed+i > len(hb.b)*8 {
30 | return 0, ErrHAMTTooDeep
31 | }
32 | return hb.next(i), nil
33 | }
34 |
35 | func (hb *hashBits) next(i int) int {
36 | curbi := hb.consumed / 8
37 | leftb := 8 - (hb.consumed % 8)
38 |
39 | curb := hb.b[curbi]
40 | if i == leftb {
41 | out := int(mkmask(i) & curb)
42 | hb.consumed += i
43 | return out
44 | }
45 | if i < leftb {
46 | a := curb & mkmask(leftb) // mask out the high bits we don't want
47 | b := a & ^mkmask(leftb-i) // mask out the low bits we don't want
48 | c := b >> uint(leftb-i) // shift whats left down
49 | hb.consumed += i
50 | return int(c)
51 | }
52 | out := int(mkmask(leftb) & curb)
53 | out <<= uint(i - leftb)
54 | hb.consumed += leftb
55 | out += hb.next(i - leftb)
56 | return out
57 |
58 | }
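// Worked example (illustrative): with b = []byte{0b10110100}, Next(3) returns
// 0b101 = 5 and leaves consumed = 3; a second Next(3) returns the following
// three bits, again 0b101 = 5; a third Next(3) fails with ErrHAMTTooDeep since
// only two unread bits remain. For a fanout-256 HAMT, each level of the tree
// consumes log2(256) = 8 bits of the murmur3 hash of the key this way.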
59 |
60 | func validateHAMTData(nd data.UnixFSData) error {
61 | if nd.FieldDataType().Int() != data.Data_HAMTShard {
62 | return data.ErrWrongNodeType{Expected: data.Data_HAMTShard, Actual: nd.FieldDataType().Int()}
63 | }
64 |
65 | if !nd.FieldHashType().Exists() || uint64(nd.FieldHashType().Must().Int()) != HashMurmur3 {
66 | return ErrInvalidHashType
67 | }
68 |
69 | if !nd.FieldData().Exists() {
70 | return ErrNoDataField
71 | }
72 |
73 | if !nd.FieldFanout().Exists() {
74 | return ErrNoFanoutField
75 | }
76 | if err := checkLogTwo(int(nd.FieldFanout().Must().Int())); err != nil {
77 | return err
78 | }
79 |
80 | return nil
81 | }
82 |
83 | func log2Size(nd data.UnixFSData) int { // bits of hash consumed per tree level: log2 of the fanout
84 | return bits.TrailingZeros(uint(nd.FieldFanout().Must().Int()))
85 | }
86 |
87 | func maxPadLength(nd data.UnixFSData) int { // width of the hex index prefix on link names, e.g. fanout 256 -> len("FF") = 2
88 | return len(fmt.Sprintf("%X", nd.FieldFanout().Must().Int()-1))
89 | }
90 |
91 | const maximumHamtWidth = 1 << 10
92 |
93 | func bitField(nd data.UnixFSData) (bitfield.Bitfield, error) {
94 | fanout := int(nd.FieldFanout().Must().Int())
95 | if fanout > maximumHamtWidth {
96 | return nil, fmt.Errorf("hamt witdh (%d) exceed maximum allowed (%d)", fanout, maximumHamtWidth)
97 | }
98 | bf, err := bitfield.NewBitfield(fanout)
99 | if err != nil {
100 | return nil, err
101 | }
102 | bf.SetBytes(nd.FieldData().Must().Bytes())
103 | return bf, nil
104 | }
105 |
106 | func checkLogTwo(v int) error {
107 | if v <= 0 {
108 | return ErrHAMTSizeInvalid
109 | }
110 | lg2 := bits.TrailingZeros(uint(v))
111 | if 1<<lg2 != v {
112 | return ErrHAMTSizeInvalid
113 | }
114 | return nil
115 | }

--------------------------------------------------------------------------------
/signaling.go:
--------------------------------------------------------------------------------
134 | for segments.Len() > 0 {
135 | // Wrap selector in ExploreFields as we walk back up through the path.
136 | // We can assume each segment to be a unixfs path section, so we
137 | // InterpretAs to make sure the node is reified through go-unixfsnode
138 | // (if possible) and we can traverse through according to unixfs pathing
139 | // rather than bare IPLD pathing - which also gives us the ability to
140 | // traverse through HAMT shards.
141 | ss = ssb.ExploreInterpretAs("unixfs", ssb.ExploreFields(
142 | func(efsb builder.ExploreFieldsSpecBuilder) {
143 | efsb.Insert(segments.Last().String(), ss)
144 | },
145 | ))
146 | if matchPath {
147 | ss = ssb.ExploreUnion(ssb.Matcher(), ss)
148 | }
149 | segments = segments.Pop()
150 | }
151 |
152 | return ss.Node()
153 | }
154 |
155 | func specBuilder(b func(ssb builder.SelectorSpecBuilder) builder.SelectorSpec) builder.SelectorSpec {
156 | return b(builder.NewSelectorSpecBuilder(basicnode.Prototype.Any))
157 | }
158 |
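Only the tail of signaling.go survives in this dump, but its use is visible in signalling_test.go below. A sketch of typical use, under the assumption that the resulting selector node is compiled with go-ipld-prime's traversal/selector package:

    // explore /foo/bar using unixfs pathing (HAMT-shard aware), matching each
    // node along the path, then explore everything beneath the target
    selNode := unixfsnode.UnixFSPathSelectorBuilder("/foo/bar", unixfsnode.ExploreAllRecursivelySelector, true)
    sel, err := selector.CompileSelector(selNode) // assumed: traversal/selector import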
--------------------------------------------------------------------------------
/signalling_test.go:
--------------------------------------------------------------------------------
1 | package unixfsnode_test
2 |
3 | import (
4 | "fmt"
5 | "strings"
6 | "testing"
7 |
8 | "github.com/ipfs/go-unixfsnode"
9 | "github.com/ipld/go-ipld-prime"
10 | "github.com/ipld/go-ipld-prime/codec/dagjson"
11 | "github.com/ipld/go-ipld-prime/traversal/selector/builder"
12 | selectorparse "github.com/ipld/go-ipld-prime/traversal/selector/parse"
13 | "github.com/stretchr/testify/require"
14 | )
15 |
16 | // Selectors are tested against JSON expected forms; this doesn't necessarily
17 | // validate that they work as advertised. It's just a sanity check that the
18 | // selectors are being built as expected.
19 |
20 | var exploreAllJson = mustDagJson(selectorparse.CommonSelector_ExploreAllRecursively)
21 |
22 | // explore interpret-as (~), next (>), match (.), interpreted as unixfs-preload
23 | var matchUnixfsPreloadJson = `{"~":{">":{".":{}},"as":"unixfs-preload"}}`
24 |
25 | // explore interpret-as (~), next (>), union (|) of match (.) and explore recursive (R) edge (@) with a depth of 1, interpreted as unixfs
26 | var matchUnixfsEntityJson = `{"~":{">":{"|":[{".":{}},{"R":{":>":{"a":{">":{"@":{}}}},"l":{"depth":1}}}]},"as":"unixfs"}}`
27 |
28 | // match interpret-as (~), next (>), match (.), interpreted as unixfs
29 | var matchUnixfsJson = `{"~":{">":{".":{}},"as":"unixfs"}}`
30 |
31 | func TestUnixFSPathSelector(t *testing.T) {
32 | testCases := []struct {
33 | name string
34 | path string
35 | expectedSelector string
36 | }{
37 | {
38 | name: "empty path",
39 | path: "",
40 | expectedSelector: matchUnixfsJson,
41 | },
42 | {
43 | name: "single field",
44 | path: "/foo",
45 | expextedSelector: jsonFields(matchUnixfsJson, "foo"),
46 | },
47 | {
48 | name: "multiple fields",
49 | path: "/foo/bar",
50 | expectedSelector: jsonFields(matchUnixfsJson, "foo", "bar"),
51 | },
52 | {
53 | name: "leading slash optional",
54 | path: "foo/bar",
55 | expectedSelector: jsonFields(matchUnixfsJson, "foo", "bar"),
56 | },
57 | {
58 | name: "trailing slash optional",
59 | path: "/foo/bar/",
60 | expectedSelector: jsonFields(matchUnixfsJson, "foo", "bar"),
61 | },
62 | {
63 | // a go-ipld-prime specific thing, not clearly specified by path spec (?)
64 | name: ".. is a field named ..",
65 | path: "/foo/../bar/",
66 | expectedSelector: jsonFields(matchUnixfsJson, "foo", "..", "bar"),
67 | },
68 | {
69 | // a go-ipld-prime specific thing, not clearly specified by path spec
70 | name: "redundant slashes ignored",
71 | path: "foo///bar",
72 | expectedSelector: jsonFields(matchUnixfsJson, "foo", "bar"),
73 | },
74 | }
75 |
76 | for _, tc := range testCases {
77 | t.Run(tc.name, func(t *testing.T) {
78 | sel := unixfsnode.UnixFSPathSelector(tc.path)
79 | require.Equal(t, tc.expectedSelector, mustDagJson(sel))
80 | })
81 | }
82 | }
83 |
84 | func TestUnixFSPathSelectorBuilder(t *testing.T) {
85 | testCases := []struct {
86 | name string
87 | path string
88 | target builder.SelectorSpec
89 | matchPath bool
90 | expectedSelector string
91 | }{
92 | {
93 | name: "empty path",
94 | path: "",
95 | target: unixfsnode.ExploreAllRecursivelySelector,
96 | expectedSelector: exploreAllJson,
97 | },
98 | {
99 | name: "empty path shallow (preload)",
100 | path: "",
101 | target: unixfsnode.MatchUnixFSPreloadSelector,
102 | expectedSelector: matchUnixfsPreloadJson,
103 | },
104 | {
105 | name: "empty path shallow (entity)",
106 | path: "",
107 | target: unixfsnode.MatchUnixFSEntitySelector,
108 | expectedSelector: matchUnixfsEntityJson,
109 | },
110 | {
111 | name: "single field",
112 | path: "/foo",
113 | expextedSelector: jsonFields(exploreAllJson, "foo"),
114 | target: unixfsnode.ExploreAllRecursivelySelector,
115 | },
116 | {
117 | name: "single field, match path",
118 | path: "/foo",
119 | expextedSelector: jsonFieldsMatchPoint(exploreAllJson, "foo"),
120 | target: unixfsnode.ExploreAllRecursivelySelector,
121 | matchPath: true,
122 | },
123 | {
124 | name: "single field shallow (preload)",
125 | path: "/foo",
126 | expextedSelector: jsonFields(matchUnixfsPreloadJson, "foo"),
127 | target: unixfsnode.MatchUnixFSPreloadSelector,
128 | },
129 | {
130 | name: "single field shallow (entity)",
131 | path: "/foo",
132 | expextedSelector: jsonFields(matchUnixfsEntityJson, "foo"),
133 | target: unixfsnode.MatchUnixFSEntitySelector,
134 | },
135 | {
136 | name: "multiple fields",
137 | path: "/foo/bar",
138 | expectedSelector: jsonFields(exploreAllJson, "foo", "bar"),
139 | target: unixfsnode.ExploreAllRecursivelySelector,
140 | },
141 | {
142 | name: "multiple fields, match path",
143 | path: "/foo/bar",
144 | expectedSelector: jsonFieldsMatchPoint(exploreAllJson, "foo", "bar"),
145 | target: unixfsnode.ExploreAllRecursivelySelector,
146 | matchPath: true,
147 | },
148 | {
149 | name: "multiple fields shallow",
150 | path: "/foo/bar",
151 | expectedSelector: jsonFields(matchUnixfsPreloadJson, "foo", "bar"),
152 | target: unixfsnode.MatchUnixFSPreloadSelector,
153 | },
154 | {
155 | name: "leading slash optional",
156 | path: "foo/bar",
157 | expectedSelector: jsonFields(exploreAllJson, "foo", "bar"),
158 | target: unixfsnode.ExploreAllRecursivelySelector,
159 | },
160 | {
161 | name: "trailing slash optional",
162 | path: "/foo/bar/",
163 | expectedSelector: jsonFields(exploreAllJson, "foo", "bar"),
164 | target: unixfsnode.ExploreAllRecursivelySelector,
165 | },
166 | // a go-ipld-prime specific thing, not clearly specified by path spec (?)
167 | {
168 | name: ".. is a field named ..",
169 | path: "/foo/../bar/",
170 | expectedSelector: jsonFields(exploreAllJson, "foo", "..", "bar"),
171 | target: unixfsnode.ExploreAllRecursivelySelector,
172 | },
173 | {
174 | // a go-ipld-prime specific thing, not clearly specified by path spec
175 | name: "redundant slashes ignored",
176 | path: "foo///bar",
177 | expectedSelector: jsonFields(exploreAllJson, "foo", "bar"),
178 | target: unixfsnode.ExploreAllRecursivelySelector,
179 | },
180 | }
181 |
182 | for _, tc := range testCases {
183 | t.Run(tc.name, func(t *testing.T) {
184 | sel := unixfsnode.UnixFSPathSelectorBuilder(tc.path, tc.target, tc.matchPath)
185 | require.Equal(t, tc.expectedSelector, mustDagJson(sel))
186 | })
187 | }
188 | }
189 |
190 | func jsonFields(target string, fields ...string) string {
191 | var sb strings.Builder
192 | for _, n := range fields {
193 | // explore interpret-as (~) next (>), explore field (f) + specific field (f>), with field name
194 | sb.WriteString(fmt.Sprintf(`{"~":{">":{"f":{"f>":{"%s":`, n))
195 | }
196 | sb.WriteString(target)
197 | sb.WriteString(strings.Repeat(`}}},"as":"unixfs"}}`, len(fields)))
198 | return sb.String()
199 | }
200 |
201 | func jsonFieldsMatchPoint(target string, fields ...string) string {
202 | var sb strings.Builder
203 | for _, n := range fields {
204 | // union (|) of match (.) and explore interpret-as (~) next (>), explore field (f) + specific field (f>), with field name
205 | sb.WriteString(fmt.Sprintf(`{"|":[{".":{}},{"~":{">":{"f":{"f>":{"%s":`, n))
206 | }
207 | sb.WriteString(target)
208 | sb.WriteString(strings.Repeat(`}}},"as":"unixfs"}}]}`, len(fields)))
209 | return sb.String()
210 | }
211 |
212 | func mustDagJson(n ipld.Node) string {
213 | byts, err := ipld.Encode(n, dagjson.Encode)
214 | if err != nil {
215 | panic(err)
216 | }
217 | return string(byts)
218 | }
219 |
--------------------------------------------------------------------------------
/test/doc.go:
--------------------------------------------------------------------------------
1 | // Package test provides ADL tests of the IPLD specification, covering:
2 | //   - traversal making use of match subsets
3 | //   - largeByteNode readers
4 | package test
5 |
--------------------------------------------------------------------------------
/test/partial_file_access_test.go:
--------------------------------------------------------------------------------
1 | package test
2 |
3 | import (
4 | "bytes"
5 | "context"
6 | "io"
7 | "testing"
8 |
9 | "github.com/ipfs/go-test/random"
10 | "github.com/ipfs/go-unixfsnode/data/builder"
11 | "github.com/ipfs/go-unixfsnode/file"
12 | dagpb "github.com/ipld/go-codec-dagpb"
13 | "github.com/ipld/go-ipld-prime"
14 | "github.com/ipld/go-ipld-prime/datamodel"
15 | "github.com/ipld/go-ipld-prime/linking"
16 | cidlink "github.com/ipld/go-ipld-prime/linking/cid"
17 | basicnode "github.com/ipld/go-ipld-prime/node/basic"
18 | "github.com/ipld/go-ipld-prime/traversal"
19 | sb "github.com/ipld/go-ipld-prime/traversal/selector/builder"
20 | )
21 |
22 | func TestPartialFileAccess(t *testing.T) {
23 | buf := make([]byte, 10*1024*1024)
24 | random.NewSeededRand(0xdeadbeef).Read(buf)
25 | r := bytes.NewReader(buf)
26 |
27 | ls := cidlink.DefaultLinkSystem()
28 | storage := cidlink.Memory{}
29 | ls.StorageReadOpener = storage.OpenRead
30 | ls.StorageWriteOpener = storage.OpenWrite
31 |
32 | f, _, err := builder.BuildUnixFSFile(r, "", &ls)
33 | if err != nil {
34 | t.Fatal(err)
35 | }
36 |
37 | // get back the root node substrate from the link at the top of the builder.
38 | fr, err := ls.Load(ipld.LinkContext{}, f, dagpb.Type.PBNode)
39 | if err != nil {
40 | t.Fatal(err)
41 | }
42 |
43 | ufn, err := file.NewUnixFSFile(context.Background(), fr, &ls)
44 | if err != nil {
45 | t.Fatal(err)
46 | }
47 |
48 | openedLinks := []ipld.Link{}
49 | ls.StorageReadOpener = func(lc linking.LinkContext, l datamodel.Link) (io.Reader, error) {
50 | openedLinks = append(openedLinks, l)
51 | return storage.OpenRead(lc, l)
52 | }
53 |
54 | // read back out the file.
55 | out, err := ufn.AsBytes()
56 | if err != nil {
57 | t.Fatal(err)
58 | }
59 | if !bytes.Equal(out, buf) {
60 | t.Fatal("Not equal")
61 | }
62 |
63 | fullLen := len(openedLinks)
64 |
65 | openedLinks = []ipld.Link{}
66 |
67 | partial, err := ufn.(datamodel.LargeBytesNode).AsLargeBytes()
68 | if err != nil {
69 | t.Fatal(err)
70 | }
71 | half := make([]byte, len(buf)/2)
72 | if _, err := partial.Read(half); err != nil {
73 | t.Fatal(err)
74 | }
75 | if len(openedLinks) >= fullLen {
76 | t.Fatal("should not have accessed full file on a partial read.")
77 | }
78 |
79 | openedLinks = []ipld.Link{}
80 |
81 | prog := traversal.Progress{
82 | Cfg: &traversal.Config{
83 | LinkSystem: ls,
84 | },
85 | }
86 | sb := sb.NewSelectorSpecBuilder(basicnode.Prototype.Any)
87 | ss := sb.MatcherSubset(5*1024*1024, 6*1024*1024)
88 | sel, err := ss.Selector()
89 | if err != nil {
90 | t.Fatal(err)
91 | }
92 |
93 | if err := prog.WalkMatching(ufn, sel, func(_ traversal.Progress, n datamodel.Node) error {
94 | b, err := n.AsBytes()
95 | if err != nil {
96 | t.Fatal(err)
97 | }
98 | if len(b) != 1024*1024 {
99 | t.Fatalf("wrong length: %d", len(b))
100 | }
101 | return nil
102 | }); err != nil {
103 | t.Fatal(err)
104 | }
105 | if len(openedLinks) >= fullLen {
106 | t.Fatal("should not have accessed full file on a partial traversal.")
107 | }
108 | }
109 |
--------------------------------------------------------------------------------
/testutil/directory.go:
--------------------------------------------------------------------------------
1 | package testutil
2 |
3 | import (
4 | "context"
5 | "crypto/sha256"
6 | "encoding/hex"
7 | "fmt"
8 | "io"
9 | "testing"
10 |
11 | "github.com/ipfs/go-cid"
12 | dagpb "github.com/ipld/go-codec-dagpb"
13 | "github.com/ipld/go-ipld-prime"
14 | "github.com/ipld/go-ipld-prime/datamodel"
15 | "github.com/ipld/go-ipld-prime/linking"
16 | cidlink "github.com/ipld/go-ipld-prime/linking/cid"
17 | "github.com/ipld/go-ipld-prime/node/basicnode"
18 | "github.com/ipld/go-ipld-prime/traversal"
19 | "github.com/stretchr/testify/require"
20 | )
21 |
22 | // DirEntry represents a flattened directory entry, where Path is from the
23 | // root of the directory and Content is the file contents. It is intended
24 | // that a DirEntry slice can be used to represent a full-depth directory without
25 | // needing nesting.
26 | type DirEntry struct {
27 | Path string
28 | Content []byte
29 | Root cid.Cid
30 | SelfCids []cid.Cid
31 | TSize uint64
32 | Children []DirEntry
33 | }
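// Illustrative shape (hypothetical CIDs): a directory holding a single file
// could be flattened as
//
//	DirEntry{Path: "", Root: dirCid, Children: []DirEntry{
//		{Path: "/a.txt", Content: []byte("hello"), Root: fileCid},
//	}}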
34 |
35 | func (de DirEntry) Size() (int64, error) {
36 | return int64(de.TSize), nil
37 | }
38 |
39 | func (de DirEntry) Link() ipld.Link {
40 | return cidlink.Link{Cid: de.Root}
41 | }
42 |
43 | // ToDirEntry takes a LinkSystem containing UnixFS data and builds a DirEntry
44 | // tree representing the file and directory structure it finds starting at the
45 | // rootCid. If expectFull is true, it will error if it encounters a UnixFS
46 | // node that it cannot fully load. If expectFull is false, it will ignore
47 | // errors and return nil for any node it cannot load.
48 | func ToDirEntry(t *testing.T, linkSys linking.LinkSystem, rootCid cid.Cid, expectFull bool) DirEntry {
49 | return ToDirEntryFrom(t, linkSys, rootCid, "", expectFull)
50 | }
51 |
52 | // ToDirEntryFrom is the same as ToDirEntry but allows specifying a rootPath
53 | // such that the resulting DirEntry tree will all have that path as a prefix.
54 | // This is useful when representing a sub-DAG of a larger DAG where you want
55 | // to make direct comparisons.
56 | func ToDirEntryFrom(t *testing.T, linkSys linking.LinkSystem, rootCid cid.Cid, rootPath string, expectFull bool) DirEntry {
57 | var proto datamodel.NodePrototype = dagpb.Type.PBNode
58 | isDagPb := rootCid.Prefix().Codec == cid.DagProtobuf
59 | if !isDagPb {
60 | proto = basicnode.Prototype.Any
61 | }
62 | node, err := linkSys.Load(linking.LinkContext{Ctx: context.TODO()}, cidlink.Link{Cid: rootCid}, proto)
63 | if expectFull {
64 | require.NoError(t, err)
65 | } else if err != nil {
66 | if e, ok := err.(interface{ NotFound() bool }); ok && e.NotFound() {
67 | return DirEntry{}
68 | }
69 | require.NoError(t, err)
70 | }
71 |
72 | if node.Kind() == ipld.Kind_Bytes { // is a file
73 | byts, err := node.AsBytes()
74 | require.NoError(t, err)
75 | return DirEntry{
76 | Path: rootPath,
77 | Content: byts,
78 | Root: rootCid,
79 | }
80 | }
81 |
82 | children := make([]DirEntry, 0)
83 | if isDagPb {
84 | // else is likely a directory
85 | for itr := node.MapIterator(); !itr.Done(); {
86 | k, v, err := itr.Next()
87 | require.NoError(t, err)
88 | childName, err := k.AsString()
89 | require.NoError(t, err)
90 | childLink, err := v.AsLink()
91 | require.NoError(t, err)
92 | child := ToDirEntryFrom(t, linkSys, childLink.(cidlink.Link).Cid, rootPath+"/"+childName, expectFull)
93 | children = append(children, child)
94 | }
95 | } else {
96 | // not a dag-pb node; walk it with plain IPLD pathing rules instead and recurse into any links found
97 | err := traversal.WalkLocal(node, func(prog traversal.Progress, n ipld.Node) error {
98 | if n.Kind() == ipld.Kind_Link {
99 | l, err := n.AsLink()
100 | if err != nil {
101 | return err
102 | }
103 | child := ToDirEntryFrom(t, linkSys, l.(cidlink.Link).Cid, rootPath+"/"+prog.Path.String(), expectFull)
104 | children = append(children, child)
105 | }
106 | return nil
107 | })
108 | require.NoError(t, err)
109 | }
110 |
111 | return DirEntry{
112 | Path: rootPath,
113 | Root: rootCid,
114 | Children: children,
115 | }
116 | }
117 |
118 | // CompareDirEntries is a safe, recursive comparison between two DirEntry
119 | // values. It doesn't strictly require child ordering to match, but it does
120 | // require that all children exist and match, in some order.
121 | func CompareDirEntries(t *testing.T, a, b DirEntry) {
122 | // t.Log("CompareDirEntries", a.Path, b.Path) // TODO: remove this
123 | require.Equal(t, a.Path, b.Path)
124 | require.Equal(t, a.Root.String(), b.Root.String(), a.Path+" root mismatch")
125 | hashA := sha256.Sum256(a.Content)
126 | hashB := sha256.Sum256(b.Content)
127 | require.Equal(t, hex.EncodeToString(hashA[:]), hex.EncodeToString(hashB[:]), a.Path+" content hash mismatch")
128 | require.Equal(t, len(a.Children), len(b.Children), fmt.Sprintf("%s child length mismatch %d <> %d", a.Path, len(a.Children), len(b.Children)))
129 | for i := range a.Children {
130 | // not necessarily in order
131 | var found bool
132 | for j := range b.Children {
133 | if a.Children[i].Path == b.Children[j].Path {
134 | found = true
135 | CompareDirEntries(t, a.Children[i], b.Children[j])
136 | }
137 | }
138 | require.True(t, found, fmt.Sprintf("@ path [%s], a's child [%s] not found in b", a.Path, a.Children[i].Path))
139 | }
140 | }
141 |
142 | // WrapContent embeds the content we want in some random nested content such
143 | // that it's fetchable under the provided path. If exclusive is true, the
144 | // content will be the only thing under the path. If false, there will be
145 | // content before and after the wrapped content at each point in the path.
146 | func WrapContent(t *testing.T, rndReader io.Reader, lsys *ipld.LinkSystem, content DirEntry, wrapPath string, exclusive bool) DirEntry {
147 | want := content
148 | ps := datamodel.ParsePath(wrapPath)
149 | for ps.Len() > 0 {
150 | de := []DirEntry{}
151 | if !exclusive {
152 | before := GenerateDirectory(t, lsys, rndReader, 4<<10, false)
153 | before.Path = "!before"
154 | de = append(de, before)
155 | }
156 | want.Path = ps.Last().String()
157 | de = append(de, want)
158 | if !exclusive {
159 | after := GenerateDirectory(t, lsys, rndReader, 4<<11, true)
160 | after.Path = "~after"
161 | de = append(de, after)
162 | }
163 | want = BuildDirectory(t, lsys, de, false)
164 | ps = ps.Pop()
165 | }
166 | return want
167 | }
168 |
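A sketch of how these helpers compose in a test; GenerateDirectory and BuildDirectory live in this package's generator.go (used above), and rndReader is any randomness source:

    // generate ~4KiB of random directory content, then bury it under
    // deep/path so that only that path leads to it
    content := GenerateDirectory(t, &lsys, rndReader, 4<<10, false)
    wrapped := WrapContent(t, rndReader, &lsys, content, "deep/path", true)
    // wrapped.Root is now the CID of the outermost wrapping directory;
    // traversing "deep/path" beneath it lands on the original content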
--------------------------------------------------------------------------------
/testutil/doc.go:
--------------------------------------------------------------------------------
1 | // Package testutil provides utilities for writing tests that require
2 | // nontrivial UnixFS data of various forms
3 | package testutil
4 |
--------------------------------------------------------------------------------
/testutil/namegen/namegen.go:
--------------------------------------------------------------------------------
1 | package namegen
2 |
3 | import (
4 | "encoding/binary"
5 | "io"
6 | "strings"
7 | )
8 |
9 | var words = strings.Fields(wordData)
10 | var extensions = []string{"", ".txt", ".pdf", ".docx", ".png", ".jpg", ".csv", ".json", ".xml"}
11 |
12 | func getRandomIndex(r io.Reader, max int) (int, error) {
13 | var n uint32
14 | err := binary.Read(r, binary.BigEndian, &n)
15 | if err != nil {
16 | return 0, err
17 | }
18 | return int(n % uint32(max)), nil
19 | }
20 |
21 | // RandomDirectoryName returns a random directory name drawn from the built-in word list, using r as the source of randomness.
22 | func RandomDirectoryName(r io.Reader) (string, error) {
23 | index, err := getRandomIndex(r, len(words))
24 | if err != nil {
25 | return "", err
26 | }
27 | return words[index], nil
28 | }
29 |
30 | // RandomFileName returns a random file name drawn from the built-in word list, with a common file extension (possibly empty) appended.
31 | func RandomFileName(r io.Reader) (string, error) {
32 | wordIndex, err := getRandomIndex(r, len(words))
33 | if err != nil {
34 | return "", err
35 | }
36 | ext, err := RandomFileExtension(r)
37 | if err != nil {
38 | return "", err
39 | }
40 | return words[wordIndex] + ext, nil
41 | }
42 |
43 | // RandomFileExtension returns a random file extension, including '.'. This may
44 | // also return an empty string.
45 | func RandomFileExtension(r io.Reader) (string, error) {
46 | index, err := getRandomIndex(r, len(extensions))
47 | if err != nil {
48 | return "", err
49 | }
50 | return extensions[index], nil
51 | }
52 |
53 | const wordData = `jabberwocky Snark whiffling borogoves mome raths brillig slithy toves outgrabe
54 | Tumtum Frabjous Bandersnatch Jubjub Callay slumgullion snicker-snack brobdingnagian Jabberwock
55 | tree Poglorian Binkleborf Wockbristle Zizzotether dinglewock Flumgurgle Glimperwick RazzleDazzle8
56 | gyre tortlewhack whispyfangle Crumplehorn Higgledy7 Piggledy3 flibberwocky Zamborot Flizzleflink
57 | gimble Shakespearean Macbeth Othello Hamlet soliloquy iambic pentameter Benvolio Capulet Montague
58 | Puck Malvolio Beatrice Prospero Iago Falstaff Rosencrantz Guildenstern Cordelia Polonius
59 | Titania Oberon Tybalt Caliban Mercutio Portia Brabantio 4Lear Desdemona Lysander
60 | YossarianScar Jujimufu9 Gorgulon Oozyboozle Razzmatazz8 BlinkenWoggle Flibbertigibbet Quixotic2
61 | Galumphing Widdershins Pecksniffian Bandicoot11 Flapdoodle Fandango Whippersnapper Grandiloquent
62 | Lollygag Persnickety Gibberish Codswallop Rigmarole Nincompoop Flummox Snollygoster Poppycock
63 | Kerfuffle Balderdash Gobbledygook Fiddle-faddle Antidisestablishmentarianism
64 | Supercalifragilisticexpialidocious Rambunctious9 Lickety-split Hullabaloo Skullduggery Ballyhoo
65 | Flabbergasted Discombobulate Pernicious Bumfuzzle Bamboozle Pandemonium Tomfoolery Hobbledehoy7
66 | Claptrap Cockamamie Hocus-pocus8 Higgledy-piggledy Dodecahedron Nonsensical Contraption Quizzical
67 | Snuffleupagus Ostentatious Serendipity Ephemeral Melancholy Sonorous Plethora Brouhaha Absquatulate
68 | Gobbledygook3 Lilliputian Chortle Euphonious Mellifluous Obfuscate Perspicacious Prevaricate
69 | Sesquipedalian Tintinnabulation Quibble9 Umbrageous Quotidian Flapdoodle5 NoodleDoodle
70 | Zigzagumptious Throttlebottom WuzzleWump Canoodle Hodgepodge Blatherskite7 Hornswoggle
71 | BibbidiBobbidiBoo Prestidigitation Confabulate Abscond8 Lickspittle Ragamuffin Taradiddle
72 | Widdershins4 Boondoggle Snuffleupagus9 Gallivant Folderol Malarkey Skedaddle Hobgoblin
73 | BlubberCrumble ZibberZap Snickerdoodle Mooncalf LicketySplit8 Whatchamacallit Thingamajig
74 | Thingamabob GibbleGabble FuddleDuddle LoopyLoo Splendiferous Bumbershoot Catawampus Flibbertigibbet5
75 | Gobbledygook7 Whippersnapper9 Ragamuffin8 Splendiferous
76 | ætheling witan ealdorman leofwyrd swain bēorhall beorn mēarh scōp cyning hēahgerefa
77 | sceadugenga wilweorc hildoræswa þegn ælfscyne wyrmslaga wælwulf fyrd hrēowmōd dēor
78 | ealdorleornung scyldwiga þēodcwealm hāligbōc gūþweard wealdend gāstcynn wīfmann
79 | wīsestōw þrēatung rīcere scealc eorþwerod bealucræft cynerīce sceorp ættwer
80 | gāsthof ealdrīce wæpnedmann wæterfōr landgemære gafolgelda wīcstede mægenþrymm
81 | æscwiga læcedōm wīdferhþ eorlgestrēon brimrād wæterstede hūslēoþ searocraeft
82 | þegnunga wælscenc þrīstguma fyrdrinc wundorcræft cræftleornung eorþbūend
83 | sǣlācend þunorrad wætergifu wæterscipe wæterþenung eorþtilþ eorþgebyrde
84 | eorþhæbbend eorþgræf eorþbærn eorþhūs eorþscearu eorþsweg eorþtæfl eorþweorc
85 | eorþweall eorþwaru eorþwela eorþwīs eorþworn eorþyþ eorþweg eorþwīse eorþwyrhta
86 | eorþwīn eorþsceaða eorþsweart eorþscræf eorþscrūd eorþswyft eorþscīr eorþscūa
87 | eorþsēoc eorþsele eorþhūsl eorþsted eorþswyn eorþsittend eorþsniþ eorþscearp
88 | eorþscyld eorþsceaft eorþstapol eorþstede eorþsmitta eorþscēawere
89 | velociraptorious chimeraesque bellerophontic serendipitastic transmogrification ultracrepidarian
90 | prestidigitationary supraluminescence hemidemisemiquaver unquestionability intercontinentalism
91 | antediluvianistic disproportionately absquatulationism automagicalization
92 | floccinaucinihilipilification quintessentiality incomprehensibility juxtapositionally
93 | perpendicularitude transubstantiation synchronicityverse astronomicalunit thermodynamicness
94 | electromagnetismal procrastinatorily disenfranchisement neutrinooscillation hyperventilatingly
95 | pneumonoultramicroscopicsilicovolcanoconiosis supercalifragilisticexpialidocious thaumaturgeonomics
96 | idiosyncratically unencumberedness phantasmagoricity extraterrestrialism philanthropistastic
97 | xenotransplantation incontrovertibility spontaneityvolution teleportationally labyrinthinean
98 | megalomaniaction cryptozoologician ineffablemystique multiplicativity sisypheanquandary
99 | overenthusiastically irrefutablenotion exceptionalitysphere
100 | blibby ploof twindle zibbet jinty wiblo glimsy snaft trindle quopp vistly chark plizet snibber frint
101 | trazzle buvvy skipple flizz dworp grindle yipple zarfle clippet swazz mibber brackle tindle grozz
102 | vindle plazz freggle twazz snuzzle gwippet whindle juzzle krazz yazzle flippet skindle zapple prazz
103 | buzzle chazz gripple snozzle trizz wazzle blikket zib glup snof yipr tazz vlim frub dwex klop
104 | aa ab ad ae ag ah ai al am an as at aw ax ay ba be bi bo by de do ed ef eh el em en er es et ex fa
105 | fe go ha he hi hm ho id if in is it jo ka ki la li lo ma me mi mm mo mu my na ne no nu od oe of oh
106 | oi om on op or os ow ox oy pa pe pi qi re sh si so ta ti to uh um un up us ut we wo xi xu ya ye yo
107 | za zo
108 | hĕlłø cąfѐ ŝmîłe þřęê ċỏẽxist ǩāŕáōķê ŧrävèl кυгiοsity ŭпịςørn мëĺōđỳ ğħōšţ ŵăνę ẓẽṕhýr ғụzzlę
109 | пåŕŧy 僃êct ԁяêåм љúвïĺëë ѓåḿъḽë ţęmƿęşţ říše čajovna želva štěstí ýpsilon ďábel ňadraží ťava
110 | h3ll0 w0rld c0d1ng 3x3mpl3 pr0gr4mm1ng d3v3l0p3r 5cr4bbl3 3l3ph4nt 4pp 5y5t3m 1nput 0utput 3rr0r
111 | 5t4ck0v3rfl0w 5tr1ng 5l1c3 5h4k35p34r3 5t4nd4rd 3ncrypt10n 5h3ll 5cr1pt 5t4ck 5qu4r3 r3ct4ngl3
112 | tr14ngl3 c1rc13 5ph3r3 5qu4r3r00t 3xpr35510n 5t4t15t1c5 5t4t3m3nt 5ynt4x 5ugg35t10n 5y5t3m4t1c
113 | 5h0rtcut 5h4d0w 5h4r3d
114 | 1 2 3 4 5 6 7 8 9 0
115 | a b c d e f g h i j k l m n o p q r s t u v w x y z
116 | A B C D E F G H I J K L M N O P Q R S T U V W X Y Z`
117 |
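A usage sketch: each function takes an io.Reader as its entropy source, so callers can pass crypto/rand.Reader, or a deterministic seeded reader for reproducible names:

    name, err := namegen.RandomFileName(rand.Reader) // crypto/rand
    if err != nil {
        return err
    }
    fmt.Println(name) // e.g. "Snark.json"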
--------------------------------------------------------------------------------
/utils/utils.go:
--------------------------------------------------------------------------------
1 | package utils
2 |
3 | import dagpb "github.com/ipld/go-codec-dagpb"
4 |
5 | // Lookup returns the hash link of the entry whose name matches key in a list of dag-pb links, or nil if no entry matches.
6 | func Lookup(links dagpb.PBLinks, key string) dagpb.Link {
7 | li := links.Iterator()
8 | for !li.Done() {
9 | _, next := li.Next()
10 | name := ""
11 | if next.FieldName().Exists() {
12 | name = next.FieldName().Must().String()
13 | }
14 | if key == name {
15 | return next.FieldHash()
16 | }
17 | }
18 | return nil
19 | }
20 |
--------------------------------------------------------------------------------
/version.json:
--------------------------------------------------------------------------------
1 | {
2 | "version": "v1.10.1"
3 | }
4 |
--------------------------------------------------------------------------------