├── .github └── workflows │ ├── generated-pr.yml │ ├── go-check.yml │ ├── go-test.yml │ ├── release-check.yml │ ├── releaser.yml │ ├── stale.yml │ └── tagpush.yml ├── LICENSE.md ├── README.md ├── data ├── builder │ ├── builder.go │ ├── dir_test.go │ ├── directory.go │ ├── dirshard.go │ ├── file.go │ ├── file_test.go │ ├── quick │ │ ├── quick.go │ │ └── quick_test.go │ └── util.go ├── datatypes.go ├── doc.go ├── errors.go ├── fixtures │ ├── directory.unixfs │ ├── directory │ │ └── file.txt │ ├── file.txt │ ├── file.txt.unixfs │ ├── raw.unixfs │ ├── symlink.txt │ └── symlink.txt.unixfs ├── format_test.go ├── gen │ └── main.go ├── ipldsch_minima.go ├── ipldsch_satisfaction.go ├── ipldsch_types.go ├── marshal.go ├── permissions.go ├── unmarshal.go └── wirenumbers.go ├── directory └── basicdir.go ├── file ├── deferred.go ├── file.go ├── file_test.go ├── fixtures │ ├── QmT78zSuBmuS4z925WZfrqQ1qHaJ56DQaTfyMUF7F8ff5o.car │ └── QmT8EC9sJq63SkDZ1mWLbWWyVV66PuqyHWpKkH4pcVyY4H.car ├── large_file_test.go ├── shard.go └── wrapped.go ├── go.mod ├── go.sum ├── hamt ├── errors.go ├── fixtures │ └── wikipedia-cryptographic-hash-function.car ├── shardeddir.go ├── shardeddir_test.go ├── util.go └── util_test.go ├── iter ├── iter.go └── iterlink.go ├── pathpbnode.go ├── reification.go ├── signaling.go ├── signalling_test.go ├── test ├── doc.go └── partial_file_access_test.go ├── testutil ├── directory.go ├── doc.go ├── generator.go └── namegen │ └── namegen.go ├── utils └── utils.go └── version.json /.github/workflows/generated-pr.yml: -------------------------------------------------------------------------------- 1 | name: Close Generated PRs 2 | 3 | on: 4 | schedule: 5 | - cron: '0 0 * * *' 6 | workflow_dispatch: 7 | 8 | permissions: 9 | issues: write 10 | pull-requests: write 11 | 12 | jobs: 13 | stale: 14 | uses: ipdxco/unified-github-workflows/.github/workflows/reusable-generated-pr.yml@v1 15 | -------------------------------------------------------------------------------- 
/.github/workflows/go-check.yml: -------------------------------------------------------------------------------- 1 | name: Go Checks 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: ["main"] 7 | workflow_dispatch: 8 | 9 | permissions: 10 | contents: read 11 | 12 | concurrency: 13 | group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.event_name == 'push' && github.sha || github.ref }} 14 | cancel-in-progress: true 15 | 16 | jobs: 17 | go-check: 18 | uses: ipdxco/unified-github-workflows/.github/workflows/go-check.yml@v1.0 19 | -------------------------------------------------------------------------------- /.github/workflows/go-test.yml: -------------------------------------------------------------------------------- 1 | name: Go Test 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: ["main"] 7 | workflow_dispatch: 8 | 9 | permissions: 10 | contents: read 11 | 12 | concurrency: 13 | group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.event_name == 'push' && github.sha || github.ref }} 14 | cancel-in-progress: true 15 | 16 | jobs: 17 | go-test: 18 | uses: ipdxco/unified-github-workflows/.github/workflows/go-test.yml@v1.0 19 | secrets: 20 | CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} 21 | -------------------------------------------------------------------------------- /.github/workflows/release-check.yml: -------------------------------------------------------------------------------- 1 | name: Release Checker 2 | 3 | on: 4 | pull_request_target: 5 | paths: [ 'version.json' ] 6 | types: [ opened, synchronize, reopened, labeled, unlabeled ] 7 | workflow_dispatch: 8 | 9 | permissions: 10 | contents: write 11 | pull-requests: write 12 | 13 | concurrency: 14 | group: ${{ github.workflow }}-${{ github.ref }} 15 | cancel-in-progress: true 16 | 17 | jobs: 18 | release-check: 19 | uses: ipdxco/unified-github-workflows/.github/workflows/release-check.yml@v1.0 20 | 
-------------------------------------------------------------------------------- /.github/workflows/releaser.yml: -------------------------------------------------------------------------------- 1 | name: Releaser 2 | 3 | on: 4 | push: 5 | paths: [ 'version.json' ] 6 | workflow_dispatch: 7 | 8 | permissions: 9 | contents: write 10 | 11 | concurrency: 12 | group: ${{ github.workflow }}-${{ github.sha }} 13 | cancel-in-progress: true 14 | 15 | jobs: 16 | releaser: 17 | uses: ipdxco/unified-github-workflows/.github/workflows/releaser.yml@v1.0 18 | -------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | name: Close Stale Issues 2 | 3 | on: 4 | schedule: 5 | - cron: '0 0 * * *' 6 | workflow_dispatch: 7 | 8 | permissions: 9 | issues: write 10 | pull-requests: write 11 | 12 | jobs: 13 | stale: 14 | uses: ipdxco/unified-github-workflows/.github/workflows/reusable-stale-issue.yml@v1 15 | -------------------------------------------------------------------------------- /.github/workflows/tagpush.yml: -------------------------------------------------------------------------------- 1 | name: Tag Push Checker 2 | 3 | on: 4 | push: 5 | tags: 6 | - v* 7 | 8 | permissions: 9 | contents: read 10 | issues: write 11 | 12 | concurrency: 13 | group: ${{ github.workflow }}-${{ github.ref }} 14 | cancel-in-progress: true 15 | 16 | jobs: 17 | releaser: 18 | uses: ipdxco/unified-github-workflows/.github/workflows/tagpush.yml@v1.0 19 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The contents of this repository are Copyright (c) corresponding authors and 2 | contributors, licensed under the `Permissive License Stack` meaning either of: 3 | 4 | - Apache-2.0 Software License: https://www.apache.org/licenses/LICENSE-2.0 5 | 
([...4tr2kfsq](https://dweb.link/ipfs/bafkreiankqxazcae4onkp436wag2lj3ccso4nawxqkkfckd6cg4tr2kfsq)) 6 | 7 | - MIT Software License: https://opensource.org/licenses/MIT 8 | ([...vljevcba](https://dweb.link/ipfs/bafkreiepofszg4gfe2gzuhojmksgemsub2h4uy2gewdnr35kswvljevcba)) 9 | 10 | You may not use the contents of this repository except in compliance 11 | with one of the listed Licenses. For an extended clarification of the 12 | intent behind the choice of Licensing please refer to 13 | https://protocol.ai/blog/announcing-the-permissive-license-stack/ 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the terms listed in this notice is distributed on 17 | an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 18 | either express or implied. See each License for the specific language 19 | governing permissions and limitations under that License. 20 | 21 | 22 | `SPDX-License-Identifier: Apache-2.0 OR MIT` 23 | 24 | Verbatim copies of both licenses are included below: 25 | 26 |
Apache-2.0 Software License 27 | 28 | ``` 29 | Apache License 30 | Version 2.0, January 2004 31 | http://www.apache.org/licenses/ 32 | 33 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 34 | 35 | 1. Definitions. 36 | 37 | "License" shall mean the terms and conditions for use, reproduction, 38 | and distribution as defined by Sections 1 through 9 of this document. 39 | 40 | "Licensor" shall mean the copyright owner or entity authorized by 41 | the copyright owner that is granting the License. 42 | 43 | "Legal Entity" shall mean the union of the acting entity and all 44 | other entities that control, are controlled by, or are under common 45 | control with that entity. For the purposes of this definition, 46 | "control" means (i) the power, direct or indirect, to cause the 47 | direction or management of such entity, whether by contract or 48 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 49 | outstanding shares, or (iii) beneficial ownership of such entity. 50 | 51 | "You" (or "Your") shall mean an individual or Legal Entity 52 | exercising permissions granted by this License. 53 | 54 | "Source" form shall mean the preferred form for making modifications, 55 | including but not limited to software source code, documentation 56 | source, and configuration files. 57 | 58 | "Object" form shall mean any form resulting from mechanical 59 | transformation or translation of a Source form, including but 60 | not limited to compiled object code, generated documentation, 61 | and conversions to other media types. 62 | 63 | "Work" shall mean the work of authorship, whether in Source or 64 | Object form, made available under the License, as indicated by a 65 | copyright notice that is included in or attached to the work 66 | (an example is provided in the Appendix below). 
67 | 68 | "Derivative Works" shall mean any work, whether in Source or Object 69 | form, that is based on (or derived from) the Work and for which the 70 | editorial revisions, annotations, elaborations, or other modifications 71 | represent, as a whole, an original work of authorship. For the purposes 72 | of this License, Derivative Works shall not include works that remain 73 | separable from, or merely link (or bind by name) to the interfaces of, 74 | the Work and Derivative Works thereof. 75 | 76 | "Contribution" shall mean any work of authorship, including 77 | the original version of the Work and any modifications or additions 78 | to that Work or Derivative Works thereof, that is intentionally 79 | submitted to Licensor for inclusion in the Work by the copyright owner 80 | or by an individual or Legal Entity authorized to submit on behalf of 81 | the copyright owner. For the purposes of this definition, "submitted" 82 | means any form of electronic, verbal, or written communication sent 83 | to the Licensor or its representatives, including but not limited to 84 | communication on electronic mailing lists, source code control systems, 85 | and issue tracking systems that are managed by, or on behalf of, the 86 | Licensor for the purpose of discussing and improving the Work, but 87 | excluding communication that is conspicuously marked or otherwise 88 | designated in writing by the copyright owner as "Not a Contribution." 89 | 90 | "Contributor" shall mean Licensor and any individual or Legal Entity 91 | on behalf of whom a Contribution has been received by Licensor and 92 | subsequently incorporated within the Work. 93 | 94 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 95 | this License, each Contributor hereby grants to You a perpetual, 96 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 97 | copyright license to reproduce, prepare Derivative Works of, 98 | publicly display, publicly perform, sublicense, and distribute the 99 | Work and such Derivative Works in Source or Object form. 100 | 101 | 3. Grant of Patent License. Subject to the terms and conditions of 102 | this License, each Contributor hereby grants to You a perpetual, 103 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 104 | (except as stated in this section) patent license to make, have made, 105 | use, offer to sell, sell, import, and otherwise transfer the Work, 106 | where such license applies only to those patent claims licensable 107 | by such Contributor that are necessarily infringed by their 108 | Contribution(s) alone or by combination of their Contribution(s) 109 | with the Work to which such Contribution(s) was submitted. If You 110 | institute patent litigation against any entity (including a 111 | cross-claim or counterclaim in a lawsuit) alleging that the Work 112 | or a Contribution incorporated within the Work constitutes direct 113 | or contributory patent infringement, then any patent licenses 114 | granted to You under this License for that Work shall terminate 115 | as of the date such litigation is filed. 116 | 117 | 4. Redistribution. 
You may reproduce and distribute copies of the 118 | Work or Derivative Works thereof in any medium, with or without 119 | modifications, and in Source or Object form, provided that You 120 | meet the following conditions: 121 | 122 | (a) You must give any other recipients of the Work or 123 | Derivative Works a copy of this License; and 124 | 125 | (b) You must cause any modified files to carry prominent notices 126 | stating that You changed the files; and 127 | 128 | (c) You must retain, in the Source form of any Derivative Works 129 | that You distribute, all copyright, patent, trademark, and 130 | attribution notices from the Source form of the Work, 131 | excluding those notices that do not pertain to any part of 132 | the Derivative Works; and 133 | 134 | (d) If the Work includes a "NOTICE" text file as part of its 135 | distribution, then any Derivative Works that You distribute must 136 | include a readable copy of the attribution notices contained 137 | within such NOTICE file, excluding those notices that do not 138 | pertain to any part of the Derivative Works, in at least one 139 | of the following places: within a NOTICE text file distributed 140 | as part of the Derivative Works; within the Source form or 141 | documentation, if provided along with the Derivative Works; or, 142 | within a display generated by the Derivative Works, if and 143 | wherever such third-party notices normally appear. The contents 144 | of the NOTICE file are for informational purposes only and 145 | do not modify the License. You may add Your own attribution 146 | notices within Derivative Works that You distribute, alongside 147 | or as an addendum to the NOTICE text from the Work, provided 148 | that such additional attribution notices cannot be construed 149 | as modifying the License. 
150 | 151 | You may add Your own copyright statement to Your modifications and 152 | may provide additional or different license terms and conditions 153 | for use, reproduction, or distribution of Your modifications, or 154 | for any such Derivative Works as a whole, provided Your use, 155 | reproduction, and distribution of the Work otherwise complies with 156 | the conditions stated in this License. 157 | 158 | 5. Submission of Contributions. Unless You explicitly state otherwise, 159 | any Contribution intentionally submitted for inclusion in the Work 160 | by You to the Licensor shall be under the terms and conditions of 161 | this License, without any additional terms or conditions. 162 | Notwithstanding the above, nothing herein shall supersede or modify 163 | the terms of any separate license agreement you may have executed 164 | with Licensor regarding such Contributions. 165 | 166 | 6. Trademarks. This License does not grant permission to use the trade 167 | names, trademarks, service marks, or product names of the Licensor, 168 | except as required for reasonable and customary use in describing the 169 | origin of the Work and reproducing the content of the NOTICE file. 170 | 171 | 7. Disclaimer of Warranty. Unless required by applicable law or 172 | agreed to in writing, Licensor provides the Work (and each 173 | Contributor provides its Contributions) on an "AS IS" BASIS, 174 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 175 | implied, including, without limitation, any warranties or conditions 176 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 177 | PARTICULAR PURPOSE. You are solely responsible for determining the 178 | appropriateness of using or redistributing the Work and assume any 179 | risks associated with Your exercise of permissions under this License. 180 | 181 | 8. Limitation of Liability. 
In no event and under no legal theory, 182 | whether in tort (including negligence), contract, or otherwise, 183 | unless required by applicable law (such as deliberate and grossly 184 | negligent acts) or agreed to in writing, shall any Contributor be 185 | liable to You for damages, including any direct, indirect, special, 186 | incidental, or consequential damages of any character arising as a 187 | result of this License or out of the use or inability to use the 188 | Work (including but not limited to damages for loss of goodwill, 189 | work stoppage, computer failure or malfunction, or any and all 190 | other commercial damages or losses), even if such Contributor 191 | has been advised of the possibility of such damages. 192 | 193 | 9. Accepting Warranty or Additional Liability. While redistributing 194 | the Work or Derivative Works thereof, You may choose to offer, 195 | and charge a fee for, acceptance of support, warranty, indemnity, 196 | or other liability obligations and/or rights consistent with this 197 | License. However, in accepting such obligations, You may act only 198 | on Your own behalf and on Your sole responsibility, not on behalf 199 | of any other Contributor, and only if You agree to indemnify, 200 | defend, and hold each Contributor harmless for any liability 201 | incurred by, or claims asserted against, such Contributor by reason 202 | of your accepting any such warranty or additional liability. 203 | 204 | END OF TERMS AND CONDITIONS 205 | ``` 206 |
207 | 208 |
MIT Software License 209 | 210 | ``` 211 | Permission is hereby granted, free of charge, to any person obtaining a copy 212 | of this software and associated documentation files (the "Software"), to deal 213 | in the Software without restriction, including without limitation the rights 214 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 215 | copies of the Software, and to permit persons to whom the Software is 216 | furnished to do so, subject to the following conditions: 217 | 218 | The above copyright notice and this permission notice shall be included in 219 | all copies or substantial portions of the Software. 220 | 221 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 222 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 223 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 224 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 225 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 226 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 227 | THE SOFTWARE. 228 | ``` 229 |
230 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # go-unixfsnode 2 | 3 | This is an IPLD ADL that provides string based pathing for protobuf nodes. The top level node behaves like a map where LookupByString returns the Hash property on the Link in the protobufs list of Links whos Name property matches the key. This should enable selector traversals that work based of paths. 4 | 5 | Note that while it works internally with go-codec-dagpb, the Reify method (used to get a UnixFSNode from a DagPB node should actually work successfully with go-ipld-prime-proto nodes) 6 | 7 | ## Usage 8 | 9 | The primary interaction with this package is to register an ADL on a link system. This is done with via a helper method. 10 | 11 | ```go 12 | AddUnixFSReificationToLinkSystem(lsys *ipld.LinkSystem) 13 | ``` 14 | 15 | For link systems which have UnixFS reification registered, two ADLs will be available to the [`InterpretAs`](https://ipld.io/specs/selectors/) selector: 'unixfs' and 'unixfs-preload'. The different between these two ADLs is that the preload variant will access all blocks within a UnixFS Object (file or directory) when that object is accessed by a selector traversal. The non-preload variant in contrast will only access the subset of blocks strictly needed for the traversal. In practice, this means the subset of a sharded directory needed to access a specific file, or the sub-range of a file directly accessed by a range selector. 
16 | 17 | 18 | ## License 19 | 20 | Apache-2.0/MIT © Protocol Labs 21 | -------------------------------------------------------------------------------- /data/builder/builder.go: -------------------------------------------------------------------------------- 1 | package builder 2 | 3 | import ( 4 | "errors" 5 | "strconv" 6 | "time" 7 | 8 | "github.com/ipfs/go-unixfsnode/data" 9 | "github.com/ipld/go-ipld-prime" 10 | "github.com/ipld/go-ipld-prime/fluent/qp" 11 | ) 12 | 13 | // BuildUnixFS provides a clean, validated interface to building data structures 14 | // that match the UnixFS protobuf encoded in the Data member of a ProtoNode 15 | // with sensible defaults 16 | // 17 | // smallFileData, err := BuildUnixFS(func(b *Builder) { 18 | // Data(b, []byte{"hello world"}) 19 | // Mtime(b, func(tb TimeBuilder) { 20 | // Time(tb, time.Now()) 21 | // }) 22 | // }) 23 | func BuildUnixFS(fn func(*Builder)) (data.UnixFSData, error) { 24 | nd, err := qp.BuildMap(data.Type.UnixFSData, -1, func(ma ipld.MapAssembler) { 25 | b := &Builder{MapAssembler: ma} 26 | fn(b) 27 | if !b.hasBlockSizes { 28 | qp.MapEntry(ma, data.Field__BlockSizes, qp.List(0, func(ipld.ListAssembler) {})) 29 | } 30 | if !b.hasDataType { 31 | qp.MapEntry(ma, data.Field__DataType, qp.Int(data.Data_File)) 32 | } 33 | }) 34 | if err != nil { 35 | return nil, err 36 | } 37 | return nd.(data.UnixFSData), nil 38 | } 39 | 40 | // Builder is an interface for making UnixFS data nodes 41 | type Builder struct { 42 | ipld.MapAssembler 43 | hasDataType bool 44 | hasBlockSizes bool 45 | } 46 | 47 | // DataType sets the default on a builder for a UnixFS node - default is File 48 | func DataType(b *Builder, dataType int64) { 49 | _, ok := data.DataTypeNames[dataType] 50 | if !ok { 51 | panic(data.ErrInvalidDataType{DataType: dataType}) 52 | } 53 | qp.MapEntry(b.MapAssembler, data.Field__DataType, qp.Int(dataType)) 54 | b.hasDataType = true 55 | } 56 | 57 | // Data sets the data member inside the UnixFS data 58 | func 
Data(b *Builder, dataBytes []byte) { 59 | qp.MapEntry(b.MapAssembler, data.Field__Data, qp.Bytes(dataBytes)) 60 | } 61 | 62 | // FileSize sets the file size which should be the size of actual bytes underneath 63 | // this node for large files, w/o additional bytes to encode intermediate nodes 64 | func FileSize(b *Builder, fileSize uint64) { 65 | qp.MapEntry(b.MapAssembler, data.Field__FileSize, qp.Int(int64(fileSize))) 66 | } 67 | 68 | // BlockSizes encodes block sizes for each child node 69 | func BlockSizes(b *Builder, blockSizes []uint64) { 70 | qp.MapEntry(b.MapAssembler, data.Field__BlockSizes, qp.List(int64(len(blockSizes)), func(la ipld.ListAssembler) { 71 | for _, bs := range blockSizes { 72 | qp.ListEntry(la, qp.Int(int64(bs))) 73 | } 74 | })) 75 | b.hasBlockSizes = true 76 | } 77 | 78 | // HashType sets the hash function for this node -- only applicable to HAMT 79 | func HashType(b *Builder, hashType uint64) { 80 | qp.MapEntry(b.MapAssembler, data.Field__HashType, qp.Int(int64(hashType))) 81 | } 82 | 83 | // Fanout sets the fanout in a HAMT tree 84 | func Fanout(b *Builder, fanout uint64) { 85 | qp.MapEntry(b.MapAssembler, data.Field__Fanout, qp.Int(int64(fanout))) 86 | } 87 | 88 | // Permissions sets file permissions for the Mode member of the UnixFS node 89 | func Permissions(b *Builder, mode int) { 90 | mode = mode & 0xFFF 91 | qp.MapEntry(b.MapAssembler, data.Field__Mode, qp.Int(int64(mode))) 92 | } 93 | 94 | func parseModeString(modeString string) (uint64, error) { 95 | if len(modeString) > 0 && modeString[0] == '0' { 96 | return strconv.ParseUint(modeString, 8, 32) 97 | } 98 | return strconv.ParseUint(modeString, 10, 32) 99 | } 100 | 101 | // PermissionsString sets file permissions for the Mode member of the UnixFS node, 102 | // parsed from a typical octect encoded permission string (eg '0755') 103 | func PermissionsString(b *Builder, modeString string) { 104 | mode64, err := parseModeString(modeString) 105 | if err != nil { 106 | panic(err) 107 | 
} 108 | mode64 = mode64 & 0xFFF 109 | qp.MapEntry(b.MapAssembler, data.Field__Mode, qp.Int(int64(mode64))) 110 | } 111 | 112 | // Mtime sets the modification time for this node using the time builder interface 113 | // and associated methods 114 | func Mtime(b *Builder, fn func(tb TimeBuilder)) { 115 | qp.MapEntry(b.MapAssembler, data.Field__Mtime, qp.Map(-1, func(ma ipld.MapAssembler) { 116 | fn(ma) 117 | })) 118 | } 119 | 120 | // TimeBuilder is a simple interface for constructing the time member of UnixFS data 121 | type TimeBuilder ipld.MapAssembler 122 | 123 | // Time sets the modification time from a golang time value 124 | func Time(ma TimeBuilder, t time.Time) { 125 | Seconds(ma, t.Unix()) 126 | FractionalNanoseconds(ma, int32(t.Nanosecond())) 127 | } 128 | 129 | // Seconds sets the seconds for a modification time 130 | func Seconds(ma TimeBuilder, seconds int64) { 131 | qp.MapEntry(ma, data.Field__Seconds, qp.Int(seconds)) 132 | 133 | } 134 | 135 | // FractionalNanoseconds sets the nanoseconds for a modification time (must 136 | // be between 0 & a billion) 137 | func FractionalNanoseconds(ma TimeBuilder, nanoseconds int32) { 138 | if nanoseconds < 0 || nanoseconds > 999999999 { 139 | panic(errors.New("mtime-nsecs must be within the range [0,999999999]")) 140 | } 141 | qp.MapEntry(ma, data.Field__Nanoseconds, qp.Int(int64(nanoseconds))) 142 | } 143 | -------------------------------------------------------------------------------- /data/builder/dir_test.go: -------------------------------------------------------------------------------- 1 | package builder 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "io" 7 | "os" 8 | "path/filepath" 9 | "strconv" 10 | "testing" 11 | 12 | "github.com/ipfs/go-cid" 13 | "github.com/ipfs/go-test/random" 14 | "github.com/ipfs/go-unixfsnode" 15 | dagpb "github.com/ipld/go-codec-dagpb" 16 | "github.com/ipld/go-ipld-prime" 17 | cidlink "github.com/ipld/go-ipld-prime/linking/cid" 18 | "github.com/multiformats/go-multihash" 19 | 
"github.com/stretchr/testify/require" 20 | ) 21 | 22 | func mkEntries(cnt int, ls *ipld.LinkSystem) ([]dagpb.PBLink, error) { 23 | entries := make([]dagpb.PBLink, 0, cnt) 24 | for i := 0; i < cnt; i++ { 25 | r := bytes.NewBufferString(fmt.Sprintf("%d", i)) 26 | e, err := mkEntry(r, fmt.Sprintf("file %d", i), ls) 27 | if err != nil { 28 | return nil, err 29 | } 30 | entries = append(entries, e) 31 | } 32 | return entries, nil 33 | } 34 | 35 | func mkEntry(r io.Reader, name string, ls *ipld.LinkSystem) (dagpb.PBLink, error) { 36 | f, s, err := BuildUnixFSFile(r, "", ls) 37 | if err != nil { 38 | return nil, err 39 | } 40 | return BuildUnixFSDirectoryEntry(name, int64(s), f) 41 | } 42 | 43 | func TestBuildUnixFSFileWrappedInDirectory_Reference(t *testing.T) { 44 | for _, tc := range referenceTestCases { 45 | t.Run(strconv.Itoa(tc.size), func(t *testing.T) { 46 | buf := make([]byte, tc.size) 47 | random.NewSeededRand(0xdeadbeef).Read(buf) 48 | r := bytes.NewReader(buf) 49 | 50 | ls := cidlink.DefaultLinkSystem() 51 | storage := cidlink.Memory{} 52 | ls.StorageReadOpener = storage.OpenRead 53 | ls.StorageWriteOpener = storage.OpenWrite 54 | 55 | e, err := mkEntry(r, fmt.Sprintf("%d", tc.size), &ls) 56 | require.NoError(t, err) 57 | d, sz, err := BuildUnixFSDirectory([]dagpb.PBLink{e}, &ls) 58 | require.NoError(t, err) 59 | require.Equal(t, tc.wrappedExpected.String(), d.(cidlink.Link).Cid.String()) 60 | 61 | // check sz is the stored size of all blocks in the generated DAG 62 | var totStored int 63 | for _, blk := range storage.Bag { 64 | totStored += len(blk) 65 | } 66 | require.Equal(t, totStored, int(sz)) 67 | }) 68 | } 69 | } 70 | 71 | // Cross-impl reference test: directory of files with single character 72 | // names, starting from ' ' and ending with '~', but excluding the special 73 | // characters '/' and '.'. Each file should contain a single byte with the 74 | // same value as the character in its name. 
Files are added to a sharded 75 | // directory with a fanout of 16, using CIDv1 throughout, and should result 76 | // in the root CID of: 77 | // 78 | // bafybeihnipspiyy3dctpcx7lv655qpiuy52d7b2fzs52dtrjqwmvbiux44 79 | func TestBuildUnixFSDirectoryShardAltFanout_Reference(t *testing.T) { 80 | ls := cidlink.DefaultLinkSystem() 81 | storage := cidlink.Memory{} 82 | ls.StorageReadOpener = storage.OpenRead 83 | ls.StorageWriteOpener = storage.OpenWrite 84 | entries := make([]dagpb.PBLink, 0) 85 | for ch := ' '; ch <= '~'; ch++ { 86 | if ch == '/' || ch == '.' { 87 | continue 88 | } 89 | s := string(ch) 90 | r := bytes.NewBuffer([]byte(s)) 91 | e, err := mkEntry(r, s, &ls) 92 | require.NoError(t, err) 93 | entries = append(entries, e) 94 | } 95 | lnk, sz, err := BuildUnixFSShardedDirectory(16, multihash.MURMUR3X64_64, entries, &ls) 96 | require.NoError(t, err) 97 | var totStored int 98 | for _, blk := range storage.Bag { 99 | totStored += len(blk) 100 | } 101 | require.Equal(t, totStored, int(sz)) 102 | require.Equal(t, "bafybeihnipspiyy3dctpcx7lv655qpiuy52d7b2fzs52dtrjqwmvbiux44", lnk.String()) 103 | } 104 | 105 | func TestBuildUnixFSDirectory(t *testing.T) { 106 | ls := cidlink.DefaultLinkSystem() 107 | storage := cidlink.Memory{} 108 | ls.StorageReadOpener = storage.OpenRead 109 | ls.StorageWriteOpener = storage.OpenWrite 110 | 111 | testSizes := []int{100, 1000, 50000} 112 | for _, cnt := range testSizes { 113 | entries, err := mkEntries(cnt, &ls) 114 | if err != nil { 115 | t.Fatal(err) 116 | } 117 | 118 | dl, _, err := BuildUnixFSDirectory(entries, &ls) 119 | if err != nil { 120 | t.Fatal(err) 121 | } 122 | 123 | pbn, err := ls.Load(ipld.LinkContext{}, dl, dagpb.Type.PBNode) 124 | if err != nil { 125 | t.Fatal(err) 126 | } 127 | ufd, err := unixfsnode.Reify(ipld.LinkContext{}, pbn, &ls) 128 | if err != nil { 129 | t.Fatal(err) 130 | } 131 | observedCnt := 0 132 | 133 | li := ufd.MapIterator() 134 | for !li.Done() { 135 | _, _, err := li.Next() 136 | if err != nil 
{ 137 | t.Fatal(err) 138 | } 139 | observedCnt++ 140 | } 141 | if observedCnt != cnt { 142 | fmt.Printf("%+v\n", ufd) 143 | t.Fatalf("unexpected number of dir entries %d vs %d", observedCnt, cnt) 144 | } 145 | } 146 | } 147 | 148 | func TestBuildUnixFSRecursive(t *testing.T) { 149 | // only the top CID is of interest, but this tree is correct and can be used for future validation 150 | fixture := fentry{ 151 | "rootDir", 152 | "", 153 | mustCidDecode("bafybeihswl3f7pa7fueyayewcvr3clkdz7oetv4jolyejgw26p6l3qzlbm"), 154 | []fentry{ 155 | {"a", "aaa", mustCidDecode("bafkreieygsdw3t5qlsywpjocjfj6xjmmjlejwgw7k7zi6l45bgxra7xi6a"), nil}, 156 | { 157 | "b", 158 | "", 159 | mustCidDecode("bafybeibohj54uixf2mso4t53suyarv6cfuxt6b5cj6qjsqaa2ezfxnu5pu"), 160 | []fentry{ 161 | {"1", "111", mustCidDecode("bafkreihw4cq6flcbsrnjvj77rkfkudhlyevdxteydkjjvvopqefasdqrvy"), nil}, 162 | {"2", "222", mustCidDecode("bafkreie3q4kremt4bhhjdxletm7znjr3oqeo6jt4rtcxcaiu4yuxgdfwd4"), nil}, 163 | }, 164 | }, 165 | {"c", "ccc", mustCidDecode("bafkreide3ksevvet74uks3x7vnxhp4ltfi6zpwbsifmbwn6324fhusia7y"), nil}, 166 | }, 167 | } 168 | 169 | ls := cidlink.DefaultLinkSystem() 170 | storage := cidlink.Memory{} 171 | ls.StorageReadOpener = storage.OpenRead 172 | ls.StorageWriteOpener = storage.OpenWrite 173 | 174 | dir := t.TempDir() 175 | makeFixture(t, dir, fixture) 176 | 177 | lnk, sz, err := BuildUnixFSRecursive(filepath.Join(dir, fixture.name), &ls) 178 | require.NoError(t, err) 179 | require.Equal(t, fixture.expectedLnk.String(), lnk.String()) 180 | require.Equal(t, uint64(245), sz) 181 | } 182 | 183 | func TestBuildUnixFSRecursiveLargeSharded(t *testing.T) { 184 | // only the top CID is of interest, but this tree is correct and can be used for future validation 185 | fixture := fentry{ 186 | "rootDir", 187 | "", 188 | mustCidDecode("bafybeigyvxs6og5jbmpaa43qbhhd5swklqcfzqdrtjgfh53qjon6hpjaye"), 189 | make([]fentry, 0), 190 | } 191 | 192 | for i := 0; i < 1344; i++ { 193 | name := fmt.Sprintf("long 
name to fill out bytes to make the sharded directory test flip over the sharded directory limit because link names are included in the directory entry %d", i) 194 | fixture.children = append(fixture.children, fentry{name, name, cid.Undef, nil}) 195 | } 196 | 197 | ls := cidlink.DefaultLinkSystem() 198 | storage := cidlink.Memory{} 199 | ls.StorageReadOpener = storage.OpenRead 200 | ls.StorageWriteOpener = storage.OpenWrite 201 | 202 | dir := t.TempDir() 203 | makeFixture(t, dir, fixture) 204 | 205 | lnk, sz, err := BuildUnixFSRecursive(filepath.Join(dir, fixture.name), &ls) 206 | require.NoError(t, err) 207 | require.Equal(t, fixture.expectedLnk.String(), lnk.String()) 208 | require.Equal(t, uint64(515735), sz) 209 | } 210 | 211 | // Same as TestBuildUnixFSRecursiveLargeSharded but it's one file less which flips 212 | // it back to the un-sharded format. So we're testing the boundary condition and 213 | // the proper construction of large DAGs. 214 | func TestBuildUnixFSRecursiveLargeUnsharded(t *testing.T) { 215 | // only the top CID is of interest, but this tree is correct and can be used for future validation 216 | fixture := fentry{ 217 | "rootDir", 218 | "", 219 | mustCidDecode("bafybeihecq4rpl4nw3cgfb2uiwltgsmw5sutouvuldv5fxn4gfbihvnalq"), 220 | make([]fentry, 0), 221 | } 222 | 223 | for i := 0; i < 1343; i++ { 224 | name := fmt.Sprintf("long name to fill out bytes to make the sharded directory test flip over the sharded directory limit because link names are included in the directory entry %d", i) 225 | fixture.children = append(fixture.children, fentry{name, name, cid.Undef, nil}) 226 | } 227 | 228 | ls := cidlink.DefaultLinkSystem() 229 | storage := cidlink.Memory{} 230 | ls.StorageReadOpener = storage.OpenRead 231 | ls.StorageWriteOpener = storage.OpenWrite 232 | 233 | dir := t.TempDir() 234 | makeFixture(t, dir, fixture) 235 | 236 | lnk, sz, err := BuildUnixFSRecursive(filepath.Join(dir, fixture.name), &ls) 237 | require.NoError(t, err) 238 | 
require.Equal(t, fixture.expectedLnk.String(), lnk.String()) 239 | require.Equal(t, uint64(490665), sz) 240 | } 241 | 242 | type fentry struct { 243 | name string 244 | content string 245 | expectedLnk cid.Cid 246 | children []fentry 247 | } 248 | 249 | func makeFixture(t *testing.T, dir string, fixture fentry) { 250 | path := filepath.Join(dir, fixture.name) 251 | if fixture.children != nil { 252 | require.NoError(t, os.Mkdir(path, 0755)) 253 | for _, c := range fixture.children { 254 | makeFixture(t, path, c) 255 | } 256 | } else { 257 | os.WriteFile(path, []byte(fixture.content), 0644) 258 | } 259 | } 260 | 261 | func mustCidDecode(s string) cid.Cid { 262 | c, err := cid.Decode(s) 263 | if err != nil { 264 | panic(err) 265 | } 266 | return c 267 | } 268 | -------------------------------------------------------------------------------- /data/builder/directory.go: -------------------------------------------------------------------------------- 1 | package builder 2 | 3 | import ( 4 | "fmt" 5 | "io/fs" 6 | "os" 7 | "path" 8 | 9 | "github.com/ipfs/go-unixfsnode/data" 10 | dagpb "github.com/ipld/go-codec-dagpb" 11 | "github.com/ipld/go-ipld-prime" 12 | cidlink "github.com/ipld/go-ipld-prime/linking/cid" 13 | "github.com/multiformats/go-multihash" 14 | ) 15 | 16 | // https://github.com/ipfs/go-ipfs/pull/8114/files#diff-eec963b47a6e1080d9d8023b4e438e6e3591b4154f7379a7e728401d2055374aR319 17 | const shardSplitThreshold = 262144 18 | 19 | // https://github.com/ipfs/go-unixfs/blob/ec6bb5a4c5efdc3a5bce99151b294f663ee9c08d/io/directory.go#L29 20 | const defaultShardWidth = 256 21 | 22 | // BuildUnixFSRecursive returns a link pointing to the UnixFS node representing 23 | // the file or directory tree pointed to by `root` 24 | func BuildUnixFSRecursive(root string, ls *ipld.LinkSystem) (ipld.Link, uint64, error) { 25 | info, err := os.Lstat(root) 26 | if err != nil { 27 | return nil, 0, err 28 | } 29 | 30 | m := info.Mode() 31 | switch { 32 | case m.IsDir(): 33 | var tsize 
uint64 34 | entries, err := os.ReadDir(root) 35 | if err != nil { 36 | return nil, 0, err 37 | } 38 | lnks := make([]dagpb.PBLink, 0, len(entries)) 39 | for _, e := range entries { 40 | lnk, sz, err := BuildUnixFSRecursive(path.Join(root, e.Name()), ls) 41 | if err != nil { 42 | return nil, 0, err 43 | } 44 | tsize += sz 45 | entry, err := BuildUnixFSDirectoryEntry(e.Name(), int64(sz), lnk) 46 | if err != nil { 47 | return nil, 0, err 48 | } 49 | lnks = append(lnks, entry) 50 | } 51 | return BuildUnixFSDirectory(lnks, ls) 52 | case m.Type() == fs.ModeSymlink: 53 | content, err := os.Readlink(root) 54 | if err != nil { 55 | return nil, 0, err 56 | } 57 | outLnk, sz, err := BuildUnixFSSymlink(content, ls) 58 | if err != nil { 59 | return nil, 0, err 60 | } 61 | return outLnk, sz, nil 62 | case m.IsRegular(): 63 | fp, err := os.Open(root) 64 | if err != nil { 65 | return nil, 0, err 66 | } 67 | defer fp.Close() 68 | outLnk, sz, err := BuildUnixFSFile(fp, "", ls) 69 | if err != nil { 70 | return nil, 0, err 71 | } 72 | return outLnk, sz, nil 73 | default: 74 | return nil, 0, fmt.Errorf("cannot encode non regular file: %s", root) 75 | } 76 | } 77 | 78 | // estimateDirSize estimates if a directory is big enough that it warrents sharding. 79 | // The estimate is the sum over the len(linkName) + bytelen(linkHash) 80 | // https://github.com/ipfs/go-unixfs/blob/master/io/directory.go#L152-L162 81 | func estimateDirSize(entries []dagpb.PBLink) int { 82 | s := 0 83 | for _, e := range entries { 84 | s += len(e.Name.Must().String()) 85 | lnk := e.Hash.Link() 86 | cl, ok := lnk.(cidlink.Link) 87 | if ok { 88 | s += cl.ByteLen() 89 | } else if lnk == nil { 90 | s += 0 91 | } else { 92 | s += len(lnk.Binary()) 93 | } 94 | } 95 | return s 96 | } 97 | 98 | // BuildUnixFSDirectory creates a directory link over a collection of entries. 
99 | func BuildUnixFSDirectory(entries []dagpb.PBLink, ls *ipld.LinkSystem) (ipld.Link, uint64, error) { 100 | if estimateDirSize(entries) > shardSplitThreshold { 101 | return BuildUnixFSShardedDirectory(defaultShardWidth, multihash.MURMUR3X64_64, entries, ls) 102 | } 103 | ufd, err := BuildUnixFS(func(b *Builder) { 104 | DataType(b, data.Data_Directory) 105 | }) 106 | if err != nil { 107 | return nil, 0, err 108 | } 109 | pbb := dagpb.Type.PBNode.NewBuilder() 110 | pbm, err := pbb.BeginMap(2) 111 | if err != nil { 112 | return nil, 0, err 113 | } 114 | if err = pbm.AssembleKey().AssignString("Data"); err != nil { 115 | return nil, 0, err 116 | } 117 | if err = pbm.AssembleValue().AssignBytes(data.EncodeUnixFSData(ufd)); err != nil { 118 | return nil, 0, err 119 | } 120 | if err = pbm.AssembleKey().AssignString("Links"); err != nil { 121 | return nil, 0, err 122 | } 123 | lnks, err := pbm.AssembleValue().BeginList(int64(len(entries))) 124 | if err != nil { 125 | return nil, 0, err 126 | } 127 | // sorting happens in codec-dagpb 128 | var totalSize uint64 129 | for _, e := range entries { 130 | totalSize += uint64(e.Tsize.Must().Int()) 131 | if err := lnks.AssembleValue().AssignNode(e); err != nil { 132 | return nil, 0, err 133 | } 134 | } 135 | if err := lnks.Finish(); err != nil { 136 | return nil, 0, err 137 | } 138 | if err := pbm.Finish(); err != nil { 139 | return nil, 0, err 140 | } 141 | node := pbb.Build() 142 | lnk, sz, err := sizedStore(ls, fileLinkProto, node) 143 | if err != nil { 144 | return nil, 0, err 145 | } 146 | return lnk, totalSize + sz, err 147 | } 148 | -------------------------------------------------------------------------------- /data/builder/dirshard.go: -------------------------------------------------------------------------------- 1 | package builder 2 | 3 | import ( 4 | "fmt" 5 | "hash" 6 | 7 | bitfield "github.com/ipfs/go-bitfield" 8 | "github.com/ipfs/go-unixfsnode/data" 9 | "github.com/ipfs/go-unixfsnode/hamt" 10 | dagpb 
// BuildUnixFSShardedDirectory will build a hamt of unixfs hamt shards encoding
// a directory with more entries than is typically allowed to fit in a standard
// IPFS single-block unixFS directory.
//
// size is the shard fanout (must be a power of two), hasher is the multihash
// code used to hash entry names into the HAMT, and entries are the directory
// links to distribute across the shards. Returns the root shard's link and
// the cumulative stored size of the whole structure.
func BuildUnixFSShardedDirectory(size int, hasher uint64, entries []dagpb.PBLink, ls *ipld.LinkSystem) (ipld.Link, uint64, error) {
	// hash the entries
	var h hash.Hash
	var err error
	// TODO: use the multihash registry once murmur3 behavior is encoded there.
	// https://github.com/multiformats/go-multihash/pull/150
	if hasher == hamt.HashMurmur3 {
		// special-cased because the registry does not yet expose the exact
		// 64-bit murmur3 variant UnixFS HAMTs expect
		h = murmur3.New64()
	} else {
		h, err = multihash.GetHasher(hasher)
		if err != nil {
			return nil, 0, err
		}
	}
	hamtEntries := make([]hamtLink, 0, len(entries))
	for _, e := range entries {
		// each entry is addressed within the HAMT by the hash of its name
		name := e.Name.Must().String()
		h.Reset()
		h.Write([]byte(name))
		sum := h.Sum(nil)
		hamtEntries = append(hamtEntries, hamtLink{
			sum,
			e,
		})
	}

	sizeLg2, err := logtwo(size)
	if err != nil {
		return nil, 0, err
	}

	sharder := shard{
		hasher:  hasher,
		size:    size,
		sizeLg2: sizeLg2,
		// width of the hex bucket-index prefix added to link names,
		// e.g. 2 hex digits for a fanout of 256
		width: len(fmt.Sprintf("%X", size-1)),
		depth: 0,

		children: make(map[int]entry),
	}

	// insert every entry; shards split recursively as buckets collide
	for _, entry := range hamtEntries {
		err := sharder.add(entry)
		if err != nil {
			return nil, 0, err
		}
	}

	return sharder.serialize(ls)
}
return newShard.add(lnk) 129 | } 130 | 131 | func (s *shard) formatLinkName(name string, idx int) string { 132 | return fmt.Sprintf("%0*X%s", s.width, idx, name) 133 | } 134 | 135 | // bitmap calculates the bitmap of which links in the shard are set. 136 | func (s *shard) bitmap() ([]byte, error) { 137 | bm, err := bitfield.NewBitfield(s.size) 138 | if err != nil { 139 | return nil, err 140 | } 141 | for i := 0; i < s.size; i++ { 142 | if _, ok := s.children[i]; ok { 143 | bm.SetBit(i) 144 | } 145 | } 146 | return bm.Bytes(), nil 147 | } 148 | 149 | // serialize stores the concrete representation of this shard in the link system and 150 | // returns a link to it. 151 | func (s *shard) serialize(ls *ipld.LinkSystem) (ipld.Link, uint64, error) { 152 | bm, err := s.bitmap() 153 | if err != nil { 154 | return nil, 0, err 155 | } 156 | ufd, err := BuildUnixFS(func(b *Builder) { 157 | DataType(b, data.Data_HAMTShard) 158 | HashType(b, s.hasher) 159 | Data(b, bm) 160 | Fanout(b, uint64(s.size)) 161 | }) 162 | if err != nil { 163 | return nil, 0, err 164 | } 165 | pbb := dagpb.Type.PBNode.NewBuilder() 166 | pbm, err := pbb.BeginMap(2) 167 | if err != nil { 168 | return nil, 0, err 169 | } 170 | if err = pbm.AssembleKey().AssignString("Data"); err != nil { 171 | return nil, 0, err 172 | } 173 | if err = pbm.AssembleValue().AssignBytes(data.EncodeUnixFSData(ufd)); err != nil { 174 | return nil, 0, err 175 | } 176 | if err = pbm.AssembleKey().AssignString("Links"); err != nil { 177 | return nil, 0, err 178 | } 179 | 180 | lnkBuilder := dagpb.Type.PBLinks.NewBuilder() 181 | lnks, err := lnkBuilder.BeginList(int64(len(s.children))) 182 | if err != nil { 183 | return nil, 0, err 184 | } 185 | // sorting happens in codec-dagpb 186 | var totalSize uint64 187 | for idx, e := range s.children { 188 | var lnk dagpb.PBLink 189 | if e.shard != nil { 190 | ipldLnk, sz, err := e.shard.serialize(ls) 191 | if err != nil { 192 | return nil, 0, err 193 | } 194 | totalSize += sz 195 | 
fullName := s.formatLinkName("", idx) 196 | lnk, err = BuildUnixFSDirectoryEntry(fullName, int64(sz), ipldLnk) 197 | if err != nil { 198 | return nil, 0, err 199 | } 200 | } else { 201 | fullName := s.formatLinkName(e.Name.Must().String(), idx) 202 | sz := e.Tsize.Must().Int() 203 | totalSize += uint64(sz) 204 | lnk, err = BuildUnixFSDirectoryEntry(fullName, sz, e.Hash.Link()) 205 | } 206 | if err != nil { 207 | return nil, 0, err 208 | } 209 | if err := lnks.AssembleValue().AssignNode(lnk); err != nil { 210 | return nil, 0, err 211 | } 212 | } 213 | if err := lnks.Finish(); err != nil { 214 | return nil, 0, err 215 | } 216 | pbm.AssembleValue().AssignNode(lnkBuilder.Build()) 217 | if err := pbm.Finish(); err != nil { 218 | return nil, 0, err 219 | } 220 | node := pbb.Build() 221 | lnk, sz, err := sizedStore(ls, fileLinkProto, node) 222 | if err != nil { 223 | return nil, 0, err 224 | } 225 | return lnk, totalSize + sz, nil 226 | } 227 | -------------------------------------------------------------------------------- /data/builder/file.go: -------------------------------------------------------------------------------- 1 | package builder 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | 7 | chunk "github.com/ipfs/boxo/chunker" 8 | "github.com/ipfs/go-cid" 9 | "github.com/ipfs/go-unixfsnode/data" 10 | dagpb "github.com/ipld/go-codec-dagpb" 11 | "github.com/ipld/go-ipld-prime" 12 | "github.com/ipld/go-ipld-prime/datamodel" 13 | cidlink "github.com/ipld/go-ipld-prime/linking/cid" 14 | basicnode "github.com/ipld/go-ipld-prime/node/basic" 15 | "github.com/multiformats/go-multicodec" 16 | multihash "github.com/multiformats/go-multihash/core" 17 | 18 | // raw needed for opening as bytes 19 | _ "github.com/ipld/go-ipld-prime/codec/raw" 20 | ) 21 | 22 | type fileShardMeta struct { 23 | link datamodel.Link 24 | byteSize uint64 25 | storedSize uint64 26 | } 27 | 28 | type fileShards []fileShardMeta 29 | 30 | func (fs fileShards) totalByteSize() uint64 { 31 | var total uint64 32 | for 
_, f := range fs { 33 | total += f.byteSize 34 | } 35 | return total 36 | } 37 | 38 | func (fs fileShards) totalStoredSize() uint64 { 39 | var total uint64 40 | for _, f := range fs { 41 | total += f.storedSize 42 | } 43 | return total 44 | } 45 | 46 | func (fs fileShards) byteSizes() []uint64 { 47 | sizes := make([]uint64, len(fs)) 48 | for i, f := range fs { 49 | sizes[i] = f.byteSize 50 | } 51 | return sizes 52 | } 53 | 54 | // BuildUnixFSFile creates a dag of ipld Nodes representing file data. 55 | // This recreates the functionality previously found in 56 | // github.com/ipfs/go-unixfs/importer/balanced, but tailored to the 57 | // go-unixfsnode & ipld-prime data layout of nodes. 58 | // We make some assumptions in building files with this builder to reduce 59 | // complexity, namely: 60 | // - we assume we are using CIDv1, which has implied that the leaf 61 | // data nodes are stored as raw bytes. 62 | // ref: https://github.com/ipfs/go-mfs/blob/1b1fd06cff048caabeddb02d4dbf22d2274c7971/file.go#L50 63 | func BuildUnixFSFile(r io.Reader, chunker string, ls *ipld.LinkSystem) (ipld.Link, uint64, error) { 64 | src, err := chunk.FromString(r, chunker) 65 | if err != nil { 66 | return nil, 0, err 67 | } 68 | 69 | var prev fileShards 70 | depth := 1 71 | for { 72 | next, err := fileTreeRecursive(depth, prev, src, ls) 73 | if err != nil { 74 | return nil, 0, err 75 | } 76 | 77 | if prev != nil && prev[0].link == next.link { 78 | if next.link == nil { 79 | node := basicnode.NewBytes([]byte{}) 80 | link, err := ls.Store(ipld.LinkContext{}, leafLinkProto, node) 81 | return link, 0, err 82 | } 83 | return next.link, next.storedSize, nil 84 | } 85 | 86 | prev = []fileShardMeta{next} 87 | depth++ 88 | } 89 | } 90 | 91 | var fileLinkProto = cidlink.LinkPrototype{ 92 | Prefix: cid.Prefix{ 93 | Version: 1, 94 | Codec: uint64(multicodec.DagPb), 95 | MhType: multihash.SHA2_256, 96 | MhLength: 32, 97 | }, 98 | } 99 | 100 | var leafLinkProto = cidlink.LinkPrototype{ 101 | Prefix: 
// fileTreeRecursive packs a file into chunks recursively, returning a root for
// this level of recursion, the number of file bytes consumed for this level of
// recursion and the number of bytes used to store this level of recursion.
func fileTreeRecursive(
	depth int,
	children fileShards,
	src chunk.Splitter,
	ls *ipld.LinkSystem,
) (fileShardMeta, error) {
	if depth == 1 {
		// file leaf, next chunk, encode as raw bytes, store and return
		if len(children) > 0 {
			return fileShardMeta{}, fmt.Errorf("leaf nodes cannot have children")
		}
		leaf, err := src.NextBytes()
		if err != nil {
			if err == io.EOF {
				// chunker exhausted: signal with a zero-value meta (nil link)
				return fileShardMeta{}, nil
			}
			return fileShardMeta{}, err
		}
		node := basicnode.NewBytes(leaf)
		l, sz, err := sizedStore(ls, leafLinkProto, node)
		if err != nil {
			return fileShardMeta{}, err
		}
		return fileShardMeta{link: l, byteSize: uint64(len(leaf)), storedSize: sz}, nil
	}

	// depth > 1

	if children == nil {
		children = make(fileShards, 0)
	}

	// fill up the links for this level, if we need to go beyond
	// DefaultLinksPerBlock we'll end up back here making a parallel tree
	for len(children) < DefaultLinksPerBlock {
		// descend down toward the leaves
		next, err := fileTreeRecursive(depth-1, nil, src, ls)
		if err != nil {
			return fileShardMeta{}, err
		} else if next.link == nil { // eof
			break
		}
		children = append(children, next)
	}

	if len(children) == 0 {
		// empty case
		return fileShardMeta{}, nil
	} else if len(children) == 1 {
		// degenerate case: a single child needs no intermediate parent node
		return children[0], nil
	}

	// make the unixfs node recording the file size and per-child block sizes
	node, err := BuildUnixFS(func(b *Builder) {
		FileSize(b, children.totalByteSize())
		BlockSizes(b, children.byteSizes())
	})
	if err != nil {
		return fileShardMeta{}, err
	}
	// wrap the unixfs metadata and the child links into a dag-pb node
	pbn, err := packFileChildren(node, children)
	if err != nil {
		return fileShardMeta{}, err
	}

	link, sz, err := sizedStore(ls, fileLinkProto, pbn)
	if err != nil {
		return fileShardMeta{}, err
	}
	return fileShardMeta{
		link:       link,
		byteSize:   children.totalByteSize(),
		storedSize: children.totalStoredSize() + sz,
	}, nil
}
228 | func BuildUnixFSDirectoryEntry(name string, size int64, hash ipld.Link) (dagpb.PBLink, error) { 229 | dpbl := dagpb.Type.PBLink.NewBuilder() 230 | lma, err := dpbl.BeginMap(3) 231 | if err != nil { 232 | return nil, err 233 | } 234 | if err = lma.AssembleKey().AssignString("Hash"); err != nil { 235 | return nil, err 236 | } 237 | if err = lma.AssembleValue().AssignLink(hash); err != nil { 238 | return nil, err 239 | } 240 | if err = lma.AssembleKey().AssignString("Name"); err != nil { 241 | return nil, err 242 | } 243 | if err = lma.AssembleValue().AssignString(name); err != nil { 244 | return nil, err 245 | } 246 | if err = lma.AssembleKey().AssignString("Tsize"); err != nil { 247 | return nil, err 248 | } 249 | if err = lma.AssembleValue().AssignInt(size); err != nil { 250 | return nil, err 251 | } 252 | if err = lma.Finish(); err != nil { 253 | return nil, err 254 | } 255 | return dpbl.Build().(dagpb.PBLink), nil 256 | } 257 | 258 | // BuildUnixFSSymlink builds a symlink entry in a unixfs tree 259 | func BuildUnixFSSymlink(content string, ls *ipld.LinkSystem) (ipld.Link, uint64, error) { 260 | // make the unixfs node. 
261 | node, err := BuildUnixFS(func(b *Builder) { 262 | DataType(b, data.Data_Symlink) 263 | Data(b, []byte(content)) 264 | }) 265 | if err != nil { 266 | return nil, 0, err 267 | } 268 | 269 | dpbb := dagpb.Type.PBNode.NewBuilder() 270 | pbm, err := dpbb.BeginMap(2) 271 | if err != nil { 272 | return nil, 0, err 273 | } 274 | pblb, err := pbm.AssembleEntry("Links") 275 | if err != nil { 276 | return nil, 0, err 277 | } 278 | pbl, err := pblb.BeginList(0) 279 | if err != nil { 280 | return nil, 0, err 281 | } 282 | if err = pbl.Finish(); err != nil { 283 | return nil, 0, err 284 | } 285 | if err = pbm.AssembleKey().AssignString("Data"); err != nil { 286 | return nil, 0, err 287 | } 288 | if err = pbm.AssembleValue().AssignBytes(data.EncodeUnixFSData(node)); err != nil { 289 | return nil, 0, err 290 | } 291 | if err = pbm.Finish(); err != nil { 292 | return nil, 0, err 293 | } 294 | pbn := dpbb.Build() 295 | 296 | return sizedStore(ls, fileLinkProto, pbn) 297 | } 298 | 299 | // Constants below are from 300 | // https://github.com/ipfs/go-unixfs/blob/ec6bb5a4c5efdc3a5bce99151b294f663ee9c08d/importer/helpers/helpers.go 301 | 302 | // BlockSizeLimit specifies the maximum size an imported block can have. 303 | var BlockSizeLimit = 1048576 // 1 MB 304 | 305 | // rough estimates on expected sizes 306 | var roughLinkBlockSize = 1 << 13 // 8KB 307 | var roughLinkSize = 34 + 8 + 5 // sha256 multihash + size + no name + protobuf framing 308 | 309 | // DefaultLinksPerBlock governs how the importer decides how many links there 310 | // will be per block. 
This calculation is based on expected distributions of: 311 | // - the expected distribution of block sizes 312 | // - the expected distribution of link sizes 313 | // - desired access speed 314 | // 315 | // For now, we use: 316 | // 317 | // var roughLinkBlockSize = 1 << 13 // 8KB 318 | // var roughLinkSize = 34 + 8 + 5 // sha256 multihash + size + no name 319 | // // + protobuf framing 320 | // var DefaultLinksPerBlock = (roughLinkBlockSize / roughLinkSize) 321 | // = ( 8192 / 47 ) 322 | // = (approximately) 174 323 | var DefaultLinksPerBlock = roughLinkBlockSize / roughLinkSize 324 | -------------------------------------------------------------------------------- /data/builder/file_test.go: -------------------------------------------------------------------------------- 1 | package builder 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "strconv" 7 | "testing" 8 | 9 | "github.com/ipfs/go-cid" 10 | "github.com/ipfs/go-test/random" 11 | "github.com/ipfs/go-unixfsnode/file" 12 | dagpb "github.com/ipld/go-codec-dagpb" 13 | "github.com/ipld/go-ipld-prime" 14 | cidlink "github.com/ipld/go-ipld-prime/linking/cid" 15 | "github.com/stretchr/testify/require" 16 | ) 17 | 18 | // referenceTestCases using older IPFS libraries, both bare forms of files sharded across raw leaves 19 | // with CIDv1 and the same but wrapped in a directory with the name of the number of bytes. 
// TestBuildUnixFSFile_Reference builds files of several sizes from seeded
// random bytes and verifies the resulting root CID against reference values
// produced by older IPFS libraries, and that the reported size equals the
// total bytes actually written to storage.
func TestBuildUnixFSFile_Reference(t *testing.T) {
	for _, tc := range referenceTestCases {
		t.Run(strconv.Itoa(tc.size), func(t *testing.T) {
			// deterministic content: fixed seed so the CIDs are stable
			buf := make([]byte, tc.size)
			random.NewSeededRand(0xdeadbeef).Read(buf)
			r := bytes.NewReader(buf)

			// in-memory link system so we can inspect every stored block
			ls := cidlink.DefaultLinkSystem()
			storage := cidlink.Memory{}
			ls.StorageReadOpener = storage.OpenRead
			ls.StorageWriteOpener = storage.OpenWrite

			f, sz, err := BuildUnixFSFile(r, "", &ls)
			require.NoError(t, err)
			require.Equal(t, tc.bareExpected.String(), f.(cidlink.Link).Cid.String())

			// check sz is the stored size of all blocks in the generated DAG
			var totStored int
			for _, blk := range storage.Bag {
				totStored += len(blk)
			}
			require.Equal(t, totStored, int(sz))
		})
	}
}
3 | package quickbuilder 4 | 5 | import ( 6 | "bytes" 7 | 8 | "github.com/ipfs/go-unixfsnode/data/builder" 9 | dagpb "github.com/ipld/go-codec-dagpb" 10 | "github.com/ipld/go-ipld-prime" 11 | ) 12 | 13 | // A Node represents the most basic form of a file or directory 14 | type Node interface { 15 | Size() (int64, error) 16 | Link() ipld.Link 17 | } 18 | 19 | type lnkNode struct { 20 | link ipld.Link 21 | size int64 22 | ls *ipld.LinkSystem 23 | } 24 | 25 | func (ln *lnkNode) Size() (int64, error) { 26 | return ln.size, nil 27 | } 28 | 29 | func (ln *lnkNode) Link() ipld.Link { 30 | return ln.link 31 | } 32 | 33 | // Builder provides the linksystem context for saving files & directories 34 | type Builder struct { 35 | ls *ipld.LinkSystem 36 | } 37 | 38 | // NewMapDirectory creates a unixfs directory from a list of named entries 39 | func (b *Builder) NewMapDirectory(entries map[string]Node) Node { 40 | lnks := make([]dagpb.PBLink, 0, len(entries)) 41 | for name, e := range entries { 42 | sz, _ := e.Size() 43 | entry, err := builder.BuildUnixFSDirectoryEntry(name, sz, e.Link()) 44 | if err != nil { 45 | return nil 46 | } 47 | lnks = append(lnks, entry) 48 | } 49 | n, size, err := builder.BuildUnixFSDirectory(lnks, b.ls) 50 | if err != nil { 51 | panic(err) 52 | } 53 | return &lnkNode{ 54 | n, 55 | int64(size), 56 | b.ls, 57 | } 58 | } 59 | 60 | // NewBytesFile creates a unixfs file from byte contents 61 | func (b *Builder) NewBytesFile(data []byte) Node { 62 | n, size, err := builder.BuildUnixFSFile(bytes.NewReader(data), "", b.ls) 63 | if err != nil { 64 | panic(err) 65 | } 66 | return &lnkNode{ 67 | n, 68 | int64(size), 69 | b.ls, 70 | } 71 | } 72 | 73 | // Store provides a builder context for making unixfs files and directories 74 | func Store(ls *ipld.LinkSystem, cb func(b *Builder) error) error { 75 | b := Builder{ls} 76 | return cb(&b) 77 | } 78 | -------------------------------------------------------------------------------- /data/builder/quick/quick_test.go: 
// hashBits is a helper for pulling out sections of a hash
type hashBits []byte

// mkmask returns a byte with the low n bits set (n in [0,8]).
func mkmask(n int) byte {
	return (1 << uint(n)) - 1
}

// Slice returns the 'width' bits of the hashBits value as an integer, or an
// error if there aren't enough bits.
func (hb hashBits) Slice(offset, width int) (int, error) {
	if offset+width > len(hb)*8 {
		return 0, fmt.Errorf("sharded directory too deep")
	}
	return hb.slice(offset, width), nil
}

// slice extracts `width` bits starting at bit `offset`; the caller must have
// already verified the range is in bounds.
func (hb hashBits) slice(offset, width int) int {
	byteIdx := offset / 8
	avail := 8 - offset%8 // bits remaining in the current byte
	cur := hb[byteIdx]

	switch {
	case width == avail:
		// request ends exactly at this byte boundary
		return int(cur & mkmask(width))
	case width < avail:
		// request fits inside the current byte: mask off the bits above the
		// window, clear the bits below it, then shift the window down
		kept := cur & mkmask(avail)
		return int((kept &^ mkmask(avail-width)) >> uint(avail-width))
	default:
		// request spans byte boundaries: take what's left of this byte as the
		// high bits and recurse for the remainder
		high := int(cur&mkmask(avail)) << uint(width-avail)
		return high + hb.slice(offset+avail, width-avail)
	}
}
func(ipld.ListAssembler) {})) 172 | } else { 173 | err := la.Finish() 174 | if err != nil { 175 | return err 176 | } 177 | nd := bsa.Build() 178 | qp.MapEntry(ma, Field__BlockSizes, qp.Node(nd)) 179 | } 180 | } 181 | return nil 182 | } 183 | 184 | func consumeBlockSizes(remaining []byte, count int64, la ipld.ListAssembler) error { 185 | for i := 0; i < int(count); i++ { 186 | blockSize, n := protowire.ConsumeVarint(remaining) 187 | if n < 0 { 188 | return protowire.ParseError(n) 189 | } 190 | remaining = remaining[n:] 191 | qp.ListEntry(la, qp.Int(int64(blockSize))) 192 | } 193 | if len(remaining) > 0 { 194 | return errors.New("did not consume all block sizes") 195 | } 196 | return nil 197 | } 198 | 199 | func consumeUnixTime(remaining []byte, ma ipld.MapAssembler) error { 200 | for len(remaining) != 0 { 201 | fieldNum, wireType, n := protowire.ConsumeTag(remaining) 202 | if n < 0 { 203 | return protowire.ParseError(n) 204 | } 205 | remaining = remaining[n:] 206 | 207 | switch fieldNum { 208 | case UnixTime_SecondsWireNum: 209 | if wireType != protowire.VarintType { 210 | return ErrWrongWireType{"UnixTime", Field__Seconds, protowire.VarintType, wireType} 211 | } 212 | seconds, n := protowire.ConsumeVarint(remaining) 213 | if n < 0 { 214 | return protowire.ParseError(n) 215 | } 216 | remaining = remaining[n:] 217 | qp.MapEntry(ma, Field__Seconds, qp.Int(int64(seconds))) 218 | case UnixTime_FractionalNanosecondsWireNum: 219 | if wireType != protowire.Fixed32Type { 220 | return ErrWrongWireType{"UnixTime", Field__Nanoseconds, protowire.Fixed32Type, wireType} 221 | } 222 | fractionalNanoseconds, n := protowire.ConsumeFixed32(remaining) 223 | if n < 0 { 224 | return protowire.ParseError(n) 225 | } 226 | remaining = remaining[n:] 227 | qp.MapEntry(ma, Field__Nanoseconds, qp.Int(int64(fractionalNanoseconds))) 228 | default: 229 | n := protowire.ConsumeFieldValue(fieldNum, wireType, remaining) 230 | if n < 0 { 231 | return protowire.ParseError(n) 232 | } 233 | remaining = 
remaining[n:] 234 | } 235 | } 236 | return nil 237 | } 238 | func DecodeUnixTime(src []byte) (UnixTime, error) { 239 | nd, err := qp.BuildMap(Type.UnixTime, -1, func(ma ipld.MapAssembler) { 240 | err := consumeUnixTime(src, ma) 241 | if err != nil { 242 | panic(err) 243 | } 244 | }) 245 | if err != nil { 246 | return nil, err 247 | } 248 | return nd.(UnixTime), err 249 | } 250 | 251 | func DecodeUnixFSMetadata(src []byte) (UnixFSMetadata, error) { 252 | nd, err := qp.BuildMap(Type.UnixFSMetadata, -1, func(ma ipld.MapAssembler) { 253 | err := consumeUnixFSMetadata(src, ma) 254 | if err != nil { 255 | panic(err) 256 | } 257 | }) 258 | if err != nil { 259 | return nil, err 260 | } 261 | return nd.(UnixFSMetadata), nil 262 | } 263 | 264 | func consumeUnixFSMetadata(remaining []byte, ma ipld.MapAssembler) error { 265 | for len(remaining) != 0 { 266 | 267 | fieldNum, wireType, n := protowire.ConsumeTag(remaining) 268 | if n < 0 { 269 | return protowire.ParseError(n) 270 | } 271 | remaining = remaining[n:] 272 | 273 | switch fieldNum { 274 | case Metadata_MimeTypeWireNum: 275 | if wireType != protowire.BytesType { 276 | return ErrWrongWireType{"UnixFSMetadata", Field__MimeType, protowire.VarintType, wireType} 277 | } 278 | mimeTypeBytes, n := protowire.ConsumeBytes(remaining) 279 | if n < 0 { 280 | return protowire.ParseError(n) 281 | } 282 | remaining = remaining[n:] 283 | qp.MapEntry(ma, Field__MimeType, qp.String(string(mimeTypeBytes))) 284 | default: 285 | n := protowire.ConsumeFieldValue(fieldNum, wireType, remaining) 286 | if n < 0 { 287 | return protowire.ParseError(n) 288 | } 289 | remaining = remaining[n:] 290 | } 291 | } 292 | return nil 293 | } 294 | -------------------------------------------------------------------------------- /data/wirenumbers.go: -------------------------------------------------------------------------------- 1 | package data 2 | 3 | import "google.golang.org/protobuf/encoding/protowire" 4 | 5 | const ( 6 | Data_DataTypeWireNum 
protowire.Number = 1 7 | Data_DataWireNum protowire.Number = 2 8 | Data_FileSizeWireNum protowire.Number = 3 9 | Data_BlockSizesWireNum protowire.Number = 4 10 | Data_HashTypeWireNum protowire.Number = 5 11 | Data_FanoutWireNum protowire.Number = 6 12 | Data_ModeWireNum protowire.Number = 7 13 | Data_MtimeWireNum protowire.Number = 8 14 | UnixTime_SecondsWireNum protowire.Number = 1 15 | UnixTime_FractionalNanosecondsWireNum protowire.Number = 2 16 | Metadata_MimeTypeWireNum protowire.Number = 1 17 | ) 18 | -------------------------------------------------------------------------------- /directory/basicdir.go: -------------------------------------------------------------------------------- 1 | package directory 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/ipfs/go-unixfsnode/data" 7 | "github.com/ipfs/go-unixfsnode/iter" 8 | "github.com/ipfs/go-unixfsnode/utils" 9 | dagpb "github.com/ipld/go-codec-dagpb" 10 | "github.com/ipld/go-ipld-prime" 11 | "github.com/ipld/go-ipld-prime/schema" 12 | ) 13 | 14 | var _ ipld.Node = UnixFSBasicDir(nil) 15 | var _ schema.TypedNode = UnixFSBasicDir(nil) 16 | var _ ipld.ADL = UnixFSBasicDir(nil) 17 | 18 | type UnixFSBasicDir = *_UnixFSBasicDir 19 | 20 | type _UnixFSBasicDir struct { 21 | _substrate dagpb.PBNode 22 | } 23 | 24 | func NewUnixFSBasicDir(ctx context.Context, substrate dagpb.PBNode, nddata data.UnixFSData, _ *ipld.LinkSystem) (ipld.Node, error) { 25 | if nddata.FieldDataType().Int() != data.Data_Directory { 26 | return nil, data.ErrWrongNodeType{Expected: data.Data_Directory, Actual: nddata.FieldDataType().Int()} 27 | } 28 | return &_UnixFSBasicDir{_substrate: substrate}, nil 29 | } 30 | 31 | func (n UnixFSBasicDir) Kind() ipld.Kind { 32 | return n._substrate.Kind() 33 | } 34 | 35 | // LookupByString looks for the key in the list of links with a matching name 36 | func (n UnixFSBasicDir) LookupByString(key string) (ipld.Node, error) { 37 | links := n._substrate.FieldLinks() 38 | link := utils.Lookup(links, key) 39 | 
if link == nil { 40 | return nil, schema.ErrNoSuchField{Type: nil /*TODO*/, Field: ipld.PathSegmentOfString(key)} 41 | } 42 | return link, nil 43 | } 44 | 45 | func (n UnixFSBasicDir) LookupByNode(key ipld.Node) (ipld.Node, error) { 46 | ks, err := key.AsString() 47 | if err != nil { 48 | return nil, err 49 | } 50 | return n.LookupByString(ks) 51 | } 52 | 53 | func (n UnixFSBasicDir) LookupByIndex(idx int64) (ipld.Node, error) { 54 | return n._substrate.LookupByIndex(idx) 55 | } 56 | 57 | func (n UnixFSBasicDir) LookupBySegment(seg ipld.PathSegment) (ipld.Node, error) { 58 | return n.LookupByString(seg.String()) 59 | } 60 | 61 | func (n UnixFSBasicDir) MapIterator() ipld.MapIterator { 62 | return iter.NewUnixFSDirMapIterator(&_UnixFSBasicDir__ListItr{n._substrate.Links.Iterator()}, nil) 63 | } 64 | 65 | // ListIterator returns an iterator which yields key-value pairs 66 | // traversing the node. 67 | // If the node kind is anything other than a list, nil will be returned. 68 | // 69 | // The iterator will yield every entry in the list; that is, it 70 | // can be expected that itr.Next will be called node.Length times 71 | // before itr.Done becomes true. 72 | func (n UnixFSBasicDir) ListIterator() ipld.ListIterator { 73 | return nil 74 | } 75 | 76 | // Length returns the length of a list, or the number of entries in a map, 77 | // or -1 if the node is not of list nor map kind. 
78 | func (n UnixFSBasicDir) Length() int64 { 79 | return n._substrate.FieldLinks().Length() 80 | } 81 | 82 | func (n UnixFSBasicDir) IsAbsent() bool { 83 | return false 84 | } 85 | 86 | func (n UnixFSBasicDir) IsNull() bool { 87 | return false 88 | } 89 | 90 | func (n UnixFSBasicDir) AsBool() (bool, error) { 91 | return n._substrate.AsBool() 92 | } 93 | 94 | func (n UnixFSBasicDir) AsInt() (int64, error) { 95 | return n._substrate.AsInt() 96 | } 97 | 98 | func (n UnixFSBasicDir) AsFloat() (float64, error) { 99 | return n._substrate.AsFloat() 100 | } 101 | 102 | func (n UnixFSBasicDir) AsString() (string, error) { 103 | return n._substrate.AsString() 104 | } 105 | 106 | func (n UnixFSBasicDir) AsBytes() ([]byte, error) { 107 | return n._substrate.AsBytes() 108 | } 109 | 110 | func (n UnixFSBasicDir) AsLink() (ipld.Link, error) { 111 | return n._substrate.AsLink() 112 | } 113 | 114 | func (n UnixFSBasicDir) Prototype() ipld.NodePrototype { 115 | // TODO: should this return something? 116 | // probobly not until we write the write interfaces 117 | return nil 118 | } 119 | 120 | // satisfy schema.TypedNode 121 | func (UnixFSBasicDir) Type() schema.Type { 122 | return nil /*TODO:typelit*/ 123 | } 124 | 125 | func (n UnixFSBasicDir) Representation() ipld.Node { 126 | return n._substrate.Representation() 127 | } 128 | 129 | // Native map accessors 130 | 131 | func (n UnixFSBasicDir) Iterator() *iter.UnixFSDir__Itr { 132 | return iter.NewUnixFSDirIterator(&_UnixFSBasicDir__ListItr{n._substrate.Links.Iterator()}, nil) 133 | } 134 | 135 | func (n UnixFSBasicDir) Lookup(key dagpb.String) dagpb.Link { 136 | return utils.Lookup(n._substrate.FieldLinks(), key.String()) 137 | } 138 | 139 | // direct access to the links and data 140 | 141 | func (n UnixFSBasicDir) FieldLinks() dagpb.PBLinks { 142 | return n._substrate.FieldLinks() 143 | } 144 | 145 | func (n UnixFSBasicDir) FieldData() dagpb.MaybeBytes { 146 | return n._substrate.FieldData() 147 | } 148 | 149 | // Substrate 
returns the underlying PBNode -- note: only the substrate will encode successfully to protobuf if writing 150 | func (n UnixFSBasicDir) Substrate() ipld.Node { 151 | return n._substrate 152 | } 153 | 154 | type _UnixFSBasicDir__ListItr struct { 155 | _substrate *dagpb.PBLinks__Itr 156 | } 157 | 158 | func (itr *_UnixFSBasicDir__ListItr) Next() (int64, dagpb.PBLink, error) { 159 | idx, v := itr._substrate.Next() 160 | return idx, v, nil 161 | } 162 | 163 | func (itr *_UnixFSBasicDir__ListItr) Done() bool { 164 | return itr._substrate.Done() 165 | } 166 | -------------------------------------------------------------------------------- /file/deferred.go: -------------------------------------------------------------------------------- 1 | package file 2 | 3 | import ( 4 | "context" 5 | "io" 6 | 7 | dagpb "github.com/ipld/go-codec-dagpb" 8 | "github.com/ipld/go-ipld-prime" 9 | ) 10 | 11 | func newDeferredFileNode(ctx context.Context, lsys *ipld.LinkSystem, root ipld.Link) LargeBytesNode { 12 | dfn := deferredFileNode{ 13 | LargeBytesNode: nil, 14 | root: root, 15 | lsys: lsys, 16 | ctx: ctx, 17 | } 18 | dfn.LargeBytesNode = &deferred{&dfn} 19 | return &dfn 20 | } 21 | 22 | type deferredFileNode struct { 23 | LargeBytesNode 24 | 25 | root ipld.Link 26 | lsys *ipld.LinkSystem 27 | ctx context.Context 28 | } 29 | 30 | func (d *deferredFileNode) resolve() error { 31 | if d.lsys == nil { 32 | return nil 33 | } 34 | target, err := d.lsys.Load(ipld.LinkContext{Ctx: d.ctx}, d.root, protoFor(d.root)) 35 | if err != nil { 36 | return err 37 | } 38 | 39 | asFSNode, err := NewUnixFSFile(d.ctx, target, d.lsys) 40 | if err != nil { 41 | return err 42 | } 43 | d.LargeBytesNode = asFSNode 44 | d.root = nil 45 | d.lsys = nil 46 | d.ctx = nil 47 | return nil 48 | } 49 | 50 | type deferred struct { 51 | *deferredFileNode 52 | } 53 | 54 | type deferredReader struct { 55 | io.ReadSeeker 56 | *deferredFileNode 57 | } 58 | 59 | func (d *deferred) AsLargeBytes() (io.ReadSeeker, error) { 60 | 
return &deferredReader{nil, d.deferredFileNode}, nil 61 | } 62 | 63 | func (d *deferredReader) Read(p []byte) (int, error) { 64 | if d.ReadSeeker == nil { 65 | if err := d.deferredFileNode.resolve(); err != nil { 66 | return 0, err 67 | } 68 | rs, err := d.deferredFileNode.AsLargeBytes() 69 | if err != nil { 70 | return 0, err 71 | } 72 | d.ReadSeeker = rs 73 | } 74 | return d.ReadSeeker.Read(p) 75 | } 76 | 77 | func (d *deferredReader) Seek(offset int64, whence int) (int64, error) { 78 | if d.ReadSeeker == nil { 79 | if err := d.deferredFileNode.resolve(); err != nil { 80 | return 0, err 81 | } 82 | rs, err := d.deferredFileNode.AsLargeBytes() 83 | if err != nil { 84 | return 0, err 85 | } 86 | d.ReadSeeker = rs 87 | } 88 | return d.ReadSeeker.Seek(offset, whence) 89 | } 90 | 91 | func (d *deferred) Kind() ipld.Kind { 92 | return ipld.Kind_Bytes 93 | } 94 | 95 | func (d *deferred) AsBytes() ([]byte, error) { 96 | if err := d.deferredFileNode.resolve(); err != nil { 97 | return []byte{}, err 98 | } 99 | 100 | return d.deferredFileNode.AsBytes() 101 | } 102 | 103 | func (d *deferred) AsBool() (bool, error) { 104 | return false, ipld.ErrWrongKind{TypeName: "bool", MethodName: "AsBool", AppropriateKind: ipld.KindSet_JustBytes} 105 | } 106 | 107 | func (d *deferred) AsInt() (int64, error) { 108 | return 0, ipld.ErrWrongKind{TypeName: "int", MethodName: "AsInt", AppropriateKind: ipld.KindSet_JustBytes} 109 | } 110 | 111 | func (d *deferred) AsFloat() (float64, error) { 112 | return 0, ipld.ErrWrongKind{TypeName: "float", MethodName: "AsFloat", AppropriateKind: ipld.KindSet_JustBytes} 113 | } 114 | 115 | func (d *deferred) AsString() (string, error) { 116 | return "", ipld.ErrWrongKind{TypeName: "string", MethodName: "AsString", AppropriateKind: ipld.KindSet_JustBytes} 117 | } 118 | 119 | func (d *deferred) AsLink() (ipld.Link, error) { 120 | return nil, ipld.ErrWrongKind{TypeName: "link", MethodName: "AsLink", AppropriateKind: ipld.KindSet_JustBytes} 121 | } 122 | 123 | 
func (d *deferred) AsNode() (ipld.Node, error) { 124 | return nil, nil 125 | } 126 | 127 | func (d *deferred) Size() int { 128 | return 0 129 | } 130 | 131 | func (d *deferred) IsAbsent() bool { 132 | return false 133 | } 134 | 135 | func (d *deferred) IsNull() bool { 136 | if err := d.deferredFileNode.resolve(); err != nil { 137 | return true 138 | } 139 | return d.deferredFileNode.IsNull() 140 | } 141 | 142 | func (d *deferred) Length() int64 { 143 | return 0 144 | } 145 | 146 | func (d *deferred) ListIterator() ipld.ListIterator { 147 | return nil 148 | } 149 | 150 | func (d *deferred) MapIterator() ipld.MapIterator { 151 | return nil 152 | } 153 | 154 | func (d *deferred) LookupByIndex(idx int64) (ipld.Node, error) { 155 | return nil, ipld.ErrWrongKind{} 156 | } 157 | 158 | func (d *deferred) LookupByString(key string) (ipld.Node, error) { 159 | return nil, ipld.ErrWrongKind{} 160 | } 161 | 162 | func (d *deferred) LookupByNode(key ipld.Node) (ipld.Node, error) { 163 | return nil, ipld.ErrWrongKind{} 164 | } 165 | 166 | func (d *deferred) LookupBySegment(seg ipld.PathSegment) (ipld.Node, error) { 167 | return nil, ipld.ErrWrongKind{} 168 | } 169 | 170 | // shardded files / nodes look like dagpb nodes. 171 | func (d *deferred) Prototype() ipld.NodePrototype { 172 | return dagpb.Type.PBNode 173 | } 174 | -------------------------------------------------------------------------------- /file/file.go: -------------------------------------------------------------------------------- 1 | package file 2 | 3 | import ( 4 | "context" 5 | "io" 6 | 7 | "github.com/ipld/go-ipld-prime" 8 | "github.com/ipld/go-ipld-prime/adl" 9 | "github.com/ipld/go-ipld-prime/datamodel" 10 | ) 11 | 12 | // NewUnixFSFile attempts to construct an ipld node from the base protobuf node representing the 13 | // root of a unixfs File. 14 | // It provides a `bytes` view over the file, along with access to io.Reader streaming access 15 | // to file data. 
16 | func NewUnixFSFile(ctx context.Context, substrate ipld.Node, lsys *ipld.LinkSystem) (LargeBytesNode, error) { 17 | if substrate.Kind() == ipld.Kind_Bytes { 18 | // A raw / single-node file. 19 | return &singleNodeFile{substrate}, nil 20 | } 21 | // see if it's got children. 22 | links, err := substrate.LookupByString("Links") 23 | if err != nil { 24 | return nil, err 25 | } 26 | if links.Length() == 0 { 27 | // no children. 28 | return newWrappedNode(substrate) 29 | } 30 | 31 | return &shardNodeFile{ 32 | ctx: ctx, 33 | lsys: lsys, 34 | substrate: substrate, 35 | }, nil 36 | } 37 | 38 | // NewUnixFSFileWithPreload is the same as NewUnixFSFile but it performs a full load of constituent 39 | // blocks where the file spans multiple blocks. This is useful where a system needs to watch the 40 | // LinkSystem for block loads to determine which blocks make up this file. 41 | // NewUnixFSFileWithPreload is used by the "unixfs-preload" reifier. 42 | func NewUnixFSFileWithPreload(ctx context.Context, substrate ipld.Node, lsys *ipld.LinkSystem) (LargeBytesNode, error) { 43 | f, err := NewUnixFSFile(ctx, substrate, lsys) 44 | if err != nil { 45 | return nil, err 46 | } 47 | r, err := f.AsLargeBytes() 48 | if err != nil { 49 | return nil, err 50 | } 51 | if _, err := io.Copy(io.Discard, r); err != nil { 52 | return nil, err 53 | } 54 | return f, nil 55 | } 56 | 57 | // A LargeBytesNode is an ipld.Node that can be streamed over. It is guaranteed to have a Bytes type. 
58 | type LargeBytesNode interface { 59 | adl.ADL 60 | AsLargeBytes() (io.ReadSeeker, error) 61 | } 62 | 63 | type singleNodeFile struct { 64 | ipld.Node 65 | } 66 | 67 | func (f *singleNodeFile) AsLargeBytes() (io.ReadSeeker, error) { 68 | return &singleNodeReader{f, 0}, nil 69 | } 70 | 71 | func (f *singleNodeFile) Substrate() datamodel.Node { 72 | return f.Node 73 | } 74 | 75 | type singleNodeReader struct { 76 | ipld.Node 77 | offset int 78 | } 79 | 80 | func (f *singleNodeReader) Read(p []byte) (int, error) { 81 | buf, err := f.Node.AsBytes() 82 | if err != nil { 83 | return 0, err 84 | } 85 | if f.offset >= len(buf) { 86 | return 0, io.EOF 87 | } 88 | n := copy(p, buf[f.offset:]) 89 | f.offset += n 90 | return n, nil 91 | } 92 | 93 | func (f *singleNodeReader) Seek(offset int64, whence int) (int64, error) { 94 | buf, err := f.Node.AsBytes() 95 | if err != nil { 96 | return 0, err 97 | } 98 | 99 | switch whence { 100 | case io.SeekStart: 101 | f.offset = int(offset) 102 | case io.SeekCurrent: 103 | f.offset += int(offset) 104 | case io.SeekEnd: 105 | f.offset = len(buf) + int(offset) 106 | } 107 | if f.offset < 0 { 108 | return 0, io.EOF 109 | } 110 | return int64(f.offset), nil 111 | } 112 | -------------------------------------------------------------------------------- /file/file_test.go: -------------------------------------------------------------------------------- 1 | package file_test 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "fmt" 7 | "io" 8 | "testing" 9 | 10 | "github.com/ipfs/go-test/random" 11 | "github.com/ipfs/go-unixfsnode" 12 | "github.com/ipfs/go-unixfsnode/data/builder" 13 | "github.com/ipfs/go-unixfsnode/directory" 14 | "github.com/ipfs/go-unixfsnode/file" 15 | "github.com/ipld/go-car/v2/blockstore" 16 | dagpb "github.com/ipld/go-codec-dagpb" 17 | "github.com/ipld/go-ipld-prime" 18 | cidlink "github.com/ipld/go-ipld-prime/linking/cid" 19 | "github.com/ipld/go-ipld-prime/node/basicnode" 20 | ) 21 | 22 | func TestRootV0File(t *testing.T) 
{ 23 | baseFile := "./fixtures/QmT78zSuBmuS4z925WZfrqQ1qHaJ56DQaTfyMUF7F8ff5o.car" 24 | root, ls := open(baseFile, t) 25 | file, err := file.NewUnixFSFile(context.Background(), root, ls) 26 | if err != nil { 27 | t.Fatal(err) 28 | } 29 | fc, err := file.AsBytes() 30 | if err != nil { 31 | t.Fatal(err) 32 | } 33 | if !bytes.Equal(fc, []byte("hello world\n")) { 34 | t.Errorf("file content does not match: %s", string(fc)) 35 | } 36 | } 37 | 38 | func TestNamedV0File(t *testing.T) { 39 | baseFile := "./fixtures/QmT8EC9sJq63SkDZ1mWLbWWyVV66PuqyHWpKkH4pcVyY4H.car" 40 | root, ls := open(baseFile, t) 41 | dir, err := unixfsnode.Reify(ipld.LinkContext{}, root, ls) 42 | if err != nil { 43 | t.Fatal(err) 44 | } 45 | dpbn := dir.(directory.UnixFSBasicDir) 46 | name, link := dpbn.Iterator().Next() 47 | if name.String() != "b.txt" { 48 | t.Fatal("unexpected filename") 49 | } 50 | fileNode, err := ls.Load(ipld.LinkContext{}, link.Link(), dagpb.Type.PBNode) 51 | if err != nil { 52 | t.Fatal(err) 53 | } 54 | file, err := file.NewUnixFSFile(context.Background(), fileNode, ls) 55 | if err != nil { 56 | t.Fatal(err) 57 | } 58 | fc, err := file.AsBytes() 59 | if err != nil { 60 | t.Fatal(err) 61 | } 62 | if !bytes.Equal(fc, []byte("hello world\n")) { 63 | t.Errorf("file content does not match: %s", string(fc)) 64 | } 65 | } 66 | 67 | func TestFileSeeker(t *testing.T) { 68 | ls := cidlink.DefaultLinkSystem() 69 | storage := cidlink.Memory{} 70 | ls.StorageReadOpener = storage.OpenRead 71 | ls.StorageWriteOpener = storage.OpenWrite 72 | 73 | // Make random file with 1024 bytes. 74 | buf := make([]byte, 1024) 75 | random.NewSeededRand(0xdeadbeef).Read(buf) 76 | r := bytes.NewReader(buf) 77 | 78 | // Build UnixFS File as a single chunk 79 | f, _, err := builder.BuildUnixFSFile(r, "size-1024", &ls) 80 | if err != nil { 81 | t.Fatal(err) 82 | } 83 | 84 | // Load the file. 
85 | fr, err := ls.Load(ipld.LinkContext{}, f, basicnode.Prototype.Bytes) 86 | if err != nil { 87 | t.Fatal(err) 88 | } 89 | 90 | // Create it. 91 | ufn, err := file.NewUnixFSFile(context.Background(), fr, &ls) 92 | if err != nil { 93 | t.Fatal(err) 94 | } 95 | 96 | rs, err := ufn.AsLargeBytes() 97 | if err != nil { 98 | t.Fatal(err) 99 | } 100 | 101 | testSeekIn1024ByteFile(t, rs) 102 | } 103 | 104 | func open(car string, t *testing.T) (ipld.Node, *ipld.LinkSystem) { 105 | baseStore, err := blockstore.OpenReadOnly(car) 106 | if err != nil { 107 | t.Fatal(err) 108 | } 109 | ls := cidlink.DefaultLinkSystem() 110 | ls.StorageReadOpener = func(lctx ipld.LinkContext, l ipld.Link) (io.Reader, error) { 111 | cl, ok := l.(cidlink.Link) 112 | if !ok { 113 | return nil, fmt.Errorf("couldn't load link") 114 | } 115 | blk, err := baseStore.Get(lctx.Ctx, cl.Cid) 116 | if err != nil { 117 | return nil, err 118 | } 119 | return bytes.NewBuffer(blk.RawData()), nil 120 | } 121 | carRoots, err := baseStore.Roots() 122 | if err != nil { 123 | t.Fatal(err) 124 | } 125 | root, err := ls.Load(ipld.LinkContext{}, cidlink.Link{Cid: carRoots[0]}, dagpb.Type.PBNode) 126 | if err != nil { 127 | t.Fatal(err) 128 | } 129 | return root, &ls 130 | } 131 | 132 | func testSeekIn1024ByteFile(t *testing.T, rs io.ReadSeeker) { 133 | // Seek from the start and try reading 134 | offset, err := rs.Seek(128, io.SeekStart) 135 | if err != nil { 136 | t.Fatal(err) 137 | } 138 | 139 | if offset != 128 { 140 | t.Fatalf("expected offset %d, got %d", 484, offset) 141 | } 142 | 143 | readBuf := make([]byte, 256) 144 | _, err = io.ReadFull(rs, readBuf) 145 | if err != nil { 146 | t.Fatal(err) 147 | } 148 | 149 | // Validate we can detect the offset with SeekCurrent 150 | offset, err = rs.Seek(0, io.SeekCurrent) 151 | if err != nil { 152 | t.Fatal(err) 153 | } 154 | 155 | if offset != 384 { 156 | t.Fatalf("expected offset %d, got %d", 384, offset) 157 | } 158 | 159 | // Validate we can read after moving with 
SeekCurrent 160 | offset, err = rs.Seek(100, io.SeekCurrent) 161 | if err != nil { 162 | t.Fatal(err) 163 | } 164 | if offset != 484 { 165 | t.Fatalf("expected offset %d, got %d", 484, offset) 166 | } 167 | 168 | _, err = io.ReadFull(rs, readBuf) 169 | if err != nil { 170 | t.Fatal(err) 171 | } 172 | 173 | offset, err = rs.Seek(0, io.SeekCurrent) 174 | if err != nil { 175 | t.Fatal(err) 176 | } 177 | 178 | if offset != 740 { 179 | t.Fatalf("expected offset %d, got %d", 740, offset) 180 | } 181 | 182 | // Validate we can read after moving with SeekEnd 183 | offset, err = rs.Seek(-400, io.SeekEnd) 184 | if err != nil { 185 | t.Fatal(err) 186 | } 187 | 188 | if offset != 624 { 189 | t.Fatalf("expected offset %d, got %d", 624, offset) 190 | } 191 | 192 | _, err = io.ReadFull(rs, readBuf) 193 | if err != nil { 194 | t.Fatal(err) 195 | } 196 | 197 | offset, err = rs.Seek(0, io.SeekCurrent) 198 | if err != nil { 199 | t.Fatal(err) 200 | } 201 | 202 | if offset != 880 { 203 | t.Fatalf("expected offset %d, got %d", 880, offset) 204 | } 205 | } 206 | -------------------------------------------------------------------------------- /file/fixtures/QmT78zSuBmuS4z925WZfrqQ1qHaJ56DQaTfyMUF7F8ff5o.car: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ipfs/go-unixfsnode/55bf436685936f51a82e553bab4776b54dda7932/file/fixtures/QmT78zSuBmuS4z925WZfrqQ1qHaJ56DQaTfyMUF7F8ff5o.car -------------------------------------------------------------------------------- /file/fixtures/QmT8EC9sJq63SkDZ1mWLbWWyVV66PuqyHWpKkH4pcVyY4H.car: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ipfs/go-unixfsnode/55bf436685936f51a82e553bab4776b54dda7932/file/fixtures/QmT8EC9sJq63SkDZ1mWLbWWyVV66PuqyHWpKkH4pcVyY4H.car -------------------------------------------------------------------------------- /file/large_file_test.go: 
-------------------------------------------------------------------------------- 1 | //go:build !race 2 | 3 | package file_test 4 | 5 | import ( 6 | "bytes" 7 | "context" 8 | "fmt" 9 | "io" 10 | "strconv" 11 | "sync" 12 | "testing" 13 | 14 | "github.com/ipfs/go-cid" 15 | "github.com/ipfs/go-test/random" 16 | "github.com/ipfs/go-unixfsnode/data/builder" 17 | "github.com/ipfs/go-unixfsnode/file" 18 | dagpb "github.com/ipld/go-codec-dagpb" 19 | "github.com/ipld/go-ipld-prime" 20 | cidlink "github.com/ipld/go-ipld-prime/linking/cid" 21 | ) 22 | 23 | func TestLargeFileReader(t *testing.T) { 24 | if testing.Short() || strconv.IntSize == 32 { 25 | t.Skip() 26 | } 27 | buf := make([]byte, 512*1024*1024) 28 | random.NewSeededRand(0xdeadbeef).Read(buf) 29 | r := bytes.NewReader(buf) 30 | 31 | ls := cidlink.DefaultLinkSystem() 32 | storage := cidlink.Memory{} 33 | ls.StorageReadOpener = storage.OpenRead 34 | ls.StorageWriteOpener = storage.OpenWrite 35 | 36 | f, _, err := builder.BuildUnixFSFile(r, "", &ls) 37 | if err != nil { 38 | t.Fatal(err) 39 | } 40 | 41 | // get back the root node substrate from the link at the top of the builder. 42 | fr, err := ls.Load(ipld.LinkContext{}, f, dagpb.Type.PBNode) 43 | if err != nil { 44 | t.Fatal(err) 45 | } 46 | 47 | ufn, err := file.NewUnixFSFile(context.Background(), fr, &ls) 48 | if err != nil { 49 | t.Fatal(err) 50 | } 51 | // read back out the file. 
52 | for i := 0; i < len(buf); i += 100 * 1024 * 1024 { 53 | rs, err := ufn.AsLargeBytes() 54 | if err != nil { 55 | t.Fatal(err) 56 | } 57 | _, err = rs.Seek(int64(i), io.SeekStart) 58 | if err != nil { 59 | t.Fatal(err) 60 | } 61 | ob, err := io.ReadAll(rs) 62 | if err != nil { 63 | t.Fatal(err) 64 | } 65 | if !bytes.Equal(ob, buf[i:]) { 66 | t.Fatal("Not equal at offset", i, "expected", len(buf[i:]), "got", len(ob)) 67 | } 68 | } 69 | } 70 | 71 | func TestLargeFileSeeker(t *testing.T) { 72 | ls := cidlink.DefaultLinkSystem() 73 | storage := cidlink.Memory{} 74 | ls.StorageReadOpener = storage.OpenRead 75 | ls.StorageWriteOpener = storage.OpenWrite 76 | 77 | // Make random file with 1024 bytes. 78 | buf := make([]byte, 1024) 79 | random.NewSeededRand(0xdeadbeef).Read(buf) 80 | r := bytes.NewReader(buf) 81 | 82 | // Build UnixFS File chunked in 256 byte parts. 83 | f, _, err := builder.BuildUnixFSFile(r, "size-256", &ls) 84 | if err != nil { 85 | t.Fatal(err) 86 | } 87 | 88 | // Load the file. 89 | fr, err := ls.Load(ipld.LinkContext{}, f, dagpb.Type.PBNode) 90 | if err != nil { 91 | t.Fatal(err) 92 | } 93 | 94 | // Create it. 95 | ufn, err := file.NewUnixFSFile(context.Background(), fr, &ls) 96 | if err != nil { 97 | t.Fatal(err) 98 | } 99 | 100 | rs, err := ufn.AsLargeBytes() 101 | if err != nil { 102 | t.Fatal(err) 103 | } 104 | 105 | testSeekIn1024ByteFile(t, rs) 106 | } 107 | 108 | func TestLargeFileReaderReadsOnlyNecessaryBlocks(t *testing.T) { 109 | tracker, ls := mockTrackingLinkSystem() 110 | 111 | // Make random file with 1024 bytes. 112 | buf := make([]byte, 1024) 113 | random.NewSeededRand(0xdeadbeef).Read(buf) 114 | r := bytes.NewReader(buf) 115 | 116 | // Build UnixFS File chunked in 256 byte parts. 117 | f, _, err := builder.BuildUnixFSFile(r, "size-256", ls) 118 | if err != nil { 119 | t.Fatal(err) 120 | } 121 | 122 | // Load the file. 
123 | fr, err := ls.Load(ipld.LinkContext{}, f, dagpb.Type.PBNode) 124 | if err != nil { 125 | t.Fatal(err) 126 | } 127 | 128 | // Create it. 129 | ufn, err := file.NewUnixFSFile(context.Background(), fr, ls) 130 | if err != nil { 131 | t.Fatal(err) 132 | } 133 | 134 | // Prepare tracker for read. 135 | tracker.resetTracker() 136 | 137 | rs, err := ufn.AsLargeBytes() 138 | if err != nil { 139 | t.Fatal(err) 140 | } 141 | 142 | // Move the pointer to the 2nd block of the file. 143 | _, err = rs.Seek(256, io.SeekStart) 144 | if err != nil { 145 | t.Fatal(err) 146 | } 147 | 148 | // Read the 3rd and 4th blocks of the file. 149 | portion := make([]byte, 512) 150 | _, err = io.ReadAtLeast(rs, portion, 512) 151 | if err != nil { 152 | t.Fatal(err) 153 | } 154 | 155 | // Just be sure we read the right bytes. 156 | if !bytes.Equal(portion, buf[256:768]) { 157 | t.Fatal(fmt.Errorf("did not read correct bytes")) 158 | } 159 | 160 | // We must have read 2 CIDs for each of the 2 blocks! 161 | if l := len(tracker.cids); l != 2 { 162 | t.Fatal(fmt.Errorf("expected to have read 2 blocks, read %d", l)) 163 | } 164 | } 165 | 166 | type trackingReadOpener struct { 167 | cidlink.Memory 168 | mu sync.Mutex 169 | cids []cid.Cid 170 | } 171 | 172 | func (ro *trackingReadOpener) resetTracker() { 173 | ro.mu.Lock() 174 | ro.cids = nil 175 | ro.mu.Unlock() 176 | } 177 | 178 | func (ro *trackingReadOpener) OpenRead(lnkCtx ipld.LinkContext, lnk ipld.Link) (io.Reader, error) { 179 | cidLink, ok := lnk.(cidlink.Link) 180 | if !ok { 181 | return nil, fmt.Errorf("invalid link type for loading: %v", lnk) 182 | } 183 | 184 | ro.mu.Lock() 185 | ro.cids = append(ro.cids, cidLink.Cid) 186 | ro.mu.Unlock() 187 | 188 | return ro.Memory.OpenRead(lnkCtx, lnk) 189 | } 190 | 191 | func mockTrackingLinkSystem() (*trackingReadOpener, *ipld.LinkSystem) { 192 | ls := cidlink.DefaultLinkSystem() 193 | storage := &trackingReadOpener{Memory: cidlink.Memory{}} 194 | 195 | ls.StorageWriteOpener = storage.OpenWrite 
196 | ls.StorageReadOpener = storage.OpenRead 197 | ls.TrustedStorage = true 198 | 199 | return storage, &ls 200 | } 201 | -------------------------------------------------------------------------------- /file/shard.go: -------------------------------------------------------------------------------- 1 | package file 2 | 3 | import ( 4 | "context" 5 | "io" 6 | "sync" 7 | 8 | "github.com/ipfs/go-cid" 9 | "github.com/ipfs/go-unixfsnode/data" 10 | dagpb "github.com/ipld/go-codec-dagpb" 11 | "github.com/ipld/go-ipld-prime" 12 | "github.com/ipld/go-ipld-prime/adl" 13 | cidlink "github.com/ipld/go-ipld-prime/linking/cid" 14 | "github.com/ipld/go-ipld-prime/node/basicnode" 15 | "github.com/multiformats/go-multicodec" 16 | ) 17 | 18 | type shardNodeFile struct { 19 | ctx context.Context 20 | lsys *ipld.LinkSystem 21 | substrate ipld.Node 22 | 23 | // unixfs data unpacked from the substrate. access via .unpack() 24 | metadata data.UnixFSData 25 | unpackLk sync.Once 26 | } 27 | 28 | var _ adl.ADL = (*shardNodeFile)(nil) 29 | 30 | type shardNodeReader struct { 31 | *shardNodeFile 32 | rdr io.Reader 33 | offset int64 34 | len int64 35 | } 36 | 37 | func (s *shardNodeReader) makeReader() (io.Reader, error) { 38 | links, err := s.shardNodeFile.substrate.LookupByString("Links") 39 | if err != nil { 40 | return nil, err 41 | } 42 | readers := make([]io.Reader, 0) 43 | lnkIter := links.ListIterator() 44 | at := int64(0) 45 | for !lnkIter.Done() { 46 | lnkIdx, lnk, err := lnkIter.Next() 47 | if err != nil { 48 | return nil, err 49 | } 50 | childSize, tr, err := s.linkSize(lnk, int(lnkIdx)) 51 | if err != nil { 52 | return nil, err 53 | } 54 | if s.offset >= at+childSize { 55 | at += childSize 56 | continue 57 | } 58 | if tr == nil { 59 | lnkhash, err := lnk.LookupByString("Hash") 60 | if err != nil { 61 | return nil, err 62 | } 63 | lnklnk, err := lnkhash.AsLink() 64 | if err != nil { 65 | return nil, err 66 | } 67 | target := newDeferredFileNode(s.ctx, s.lsys, lnklnk) 68 | tr, err = 
target.AsLargeBytes() 69 | if err != nil { 70 | return nil, err 71 | } 72 | } 73 | // fastforward the first one if needed. 74 | if at < s.offset { 75 | _, err := tr.Seek(s.offset-at, io.SeekStart) 76 | if err != nil { 77 | return nil, err 78 | } 79 | } 80 | at += childSize 81 | readers = append(readers, tr) 82 | } 83 | if len(readers) == 0 { 84 | return nil, io.EOF 85 | } 86 | s.len = at 87 | return io.MultiReader(readers...), nil 88 | } 89 | 90 | func (s *shardNodeFile) unpack() (data.UnixFSData, error) { 91 | var retErr error 92 | s.unpackLk.Do(func() { 93 | nodeData, err := s.substrate.LookupByString("Data") 94 | if err != nil { 95 | retErr = err 96 | return 97 | } 98 | nodeDataBytes, err := nodeData.AsBytes() 99 | if err != nil { 100 | retErr = err 101 | return 102 | } 103 | ud, err := data.DecodeUnixFSData(nodeDataBytes) 104 | if err != nil { 105 | retErr = err 106 | return 107 | } 108 | s.metadata = ud 109 | }) 110 | return s.metadata, retErr 111 | } 112 | 113 | // returns the size of the n'th link from this shard. 114 | // the io.ReadSeeker of the child will be return if it was loaded as part of the size calculation. 115 | func (s *shardNodeFile) linkSize(lnk ipld.Node, position int) (int64, io.ReadSeeker, error) { 116 | lnkhash, err := lnk.LookupByString("Hash") 117 | if err != nil { 118 | return 0, nil, err 119 | } 120 | lnklnk, err := lnkhash.AsLink() 121 | if err != nil { 122 | return 0, nil, err 123 | } 124 | _, c, err := cid.CidFromBytes([]byte(lnklnk.Binary())) 125 | if err != nil { 126 | return 0, nil, err 127 | } 128 | 129 | // efficiency shortcut: for raw blocks, the size will match the bytes of content 130 | if c.Prefix().Codec == cid.Raw { 131 | size, err := lnk.LookupByString("Tsize") 132 | if err != nil { 133 | return 0, nil, err 134 | } 135 | sz, err := size.AsInt() 136 | return sz, nil, err 137 | } 138 | 139 | // check if there are blocksizes written, use them if there are. 
140 | // both err and md can be nil if this was not the first time unpack() 141 | // was called but there was an error on the first call. 142 | md, err := s.unpack() 143 | if err == nil && md != nil { 144 | pn, err := md.BlockSizes.LookupByIndex(int64(position)) 145 | if err == nil { 146 | innerNum, err := pn.AsInt() 147 | if err == nil { 148 | return innerNum, nil, nil 149 | } 150 | } 151 | } 152 | 153 | // open the link and get its size. 154 | target := newDeferredFileNode(s.ctx, s.lsys, lnklnk) 155 | tr, err := target.AsLargeBytes() 156 | if err != nil { 157 | return 0, nil, err 158 | } 159 | 160 | end, err := tr.Seek(0, io.SeekEnd) 161 | if err != nil { 162 | return end, nil, err 163 | } 164 | _, err = tr.Seek(0, io.SeekStart) 165 | return end, tr, err 166 | } 167 | 168 | func (s *shardNodeReader) Read(p []byte) (int, error) { 169 | // build reader 170 | if s.rdr == nil { 171 | rdr, err := s.makeReader() 172 | if err != nil { 173 | return 0, err 174 | } 175 | s.rdr = rdr 176 | } 177 | n, err := s.rdr.Read(p) 178 | s.offset += int64(n) 179 | return n, err 180 | } 181 | 182 | func (s *shardNodeReader) Seek(offset int64, whence int) (int64, error) { 183 | if s.rdr != nil { 184 | s.rdr = nil 185 | } 186 | switch whence { 187 | case io.SeekStart: 188 | s.offset = offset 189 | case io.SeekCurrent: 190 | s.offset += offset 191 | case io.SeekEnd: 192 | s.offset = s.length() + offset 193 | } 194 | return s.offset, nil 195 | } 196 | 197 | func (s *shardNodeFile) length() int64 { 198 | // see if we have size specified in the unixfs data. 
errors fall back to length from links 199 | nodeData, err := s.unpack() 200 | if err != nil || nodeData == nil { 201 | return s.lengthFromLinks() 202 | } 203 | if nodeData.FileSize.Exists() { 204 | if fs, err := nodeData.FileSize.Must().AsInt(); err == nil { 205 | return int64(fs) 206 | } 207 | } 208 | 209 | return s.lengthFromLinks() 210 | } 211 | 212 | func (s *shardNodeFile) lengthFromLinks() int64 { 213 | links, err := s.substrate.LookupByString("Links") 214 | if err != nil { 215 | return 0 216 | } 217 | size := int64(0) 218 | li := links.ListIterator() 219 | for !li.Done() { 220 | idx, l, err := li.Next() 221 | if err != nil { 222 | return 0 223 | } 224 | ll, _, err := s.linkSize(l, int(idx)) 225 | if err != nil { 226 | return 0 227 | } 228 | size += ll 229 | } 230 | return size 231 | } 232 | 233 | func (s *shardNodeFile) AsLargeBytes() (io.ReadSeeker, error) { 234 | return &shardNodeReader{s, nil, 0, 0}, nil 235 | } 236 | 237 | func (s *shardNodeFile) Substrate() ipld.Node { 238 | return s.substrate 239 | } 240 | 241 | func protoFor(link ipld.Link) ipld.NodePrototype { 242 | if lc, ok := link.(cidlink.Link); ok { 243 | if lc.Cid.Prefix().Codec == uint64(multicodec.DagPb) { 244 | return dagpb.Type.PBNode 245 | } 246 | } 247 | return basicnode.Prototype.Any 248 | } 249 | 250 | func (s *shardNodeFile) Kind() ipld.Kind { 251 | return ipld.Kind_Bytes 252 | } 253 | 254 | func (s *shardNodeFile) AsBytes() ([]byte, error) { 255 | rdr, err := s.AsLargeBytes() 256 | if err != nil { 257 | return nil, err 258 | } 259 | return io.ReadAll(rdr) 260 | } 261 | 262 | func (s *shardNodeFile) AsBool() (bool, error) { 263 | return false, ipld.ErrWrongKind{TypeName: "bool", MethodName: "AsBool", AppropriateKind: ipld.KindSet_JustBytes} 264 | } 265 | 266 | func (s *shardNodeFile) AsInt() (int64, error) { 267 | return 0, ipld.ErrWrongKind{TypeName: "int", MethodName: "AsInt", AppropriateKind: ipld.KindSet_JustBytes} 268 | } 269 | 270 | func (s *shardNodeFile) AsFloat() (float64, 
error) { 271 | return 0, ipld.ErrWrongKind{TypeName: "float", MethodName: "AsFloat", AppropriateKind: ipld.KindSet_JustBytes} 272 | } 273 | 274 | func (s *shardNodeFile) AsString() (string, error) { 275 | return "", ipld.ErrWrongKind{TypeName: "string", MethodName: "AsString", AppropriateKind: ipld.KindSet_JustBytes} 276 | } 277 | 278 | func (s *shardNodeFile) AsLink() (ipld.Link, error) { 279 | return nil, ipld.ErrWrongKind{TypeName: "link", MethodName: "AsLink", AppropriateKind: ipld.KindSet_JustBytes} 280 | } 281 | 282 | func (s *shardNodeFile) AsNode() (ipld.Node, error) { 283 | return nil, nil 284 | } 285 | 286 | func (s *shardNodeFile) Size() int { 287 | return 0 288 | } 289 | 290 | func (s *shardNodeFile) IsAbsent() bool { 291 | return false 292 | } 293 | 294 | func (s *shardNodeFile) IsNull() bool { 295 | return s.substrate.IsNull() 296 | } 297 | 298 | func (s *shardNodeFile) Length() int64 { 299 | return 0 300 | } 301 | 302 | func (s *shardNodeFile) ListIterator() ipld.ListIterator { 303 | return nil 304 | } 305 | 306 | func (s *shardNodeFile) MapIterator() ipld.MapIterator { 307 | return nil 308 | } 309 | 310 | func (s *shardNodeFile) LookupByIndex(idx int64) (ipld.Node, error) { 311 | return nil, ipld.ErrWrongKind{} 312 | } 313 | 314 | func (s *shardNodeFile) LookupByString(key string) (ipld.Node, error) { 315 | return nil, ipld.ErrWrongKind{} 316 | } 317 | 318 | func (s *shardNodeFile) LookupByNode(key ipld.Node) (ipld.Node, error) { 319 | return nil, ipld.ErrWrongKind{} 320 | } 321 | 322 | func (s *shardNodeFile) LookupBySegment(seg ipld.PathSegment) (ipld.Node, error) { 323 | return nil, ipld.ErrWrongKind{} 324 | } 325 | 326 | // shardded files / nodes look like dagpb nodes. 
327 | func (s *shardNodeFile) Prototype() ipld.NodePrototype { 328 | return dagpb.Type.PBNode 329 | } 330 | -------------------------------------------------------------------------------- /file/wrapped.go: -------------------------------------------------------------------------------- 1 | package file 2 | 3 | import ( 4 | "github.com/ipfs/go-unixfsnode/data" 5 | "github.com/ipld/go-ipld-prime" 6 | "github.com/ipld/go-ipld-prime/node/basicnode" 7 | ) 8 | 9 | func newWrappedNode(substrate ipld.Node) (LargeBytesNode, error) { 10 | dataField, err := substrate.LookupByString("Data") 11 | if err != nil { 12 | return nil, err 13 | } 14 | // unpack as unixfs proto. 15 | dfb, err := dataField.AsBytes() 16 | if err != nil { 17 | return nil, err 18 | } 19 | ufd, err := data.DecodeUnixFSData(dfb) 20 | if err != nil { 21 | return nil, err 22 | } 23 | 24 | if ufd.Data.Exists() { 25 | return &singleNodeFile{ 26 | Node: ufd.Data.Must(), 27 | }, nil 28 | } 29 | 30 | // an empty degenerate one. 31 | return &singleNodeFile{ 32 | Node: basicnode.NewBytes(nil), 33 | }, nil 34 | } 35 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/ipfs/go-unixfsnode 2 | 3 | go 1.23.8 4 | 5 | require ( 6 | github.com/ipfs/boxo v0.30.0 7 | github.com/ipfs/go-bitfield v1.1.0 8 | github.com/ipfs/go-cid v0.5.0 9 | github.com/ipfs/go-ipld-format v0.6.1 10 | github.com/ipfs/go-test v0.2.2 11 | github.com/ipld/go-car/v2 v2.14.3 12 | github.com/ipld/go-codec-dagpb v1.7.0 13 | github.com/ipld/go-ipld-prime v0.21.0 14 | github.com/multiformats/go-multicodec v0.9.0 15 | github.com/multiformats/go-multihash v0.2.3 16 | github.com/spaolacci/murmur3 v1.1.0 17 | github.com/stretchr/testify v1.10.0 18 | google.golang.org/protobuf v1.36.6 19 | ) 20 | 21 | require ( 22 | github.com/crackcomm/go-gitignore v0.0.0-20241020182519-7843d2ba8fdf // indirect 23 | 
github.com/davecgh/go-spew v1.1.1 // indirect 24 | github.com/decred/dcrd/dcrec/secp256k1/v4 v4.4.0 // indirect 25 | github.com/gammazero/deque v1.0.0 // indirect 26 | github.com/go-logr/logr v1.4.2 // indirect 27 | github.com/go-logr/stdr v1.2.2 // indirect 28 | github.com/google/uuid v1.6.0 // indirect 29 | github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect 30 | github.com/ipfs/bbloom v0.0.4 // indirect 31 | github.com/ipfs/go-block-format v0.2.1 // indirect 32 | github.com/ipfs/go-datastore v0.8.2 // indirect 33 | github.com/ipfs/go-ipld-cbor v0.2.0 // indirect 34 | github.com/ipfs/go-ipld-legacy v0.2.1 // indirect 35 | github.com/ipfs/go-log/v2 v2.5.1 // indirect 36 | github.com/ipfs/go-metrics-interface v0.3.0 // indirect 37 | github.com/klauspost/cpuid/v2 v2.2.10 // indirect 38 | github.com/libp2p/go-buffer-pool v0.1.0 // indirect 39 | github.com/libp2p/go-libp2p v0.41.1 // indirect 40 | github.com/mattn/go-isatty v0.0.20 // indirect 41 | github.com/minio/sha256-simd v1.0.1 // indirect 42 | github.com/mr-tron/base58 v1.2.0 // indirect 43 | github.com/multiformats/go-base32 v0.1.0 // indirect 44 | github.com/multiformats/go-base36 v0.2.0 // indirect 45 | github.com/multiformats/go-multiaddr v0.15.0 // indirect 46 | github.com/multiformats/go-multibase v0.2.0 // indirect 47 | github.com/multiformats/go-varint v0.0.7 // indirect 48 | github.com/petar/GoLLRB v0.0.0-20210522233825-ae3b015fd3e9 // indirect 49 | github.com/pmezard/go-difflib v1.0.0 // indirect 50 | github.com/polydawn/refmt v0.89.0 // indirect 51 | github.com/whyrusleeping/cbor v0.0.0-20171005072247-63513f603b11 // indirect 52 | github.com/whyrusleeping/cbor-gen v0.1.2 // indirect 53 | github.com/whyrusleeping/chunker v0.0.0-20181014151217-fe64bd25879f // indirect 54 | go.opentelemetry.io/auto/sdk v1.1.0 // indirect 55 | go.opentelemetry.io/otel v1.35.0 // indirect 56 | go.opentelemetry.io/otel/metric v1.35.0 // indirect 57 | go.opentelemetry.io/otel/trace v1.35.0 // indirect 58 | 
go.uber.org/multierr v1.11.0 // indirect 59 | go.uber.org/zap v1.27.0 // indirect 60 | golang.org/x/crypto v0.38.0 // indirect 61 | golang.org/x/exp v0.0.0-20250506013437-ce4c2cf36ca6 // indirect 62 | golang.org/x/sync v0.14.0 // indirect 63 | golang.org/x/sys v0.33.0 // indirect 64 | golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect 65 | gopkg.in/yaml.v3 v3.0.1 // indirect 66 | lukechampine.com/blake3 v1.4.1 // indirect 67 | ) 68 | -------------------------------------------------------------------------------- /hamt/errors.go: -------------------------------------------------------------------------------- 1 | package hamt 2 | 3 | import "fmt" 4 | 5 | type errorType string 6 | 7 | func (e errorType) Error() string { 8 | return string(e) 9 | } 10 | 11 | const ( 12 | // ErrNotProtobuf indicates an error attempting to load a HAMT from a non-protobuf node 13 | ErrNotProtobuf errorType = "node was not a protobuf node" 14 | // ErrNotUnixFSNode indicates an error attempting to load a HAMT from a generic protobuf node 15 | ErrNotUnixFSNode errorType = "node was not a UnixFS node" 16 | // ErrInvalidChildIndex indicates there is no link to load for the given child index 17 | ErrInvalidChildIndex errorType = "invalid index passed to operate children (likely corrupt bitfield)" 18 | // ErrHAMTTooDeep indicates we attempted to load from a HAMT node that went past the depth of the tree 19 | ErrHAMTTooDeep errorType = "sharded directory too deep" 20 | // ErrInvalidHashType indicates the HAMT node's hash function is unsupported (must be Murmur3) 21 | ErrInvalidHashType errorType = "only murmur3 supported as hash function" 22 | // ErrNoDataField indicates the HAMT node's UnixFS structure lacked a data field, which is 23 | // where a bit mask is stored 24 | ErrNoDataField errorType = "'Data' field not present" 25 | // ErrNoFanoutField indicates the HAMT node's UnixFS structure lacked a fanout field, which is required 26 | ErrNoFanoutField errorType = "'Fanout' 
field not present" 27 | // ErrHAMTSizeInvalid indicates the HAMT's size property was not an exact power of 2 28 | ErrHAMTSizeInvalid errorType = "hamt size should be a power of two" 29 | // ErrMissingLinkName indicates a link in a HAMT had no Name property (required for all HAMTs) 30 | ErrMissingLinkName errorType = "missing link name" 31 | ) 32 | 33 | // ErrInvalidLinkName indicates a link's name was too short for a HAMT 34 | type ErrInvalidLinkName struct { 35 | Name string 36 | } 37 | 38 | func (e ErrInvalidLinkName) Error() string { 39 | return fmt.Sprintf("invalid link name '%s'", e.Name) 40 | } 41 | -------------------------------------------------------------------------------- /hamt/fixtures/wikipedia-cryptographic-hash-function.car: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ipfs/go-unixfsnode/55bf436685936f51a82e553bab4776b54dda7932/hamt/fixtures/wikipedia-cryptographic-hash-function.car -------------------------------------------------------------------------------- /hamt/shardeddir.go: -------------------------------------------------------------------------------- 1 | package hamt 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | 7 | bitfield "github.com/ipfs/go-bitfield" 8 | "github.com/ipfs/go-unixfsnode/data" 9 | "github.com/ipfs/go-unixfsnode/iter" 10 | dagpb "github.com/ipld/go-codec-dagpb" 11 | "github.com/ipld/go-ipld-prime" 12 | "github.com/ipld/go-ipld-prime/schema" 13 | ) 14 | 15 | const ( 16 | // HashMurmur3 is the multiformats identifier for Murmur3 17 | HashMurmur3 uint64 = 0x22 18 | ) 19 | 20 | var _ ipld.Node = UnixFSHAMTShard(nil) 21 | var _ schema.TypedNode = UnixFSHAMTShard(nil) 22 | var _ ipld.ADL = UnixFSHAMTShard(nil) 23 | 24 | // UnixFSHAMTShared is an IPLD Prime Node that provides a read interface 25 | // to a UnixFS HAMT 26 | type UnixFSHAMTShard = *_UnixFSHAMTShard 27 | 28 | type _UnixFSHAMTShard struct { 29 | ctx context.Context 30 | _substrate dagpb.PBNode 31 
| data data.UnixFSData 32 | lsys *ipld.LinkSystem 33 | bitfield bitfield.Bitfield 34 | shardCache map[ipld.Link]*_UnixFSHAMTShard 35 | cachedLength int64 36 | } 37 | 38 | // NewUnixFSHAMTShard attempts to construct a UnixFSHAMTShard node from the base protobuf node plus 39 | // a decoded UnixFSData structure 40 | func NewUnixFSHAMTShard(ctx context.Context, substrate dagpb.PBNode, data data.UnixFSData, lsys *ipld.LinkSystem) (ipld.Node, error) { 41 | if err := validateHAMTData(data); err != nil { 42 | return nil, err 43 | } 44 | shardCache := make(map[ipld.Link]*_UnixFSHAMTShard, substrate.FieldLinks().Length()) 45 | bf, err := bitField(data) 46 | if err != nil { 47 | return nil, err 48 | } 49 | return &_UnixFSHAMTShard{ 50 | ctx: ctx, 51 | _substrate: substrate, 52 | data: data, 53 | lsys: lsys, 54 | shardCache: shardCache, 55 | bitfield: bf, 56 | cachedLength: -1, 57 | }, nil 58 | } 59 | 60 | // NewUnixFSHAMTShardWithPreload attempts to construct a UnixFSHAMTShard node from the base protobuf node plus 61 | // a decoded UnixFSData structure, and then iterate through and load the full set of hamt shards. 
62 | func NewUnixFSHAMTShardWithPreload(ctx context.Context, substrate dagpb.PBNode, data data.UnixFSData, lsys *ipld.LinkSystem) (ipld.Node, error) { 63 | n, err := NewUnixFSHAMTShard(ctx, substrate, data, lsys) 64 | if err != nil { 65 | return n, err 66 | } 67 | 68 | traverse, err := n.(*_UnixFSHAMTShard).length() 69 | if traverse == -1 { 70 | return n, fmt.Errorf("could not fully explore hamt during preload") 71 | } 72 | if err != nil { 73 | return n, err 74 | } 75 | 76 | return n, nil 77 | } 78 | 79 | func (n UnixFSHAMTShard) Substrate() ipld.Node { 80 | return n._substrate 81 | } 82 | 83 | func (n UnixFSHAMTShard) Kind() ipld.Kind { 84 | return n._substrate.Kind() 85 | } 86 | 87 | // LookupByString looks for the key in the list of links with a matching name 88 | func (n *_UnixFSHAMTShard) LookupByString(key string) (ipld.Node, error) { 89 | hv := &hashBits{b: hash([]byte(key))} 90 | return n.lookup(key, hv) 91 | } 92 | 93 | func (n UnixFSHAMTShard) lookup(key string, hv *hashBits) (dagpb.Link, error) { 94 | log2 := log2Size(n.data) 95 | maxPadLen := maxPadLength(n.data) 96 | childIndex, err := hv.Next(log2) 97 | if err != nil { 98 | return nil, err 99 | } 100 | 101 | if n.hasChild(childIndex) { 102 | pbLink, err := n.getChildLink(childIndex) 103 | if err != nil { 104 | return nil, err 105 | } 106 | isValue, err := isValueLink(pbLink, maxPadLen) 107 | if err != nil { 108 | return nil, err 109 | } 110 | if isValue { 111 | if MatchKey(pbLink, key, maxPadLen) { 112 | return pbLink.FieldHash(), nil 113 | } 114 | } else { 115 | childNd, err := n.loadChild(pbLink) 116 | if err != nil { 117 | return nil, err 118 | } 119 | return childNd.lookup(key, hv) 120 | } 121 | } 122 | return nil, schema.ErrNoSuchField{Type: nil /*TODO*/, Field: ipld.PathSegmentOfString(key)} 123 | } 124 | 125 | // AttemptHAMTShardFromNode attempts to read a HAMT shard from a general protobuf node 126 | func AttemptHAMTShardFromNode(ctx context.Context, nd ipld.Node, lsys *ipld.LinkSystem) 
(UnixFSHAMTShard, error) { 127 | // shortcut if node is already a hamt 128 | hnd, ok := nd.(UnixFSHAMTShard) 129 | if ok { 130 | return hnd, nil 131 | } 132 | pbnd, ok := nd.(dagpb.PBNode) 133 | if !ok { 134 | return nil, fmt.Errorf("hamt.AttemptHAMTShardFromNode: %w", ErrNotProtobuf) 135 | } 136 | if !pbnd.FieldData().Exists() { 137 | return nil, fmt.Errorf("hamt.AttemptHAMTShardFromNode: %w", ErrNotUnixFSNode) 138 | } 139 | data, err := data.DecodeUnixFSData(pbnd.FieldData().Must().Bytes()) 140 | if err != nil { 141 | return nil, err 142 | } 143 | und, err := NewUnixFSHAMTShard(ctx, pbnd, data, lsys) 144 | if err != nil { 145 | return nil, err 146 | } 147 | return und.(UnixFSHAMTShard), nil 148 | } 149 | 150 | func (n UnixFSHAMTShard) loadChild(pbLink dagpb.PBLink) (UnixFSHAMTShard, error) { 151 | cached, ok := n.shardCache[pbLink.FieldHash().Link()] 152 | if ok { 153 | return cached, nil 154 | } 155 | nd, err := n.lsys.Load(ipld.LinkContext{Ctx: n.ctx}, pbLink.FieldHash().Link(), dagpb.Type.PBNode) 156 | if err != nil { 157 | return nil, err 158 | } 159 | und, err := AttemptHAMTShardFromNode(n.ctx, nd, n.lsys) 160 | if err != nil { 161 | return nil, err 162 | } 163 | n.shardCache[pbLink.FieldHash().Link()] = und 164 | return und, nil 165 | } 166 | 167 | func (n UnixFSHAMTShard) LookupByNode(key ipld.Node) (ipld.Node, error) { 168 | ks, err := key.AsString() 169 | if err != nil { 170 | return nil, err 171 | } 172 | return n.LookupByString(ks) 173 | } 174 | 175 | func (n UnixFSHAMTShard) LookupByIndex(idx int64) (ipld.Node, error) { 176 | return n._substrate.LookupByIndex(idx) 177 | } 178 | 179 | func (n UnixFSHAMTShard) LookupBySegment(seg ipld.PathSegment) (ipld.Node, error) { 180 | return n.LookupByString(seg.String()) 181 | } 182 | 183 | func (n UnixFSHAMTShard) MapIterator() ipld.MapIterator { 184 | maxPadLen := maxPadLength(n.data) 185 | listItr := &_UnixFSShardedDir__ListItr{ 186 | _substrate: n.FieldLinks().Iterator(), 187 | maxPadLen: maxPadLen, 188 | nd: 
n, 189 | } 190 | st := stringTransformer{maxPadLen: maxPadLen} 191 | return iter.NewUnixFSDirMapIterator(listItr, st.transformNameNode) 192 | } 193 | 194 | type _UnixFSShardedDir__ListItr struct { 195 | _substrate *dagpb.PBLinks__Itr 196 | childIter *_UnixFSShardedDir__ListItr 197 | nd UnixFSHAMTShard 198 | maxPadLen int 199 | total int64 200 | } 201 | 202 | func (itr *_UnixFSShardedDir__ListItr) Next() (int64, dagpb.PBLink, error) { 203 | total := itr.total 204 | itr.total++ 205 | next, err := itr.next() 206 | if err != nil { 207 | return -1, nil, err 208 | } 209 | if next == nil { 210 | return -1, nil, nil 211 | } 212 | return total, next, nil 213 | } 214 | 215 | func (itr *_UnixFSShardedDir__ListItr) next() (dagpb.PBLink, error) { 216 | if itr.childIter == nil { 217 | if itr._substrate.Done() { 218 | return nil, nil 219 | } 220 | _, next := itr._substrate.Next() 221 | isValue, err := isValueLink(next, itr.maxPadLen) 222 | if err != nil { 223 | return nil, err 224 | } 225 | if isValue { 226 | return next, nil 227 | } 228 | child, err := itr.nd.loadChild(next) 229 | if err != nil { 230 | return nil, err 231 | } 232 | itr.childIter = &_UnixFSShardedDir__ListItr{ 233 | _substrate: child._substrate.FieldLinks().Iterator(), 234 | nd: child, 235 | maxPadLen: maxPadLength(child.data), 236 | } 237 | } 238 | _, next, err := itr.childIter.Next() 239 | if itr.childIter.Done() { 240 | // do this even on error to make sure we don't overrun a shard where the 241 | // end is missing and the user is ignoring NotFound errors 242 | itr.childIter = nil 243 | } 244 | if err != nil { 245 | return nil, err 246 | } 247 | return next, nil 248 | } 249 | 250 | func (itr *_UnixFSShardedDir__ListItr) Done() bool { 251 | return itr.childIter == nil && itr._substrate.Done() 252 | } 253 | 254 | // ListIterator returns an iterator which yields key-value pairs 255 | // traversing the node. 256 | // If the node kind is anything other than a list, nil will be returned. 
257 | // 258 | // The iterator will yield every entry in the list; that is, it 259 | // can be expected that itr.Next will be called node.Length times 260 | // before itr.Done becomes true. 261 | func (n UnixFSHAMTShard) ListIterator() ipld.ListIterator { 262 | return nil 263 | } 264 | 265 | // Length returns the length of a list, or the number of entries in a map, 266 | // or -1 if the node is not of list nor map kind. 267 | func (n UnixFSHAMTShard) length() (int64, error) { 268 | if n.cachedLength != -1 { 269 | return n.cachedLength, nil 270 | } 271 | maxPadLen := maxPadLength(n.data) 272 | total := int64(0) 273 | itr := n.FieldLinks().Iterator() 274 | for !itr.Done() { 275 | _, pbLink := itr.Next() 276 | isValue, err := isValueLink(pbLink, maxPadLen) 277 | if err != nil { 278 | return 0, err 279 | } 280 | if isValue { 281 | total++ 282 | } else { 283 | child, err := n.loadChild(pbLink) 284 | if err != nil { 285 | return 0, err 286 | } 287 | cl, err := child.length() 288 | if err != nil { 289 | return 0, err 290 | } 291 | total += cl 292 | } 293 | } 294 | n.cachedLength = total 295 | return total, nil 296 | } 297 | 298 | // Length returns the length of a list, or the number of entries in a map, 299 | // or -1 if the node is not of list nor map kind. 
300 | func (n UnixFSHAMTShard) Length() int64 { 301 | len, err := n.length() 302 | if err != nil { 303 | return 0 304 | } 305 | return len 306 | } 307 | 308 | func (n UnixFSHAMTShard) IsAbsent() bool { 309 | return false 310 | } 311 | 312 | func (n UnixFSHAMTShard) IsNull() bool { 313 | return false 314 | } 315 | 316 | func (n UnixFSHAMTShard) AsBool() (bool, error) { 317 | return n._substrate.AsBool() 318 | } 319 | 320 | func (n UnixFSHAMTShard) AsInt() (int64, error) { 321 | return n._substrate.AsInt() 322 | } 323 | 324 | func (n UnixFSHAMTShard) AsFloat() (float64, error) { 325 | return n._substrate.AsFloat() 326 | } 327 | 328 | func (n UnixFSHAMTShard) AsString() (string, error) { 329 | return n._substrate.AsString() 330 | } 331 | 332 | func (n UnixFSHAMTShard) AsBytes() ([]byte, error) { 333 | return n._substrate.AsBytes() 334 | } 335 | 336 | func (n UnixFSHAMTShard) AsLink() (ipld.Link, error) { 337 | return n._substrate.AsLink() 338 | } 339 | 340 | func (n UnixFSHAMTShard) Prototype() ipld.NodePrototype { 341 | // TODO: should this return something? 
342 | // probobly not until we write the write interfaces 343 | return nil 344 | } 345 | 346 | // satisfy schema.TypedNode 347 | func (UnixFSHAMTShard) Type() schema.Type { 348 | return nil /*TODO:typelit*/ 349 | } 350 | 351 | func (n UnixFSHAMTShard) Representation() ipld.Node { 352 | return n._substrate.Representation() 353 | } 354 | 355 | // Native map accessors 356 | 357 | func (n UnixFSHAMTShard) Iterator() *iter.UnixFSDir__Itr { 358 | maxPadLen := maxPadLength(n.data) 359 | listItr := &_UnixFSShardedDir__ListItr{ 360 | _substrate: n.FieldLinks().Iterator(), 361 | maxPadLen: maxPadLen, 362 | nd: n, 363 | } 364 | st := stringTransformer{maxPadLen: maxPadLen} 365 | return iter.NewUnixFSDirIterator(listItr, st.transformNameNode) 366 | } 367 | 368 | func (n UnixFSHAMTShard) Lookup(key dagpb.String) dagpb.Link { 369 | hv := &hashBits{b: hash([]byte(key.String()))} 370 | link, err := n.lookup(key.String(), hv) 371 | if err != nil { 372 | return nil 373 | } 374 | return link 375 | } 376 | 377 | // direct access to the links and data 378 | 379 | func (n UnixFSHAMTShard) FieldLinks() dagpb.PBLinks { 380 | return n._substrate.FieldLinks() 381 | } 382 | 383 | func (n UnixFSHAMTShard) FieldData() dagpb.MaybeBytes { 384 | return n._substrate.FieldData() 385 | } 386 | 387 | func (n UnixFSHAMTShard) getChildLink(childIndex int) (dagpb.PBLink, error) { 388 | linkIndex := n.bitfield.OnesBefore(childIndex) 389 | if linkIndex >= int(n.FieldLinks().Length()) || linkIndex < 0 { 390 | return nil, ErrInvalidChildIndex 391 | } 392 | return n.FieldLinks().Lookup(int64(linkIndex)), nil 393 | } 394 | 395 | func (n UnixFSHAMTShard) hasChild(childIndex int) bool { 396 | return n.bitfield.Bit(childIndex) 397 | } 398 | 399 | type stringTransformer struct { 400 | maxPadLen int 401 | } 402 | 403 | func (s stringTransformer) transformNameNode(nd dagpb.String) dagpb.String { 404 | nb := dagpb.Type.String.NewBuilder() 405 | err := nb.AssignString(nd.String()[s.maxPadLen:]) 406 | if err != nil { 
407 | return nil 408 | } 409 | return nb.Build().(dagpb.String) 410 | } 411 | -------------------------------------------------------------------------------- /hamt/shardeddir_test.go: -------------------------------------------------------------------------------- 1 | package hamt_test 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "fmt" 7 | "io" 8 | "math/rand" 9 | "os" 10 | "slices" 11 | "strings" 12 | "testing" 13 | "time" 14 | 15 | dag "github.com/ipfs/boxo/ipld/merkledag" 16 | mdtest "github.com/ipfs/boxo/ipld/merkledag/test" 17 | ft "github.com/ipfs/boxo/ipld/unixfs" 18 | legacy "github.com/ipfs/boxo/ipld/unixfs/hamt" 19 | format "github.com/ipfs/go-ipld-format" 20 | "github.com/ipfs/go-unixfsnode/hamt" 21 | "github.com/ipld/go-car/v2/storage" 22 | dagpb "github.com/ipld/go-codec-dagpb" 23 | "github.com/ipld/go-ipld-prime" 24 | "github.com/ipld/go-ipld-prime/fluent/qp" 25 | cidlink "github.com/ipld/go-ipld-prime/linking/cid" 26 | basicnode "github.com/ipld/go-ipld-prime/node/basic" 27 | "github.com/ipld/go-ipld-prime/schema" 28 | "github.com/stretchr/testify/require" 29 | ) 30 | 31 | // For now these tests use legacy UnixFS HAMT builders until we finish a builder 32 | // in go-ipld-prime 33 | func shuffle(seed int64, arr []string) { 34 | r := rand.New(rand.NewSource(seed)) 35 | for i := 0; i < len(arr); i++ { 36 | a := r.Intn(len(arr)) 37 | b := r.Intn(len(arr)) 38 | arr[a], arr[b] = arr[b], arr[a] 39 | } 40 | } 41 | 42 | func makeDir(ds format.DAGService, size int) ([]string, *legacy.Shard, error) { 43 | return makeDirWidth(ds, size, 256) 44 | } 45 | 46 | func makeDirWidth(ds format.DAGService, size, width int) ([]string, *legacy.Shard, error) { 47 | ctx := context.Background() 48 | 49 | s, err := legacy.NewShard(ds, width) 50 | if err != nil { 51 | return nil, nil, err 52 | } 53 | 54 | var dirs []string 55 | for i := 0; i < size; i++ { 56 | dirs = append(dirs, fmt.Sprintf("DIRNAME%d", i)) 57 | } 58 | 59 | shuffle(time.Now().UnixNano(), dirs) 60 | 61 | for 
i := 0; i < len(dirs); i++ { 62 | nd := ft.EmptyDirNode() 63 | err := ds.Add(ctx, nd) 64 | if err != nil { 65 | return nil, nil, err 66 | } 67 | err = s.Set(ctx, dirs[i], nd) 68 | if err != nil { 69 | return nil, nil, err 70 | } 71 | } 72 | 73 | return dirs, s, nil 74 | } 75 | 76 | func assertLinksEqual(linksA []*format.Link, linksB []*format.Link) error { 77 | if len(linksA) != len(linksB) { 78 | return fmt.Errorf("links arrays are different sizes") 79 | } 80 | 81 | sortLinks(linksA) 82 | sortLinks(linksB) 83 | for i, a := range linksA { 84 | b := linksB[i] 85 | if a.Name != b.Name { 86 | return fmt.Errorf("links names mismatch") 87 | } 88 | 89 | if a.Cid.String() != b.Cid.String() { 90 | return fmt.Errorf("link hashes dont match") 91 | } 92 | } 93 | 94 | return nil 95 | } 96 | 97 | func sortLinks(links []*format.Link) { 98 | slices.SortStableFunc(links, func(a, b *format.Link) int { 99 | return strings.Compare(a.Name, b.Name) 100 | }) 101 | } 102 | 103 | func mockDag() (format.DAGService, *ipld.LinkSystem) { 104 | bsrv := mdtest.Bserv() 105 | dsrv := dag.NewDAGService(bsrv) 106 | lsys := cidlink.DefaultLinkSystem() 107 | lsys.StorageReadOpener = func(lnkCtx ipld.LinkContext, lnk ipld.Link) (io.Reader, error) { 108 | cidLink, ok := lnk.(cidlink.Link) 109 | if !ok { 110 | return nil, fmt.Errorf("invalid link type for loading: %v", lnk) 111 | } 112 | 113 | blk, err := bsrv.GetBlock(lnkCtx.Ctx, cidLink.Cid) 114 | if err != nil { 115 | return nil, err 116 | } 117 | 118 | return bytes.NewReader(blk.RawData()), nil 119 | } 120 | lsys.TrustedStorage = true 121 | return dsrv, &lsys 122 | } 123 | 124 | func TestBasicSet(t *testing.T) { 125 | ds, lsys := mockDag() 126 | for _, w := range []int{128, 256, 512, 1024} { 127 | t.Run(fmt.Sprintf("BasicSet%d", w), func(t *testing.T) { 128 | names, s, err := makeDirWidth(ds, 1000, w) 129 | require.NoError(t, err) 130 | ctx := context.Background() 131 | legacyNode, err := s.Node() 132 | require.NoError(t, err) 133 | nd, err := 
lsys.Load(ipld.LinkContext{Ctx: ctx}, cidlink.Link{Cid: legacyNode.Cid()}, dagpb.Type.PBNode) 134 | require.NoError(t, err) 135 | hamtShard, err := hamt.AttemptHAMTShardFromNode(ctx, nd, lsys) 136 | require.NoError(t, err) 137 | for _, d := range names { 138 | _, err := hamtShard.LookupByString(d) 139 | require.NoError(t, err) 140 | } 141 | }) 142 | } 143 | } 144 | 145 | func TestIterator(t *testing.T) { 146 | ds, lsys := mockDag() 147 | _, s, err := makeDir(ds, 300) 148 | if err != nil { 149 | t.Fatal(err) 150 | } 151 | ctx := context.Background() 152 | 153 | legacyNode, err := s.Node() 154 | require.NoError(t, err) 155 | nds, err := legacy.NewHamtFromDag(ds, legacyNode) 156 | require.NoError(t, err) 157 | nd, err := lsys.Load(ipld.LinkContext{Ctx: ctx}, cidlink.Link{Cid: legacyNode.Cid()}, dagpb.Type.PBNode) 158 | require.NoError(t, err) 159 | hamtShard, err := hamt.AttemptHAMTShardFromNode(ctx, nd, lsys) 160 | require.NoError(t, err) 161 | 162 | linksA, err := nds.EnumLinks(ctx) 163 | require.NoError(t, err) 164 | 165 | require.Equal(t, int64(len(linksA)), hamtShard.Length()) 166 | 167 | linksB := make([]*format.Link, 0, len(linksA)) 168 | iter := hamtShard.Iterator() 169 | for !iter.Done() { 170 | name, link := iter.Next() 171 | linksB = append(linksB, &format.Link{ 172 | Name: name.String(), 173 | Cid: link.Link().(cidlink.Link).Cid, 174 | }) 175 | } 176 | require.NoError(t, assertLinksEqual(linksA, linksB)) 177 | } 178 | 179 | func TestLoadFailsFromNonShard(t *testing.T) { 180 | ds, lsys := mockDag() 181 | ctx := context.Background() 182 | legacyNode := ft.EmptyDirNode() 183 | ds.Add(ctx, legacyNode) 184 | nd, err := lsys.Load(ipld.LinkContext{Ctx: ctx}, cidlink.Link{Cid: legacyNode.Cid()}, dagpb.Type.PBNode) 185 | require.NoError(t, err) 186 | _, err = hamt.AttemptHAMTShardFromNode(ctx, nd, lsys) 187 | require.Error(t, err) 188 | 189 | // empty protobuf w/o data 190 | nd, err = qp.BuildMap(dagpb.Type.PBNode, -1, func(ma ipld.MapAssembler) { 191 | 
qp.MapEntry(ma, "Links", qp.List(-1, func(ipld.ListAssembler) {})) 192 | }) 193 | require.NoError(t, err) 194 | 195 | _, err = hamt.AttemptHAMTShardFromNode(ctx, nd, lsys) 196 | require.Error(t, err) 197 | } 198 | 199 | func TestFindNonExisting(t *testing.T) { 200 | ds, lsys := mockDag() 201 | _, s, err := makeDir(ds, 100) 202 | if err != nil { 203 | t.Fatal(err) 204 | } 205 | ctx := context.Background() 206 | legacyNode, err := s.Node() 207 | require.NoError(t, err) 208 | nd, err := lsys.Load(ipld.LinkContext{Ctx: ctx}, cidlink.Link{Cid: legacyNode.Cid()}, dagpb.Type.PBNode) 209 | require.NoError(t, err) 210 | hamtShard, err := hamt.AttemptHAMTShardFromNode(ctx, nd, lsys) 211 | require.NoError(t, err) 212 | for i := 0; i < 200; i++ { 213 | key := fmt.Sprintf("notfound%d", i) 214 | _, err := hamtShard.LookupByString(key) 215 | require.EqualError(t, err, schema.ErrNoSuchField{Field: ipld.PathSegmentOfString(key)}.Error()) 216 | } 217 | } 218 | 219 | func TestIncompleteShardedIteration(t *testing.T) { 220 | ctx := context.Background() 221 | req := require.New(t) 222 | 223 | fixture := "./fixtures/wikipedia-cryptographic-hash-function.car" 224 | f, err := os.Open(fixture) 225 | req.NoError(err) 226 | defer f.Close() 227 | carstore, err := storage.OpenReadable(f) 228 | req.NoError(err) 229 | lsys := cidlink.DefaultLinkSystem() 230 | lsys.TrustedStorage = true 231 | lsys.SetReadStorage(carstore) 232 | 233 | // classic recursive go-ipld-prime map iteration, being forgiving about 234 | // NotFound block loads to see what we end up with 235 | 236 | kvs := make(map[string]string) 237 | var iterNotFound int 238 | blockNotFound := make(map[string]struct{}) 239 | 240 | var iter func(string, ipld.Link) 241 | iter = func(dir string, lnk ipld.Link) { 242 | nd, err := lsys.Load(ipld.LinkContext{Ctx: ctx}, lnk, basicnode.Prototype.Any) 243 | if nf, ok := err.(interface{ NotFound() bool }); ok && nf.NotFound() { 244 | // got a named link that we can't load 245 | blockNotFound[dir] = 
struct{}{} 246 | return 247 | } 248 | req.NoError(err) 249 | if nd.Kind() == ipld.Kind_Bytes { 250 | bv, err := nd.AsBytes() 251 | req.NoError(err) 252 | kvs[dir] = string(bv) 253 | return 254 | } 255 | 256 | nb := dagpb.Type.PBNode.NewBuilder() 257 | req.NoError(nb.AssignNode(nd)) 258 | pbn := nb.Build() 259 | hamtShard, err := hamt.AttemptHAMTShardFromNode(ctx, pbn, &lsys) 260 | req.NoError(err) 261 | 262 | mi := hamtShard.MapIterator() 263 | for !mi.Done() { 264 | k, v, err := mi.Next() 265 | if nf, ok := err.(interface{ NotFound() bool }); ok && nf.NotFound() { 266 | // internal shard link that won't load, we don't know what it might 267 | // point to 268 | iterNotFound++ 269 | continue 270 | } 271 | req.NoError(err) 272 | ks, err := k.AsString() 273 | req.NoError(err) 274 | req.Equal(ipld.Kind_Link, v.Kind()) 275 | lv, err := v.AsLink() 276 | req.NoError(err) 277 | iter(dir+"/"+ks, lv) 278 | } 279 | } 280 | // walk the tree 281 | iter("", cidlink.Link{Cid: carstore.Roots()[0]}) 282 | 283 | req.Len(kvs, 1) 284 | req.Contains(kvs, "/wiki/Cryptographic_hash_function") 285 | req.Contains(kvs["/wiki/Cryptographic_hash_function"], "Cryptographic hash function\n") 286 | req.Equal(iterNotFound, 570) // tried to load 570 blocks that were not in the CAR 287 | req.Len(blockNotFound, 110) // 110 blocks, for named links, were not found in the CAR 288 | // some of the root block links 289 | req.Contains(blockNotFound, "/favicon.ico") 290 | req.Contains(blockNotFound, "/index.html") 291 | req.Contains(blockNotFound, "/zimdump_version") 292 | // some of the shard links 293 | req.Contains(blockNotFound, "/wiki/UK_railway_Signal") 294 | req.Contains(blockNotFound, "/wiki/Australian_House") 295 | req.Contains(blockNotFound, "/wiki/ICloud_Drive") 296 | req.Contains(blockNotFound, "/wiki/Édouard_Bamberger") 297 | } 298 | -------------------------------------------------------------------------------- /hamt/util.go: 
-------------------------------------------------------------------------------- 1 | package hamt 2 | 3 | // adapted from https://github.com/ipfs/go-unixfs/blob/master/hamt/util.go 4 | 5 | import ( 6 | "fmt" 7 | 8 | "math/bits" 9 | 10 | bitfield "github.com/ipfs/go-bitfield" 11 | "github.com/ipfs/go-unixfsnode/data" 12 | dagpb "github.com/ipld/go-codec-dagpb" 13 | "github.com/spaolacci/murmur3" 14 | ) 15 | 16 | // hashBits is a helper that allows the reading of the 'next n bits' as an integer. 17 | type hashBits struct { 18 | b []byte 19 | consumed int 20 | } 21 | 22 | func mkmask(n int) byte { 23 | return (1 << uint(n)) - 1 24 | } 25 | 26 | // Next returns the next 'i' bits of the hashBits value as an integer, or an 27 | // error if there aren't enough bits. 28 | func (hb *hashBits) Next(i int) (int, error) { 29 | if hb.consumed+i > len(hb.b)*8 { 30 | return 0, ErrHAMTTooDeep 31 | } 32 | return hb.next(i), nil 33 | } 34 | 35 | func (hb *hashBits) next(i int) int { 36 | curbi := hb.consumed / 8 37 | leftb := 8 - (hb.consumed % 8) 38 | 39 | curb := hb.b[curbi] 40 | if i == leftb { 41 | out := int(mkmask(i) & curb) 42 | hb.consumed += i 43 | return out 44 | } 45 | if i < leftb { 46 | a := curb & mkmask(leftb) // mask out the high bits we don't want 47 | b := a & ^mkmask(leftb-i) // mask out the low bits we don't want 48 | c := b >> uint(leftb-i) // shift whats left down 49 | hb.consumed += i 50 | return int(c) 51 | } 52 | out := int(mkmask(leftb) & curb) 53 | out <<= uint(i - leftb) 54 | hb.consumed += leftb 55 | out += hb.next(i - leftb) 56 | return out 57 | 58 | } 59 | 60 | func validateHAMTData(nd data.UnixFSData) error { 61 | if nd.FieldDataType().Int() != data.Data_HAMTShard { 62 | return data.ErrWrongNodeType{Expected: data.Data_HAMTShard, Actual: nd.FieldDataType().Int()} 63 | } 64 | 65 | if !nd.FieldHashType().Exists() || uint64(nd.FieldHashType().Must().Int()) != HashMurmur3 { 66 | return ErrInvalidHashType 67 | } 68 | 69 | if !nd.FieldData().Exists() { 70 | 
return ErrNoDataField 71 | } 72 | 73 | if !nd.FieldFanout().Exists() { 74 | return ErrNoFanoutField 75 | } 76 | if err := checkLogTwo(int(nd.FieldFanout().Must().Int())); err != nil { 77 | return err 78 | } 79 | 80 | return nil 81 | } 82 | 83 | func log2Size(nd data.UnixFSData) int { 84 | return bits.TrailingZeros(uint(nd.FieldFanout().Must().Int())) 85 | } 86 | 87 | func maxPadLength(nd data.UnixFSData) int { 88 | return len(fmt.Sprintf("%X", nd.FieldFanout().Must().Int()-1)) 89 | } 90 | 91 | const maximumHamtWidth = 1 << 10 92 | 93 | func bitField(nd data.UnixFSData) (bitfield.Bitfield, error) { 94 | fanout := int(nd.FieldFanout().Must().Int()) 95 | if fanout > maximumHamtWidth { 96 | return nil, fmt.Errorf("hamt witdh (%d) exceed maximum allowed (%d)", fanout, maximumHamtWidth) 97 | } 98 | bf, err := bitfield.NewBitfield(fanout) 99 | if err != nil { 100 | return nil, err 101 | } 102 | bf.SetBytes(nd.FieldData().Must().Bytes()) 103 | return bf, nil 104 | } 105 | 106 | func checkLogTwo(v int) error { 107 | if v <= 0 { 108 | return ErrHAMTSizeInvalid 109 | } 110 | lg2 := bits.TrailingZeros(uint(v)) 111 | if 1< 0 { 135 | // Wrap selector in ExploreFields as we walk back up through the path. 136 | // We can assume each segment to be a unixfs path section, so we 137 | // InterpretAs to make sure the node is reified through go-unixfsnode 138 | // (if possible) and we can traverse through according to unixfs pathing 139 | // rather than bare IPLD pathing - which also gives us the ability to 140 | // traverse through HAMT shards. 
141 | ss = ssb.ExploreInterpretAs("unixfs", ssb.ExploreFields( 142 | func(efsb builder.ExploreFieldsSpecBuilder) { 143 | efsb.Insert(segments.Last().String(), ss) 144 | }, 145 | )) 146 | if matchPath { 147 | ss = ssb.ExploreUnion(ssb.Matcher(), ss) 148 | } 149 | segments = segments.Pop() 150 | } 151 | 152 | return ss.Node() 153 | } 154 | 155 | func specBuilder(b func(ssb builder.SelectorSpecBuilder) builder.SelectorSpec) builder.SelectorSpec { 156 | return b(builder.NewSelectorSpecBuilder(basicnode.Prototype.Any)) 157 | } 158 | -------------------------------------------------------------------------------- /signalling_test.go: -------------------------------------------------------------------------------- 1 | package unixfsnode_test 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | "testing" 7 | 8 | "github.com/ipfs/go-unixfsnode" 9 | "github.com/ipld/go-ipld-prime" 10 | "github.com/ipld/go-ipld-prime/codec/dagjson" 11 | "github.com/ipld/go-ipld-prime/traversal/selector/builder" 12 | selectorparse "github.com/ipld/go-ipld-prime/traversal/selector/parse" 13 | "github.com/stretchr/testify/require" 14 | ) 15 | 16 | // Selectors are tested against JSON expected forms; this doesn't necessarily 17 | // validate that they work as advertised. It's just a sanity check that the 18 | // selectors are being built as expected. 19 | 20 | var exploreAllJson = mustDagJson(selectorparse.CommonSelector_ExploreAllRecursively) 21 | 22 | // explore interpret-as (~), next (>), match (.), interpreted as unixfs-preload 23 | var matchUnixfsPreloadJson = `{"~":{">":{".":{}},"as":"unixfs-preload"}}` 24 | 25 | // explore interpret-as (~), next (>), union (|) of match (.) 
and explore recursive (R) edge (@) with a depth of 1, interpreted as unixfs 26 | var matchUnixfsEntityJson = `{"~":{">":{"|":[{".":{}},{"R":{":>":{"a":{">":{"@":{}}}},"l":{"depth":1}}}]},"as":"unixfs"}}` 27 | 28 | // match interpret-as (~), next (>), match (.), interpreted as unixfs 29 | var matchUnixfsJson = `{"~":{">":{".":{}},"as":"unixfs"}}` 30 | 31 | func TestUnixFSPathSelector(t *testing.T) { 32 | testCases := []struct { 33 | name string 34 | path string 35 | expextedSelector string 36 | }{ 37 | { 38 | name: "empty path", 39 | path: "", 40 | expextedSelector: matchUnixfsJson, 41 | }, 42 | { 43 | name: "single field", 44 | path: "/foo", 45 | expextedSelector: jsonFields(matchUnixfsJson, "foo"), 46 | }, 47 | { 48 | name: "multiple fields", 49 | path: "/foo/bar", 50 | expextedSelector: jsonFields(matchUnixfsJson, "foo", "bar"), 51 | }, 52 | { 53 | name: "leading slash optional", 54 | path: "foo/bar", 55 | expextedSelector: jsonFields(matchUnixfsJson, "foo", "bar"), 56 | }, 57 | { 58 | name: "trailing slash optional", 59 | path: "/foo/bar/", 60 | expextedSelector: jsonFields(matchUnixfsJson, "foo", "bar"), 61 | }, 62 | { 63 | // a go-ipld-prime specific thing, not clearly specified by path spec (?) 64 | name: ".. 
is a field named ..", 65 | path: "/foo/../bar/", 66 | expextedSelector: jsonFields(matchUnixfsJson, "foo", "..", "bar"), 67 | }, 68 | { 69 | // a go-ipld-prime specific thing, not clearly specified by path spec 70 | name: "redundant slashes ignored", 71 | path: "foo///bar", 72 | expextedSelector: jsonFields(matchUnixfsJson, "foo", "bar"), 73 | }, 74 | } 75 | 76 | for _, tc := range testCases { 77 | t.Run(tc.name, func(t *testing.T) { 78 | sel := unixfsnode.UnixFSPathSelector(tc.path) 79 | require.Equal(t, tc.expextedSelector, mustDagJson(sel)) 80 | }) 81 | } 82 | } 83 | 84 | func TestUnixFSPathSelectorBuilder(t *testing.T) { 85 | testCases := []struct { 86 | name string 87 | path string 88 | target builder.SelectorSpec 89 | matchPath bool 90 | expextedSelector string 91 | }{ 92 | { 93 | name: "empty path", 94 | path: "", 95 | target: unixfsnode.ExploreAllRecursivelySelector, 96 | expextedSelector: exploreAllJson, 97 | }, 98 | { 99 | name: "empty path shallow (preload)", 100 | path: "", 101 | target: unixfsnode.MatchUnixFSPreloadSelector, 102 | expextedSelector: matchUnixfsPreloadJson, 103 | }, 104 | { 105 | name: "empty path shallow (entity)", 106 | path: "", 107 | target: unixfsnode.MatchUnixFSEntitySelector, 108 | expextedSelector: matchUnixfsEntityJson, 109 | }, 110 | { 111 | name: "single field", 112 | path: "/foo", 113 | expextedSelector: jsonFields(exploreAllJson, "foo"), 114 | target: unixfsnode.ExploreAllRecursivelySelector, 115 | }, 116 | { 117 | name: "single field, match path", 118 | path: "/foo", 119 | expextedSelector: jsonFieldsMatchPoint(exploreAllJson, "foo"), 120 | target: unixfsnode.ExploreAllRecursivelySelector, 121 | matchPath: true, 122 | }, 123 | { 124 | name: "single field shallow (preload)", 125 | path: "/foo", 126 | expextedSelector: jsonFields(matchUnixfsPreloadJson, "foo"), 127 | target: unixfsnode.MatchUnixFSPreloadSelector, 128 | }, 129 | { 130 | name: "single field shallow (entity)", 131 | path: "/foo", 132 | expextedSelector: 
jsonFields(matchUnixfsEntityJson, "foo"), 133 | target: unixfsnode.MatchUnixFSEntitySelector, 134 | }, 135 | { 136 | name: "multiple fields", 137 | path: "/foo/bar", 138 | expextedSelector: jsonFields(exploreAllJson, "foo", "bar"), 139 | target: unixfsnode.ExploreAllRecursivelySelector, 140 | }, 141 | { 142 | name: "multiple fields, match path", 143 | path: "/foo/bar", 144 | expextedSelector: jsonFieldsMatchPoint(exploreAllJson, "foo", "bar"), 145 | target: unixfsnode.ExploreAllRecursivelySelector, 146 | matchPath: true, 147 | }, 148 | { 149 | name: "multiple fields shallow", 150 | path: "/foo/bar", 151 | expextedSelector: jsonFields(matchUnixfsPreloadJson, "foo", "bar"), 152 | target: unixfsnode.MatchUnixFSPreloadSelector, 153 | }, 154 | { 155 | name: "leading slash optional", 156 | path: "foo/bar", 157 | expextedSelector: jsonFields(exploreAllJson, "foo", "bar"), 158 | target: unixfsnode.ExploreAllRecursivelySelector, 159 | }, 160 | { 161 | name: "trailing slash optional", 162 | path: "/foo/bar/", 163 | expextedSelector: jsonFields(exploreAllJson, "foo", "bar"), 164 | target: unixfsnode.ExploreAllRecursivelySelector, 165 | }, 166 | // a go-ipld-prime specific thing, not clearly specified by path spec (?) 167 | { 168 | name: ".. 
// jsonFields wraps target in one dag-json selector envelope per field name:
// each field adds an explore interpret-as ("~") whose next (">") explores the
// named field ("f" / "f>"), interpreted as unixfs. The first field is the
// outermost wrapper.
func jsonFields(target string, fields ...string) string {
	var out strings.Builder
	for _, name := range fields {
		out.WriteString(`{"~":{">":{"f":{"f>":{"`)
		out.WriteString(name)
		out.WriteString(`":`)
	}
	out.WriteString(target)
	for range fields {
		out.WriteString(`}}},"as":"unixfs"}}`)
	}
	return out.String()
}
and explore interpret-as (~) next (>), explore field (f) + specific field (f>), with field name 205 | sb.WriteString(fmt.Sprintf(`{"|":[{".":{}},{"~":{">":{"f":{"f>":{"%s":`, n)) 206 | } 207 | sb.WriteString(target) 208 | sb.WriteString(strings.Repeat(`}}},"as":"unixfs"}}]}`, len(fields))) 209 | return sb.String() 210 | } 211 | 212 | func mustDagJson(n ipld.Node) string { 213 | byts, err := ipld.Encode(n, dagjson.Encode) 214 | if err != nil { 215 | panic(err) 216 | } 217 | return string(byts) 218 | } 219 | -------------------------------------------------------------------------------- /test/doc.go: -------------------------------------------------------------------------------- 1 | // Package test provides ADL testing of the ipld specification around 2 | // * traversal making use of match subsets 3 | // * largeByteNode readers 4 | package test 5 | -------------------------------------------------------------------------------- /test/partial_file_access_test.go: -------------------------------------------------------------------------------- 1 | package test 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "io" 7 | "testing" 8 | 9 | "github.com/ipfs/go-test/random" 10 | "github.com/ipfs/go-unixfsnode/data/builder" 11 | "github.com/ipfs/go-unixfsnode/file" 12 | dagpb "github.com/ipld/go-codec-dagpb" 13 | "github.com/ipld/go-ipld-prime" 14 | "github.com/ipld/go-ipld-prime/datamodel" 15 | "github.com/ipld/go-ipld-prime/linking" 16 | cidlink "github.com/ipld/go-ipld-prime/linking/cid" 17 | basicnode "github.com/ipld/go-ipld-prime/node/basic" 18 | "github.com/ipld/go-ipld-prime/traversal" 19 | sb "github.com/ipld/go-ipld-prime/traversal/selector/builder" 20 | ) 21 | 22 | func TestPartialFileAccess(t *testing.T) { 23 | buf := make([]byte, 10*1024*1024) 24 | random.NewSeededRand(0xdeadbeef).Read(buf) 25 | r := bytes.NewReader(buf) 26 | 27 | ls := cidlink.DefaultLinkSystem() 28 | storage := cidlink.Memory{} 29 | ls.StorageReadOpener = storage.OpenRead 30 | 
ls.StorageWriteOpener = storage.OpenWrite 31 | 32 | f, _, err := builder.BuildUnixFSFile(r, "", &ls) 33 | if err != nil { 34 | t.Fatal(err) 35 | } 36 | 37 | // get back the root node substrate from the link at the top of the builder. 38 | fr, err := ls.Load(ipld.LinkContext{}, f, dagpb.Type.PBNode) 39 | if err != nil { 40 | t.Fatal(err) 41 | } 42 | 43 | ufn, err := file.NewUnixFSFile(context.Background(), fr, &ls) 44 | if err != nil { 45 | t.Fatal(err) 46 | } 47 | 48 | openedLinks := []ipld.Link{} 49 | ls.StorageReadOpener = func(lc linking.LinkContext, l datamodel.Link) (io.Reader, error) { 50 | openedLinks = append(openedLinks, l) 51 | return storage.OpenRead(lc, l) 52 | } 53 | 54 | // read back out the file. 55 | out, err := ufn.AsBytes() 56 | if err != nil { 57 | t.Fatal(err) 58 | } 59 | if !bytes.Equal(out, buf) { 60 | t.Fatal("Not equal") 61 | } 62 | 63 | fullLen := len(openedLinks) 64 | 65 | openedLinks = []ipld.Link{} 66 | 67 | partial, err := ufn.(datamodel.LargeBytesNode).AsLargeBytes() 68 | if err != nil { 69 | t.Fatal(err) 70 | } 71 | half := make([]byte, len(buf)/2) 72 | if _, err := partial.Read(half); err != nil { 73 | t.Fatal(err) 74 | } 75 | if len(openedLinks) >= fullLen { 76 | t.Fatal("should not have accessed full file on a partial read.") 77 | } 78 | 79 | openedLinks = []ipld.Link{} 80 | 81 | prog := traversal.Progress{ 82 | Cfg: &traversal.Config{ 83 | LinkSystem: ls, 84 | }, 85 | } 86 | sb := sb.NewSelectorSpecBuilder(basicnode.Prototype.Any) 87 | ss := sb.MatcherSubset(5*1024*1024, 6*1024*1024) 88 | sel, err := ss.Selector() 89 | if err != nil { 90 | t.Fatal(err) 91 | } 92 | 93 | if err := prog.WalkMatching(ufn, sel, func(_ traversal.Progress, n datamodel.Node) error { 94 | b, err := n.AsBytes() 95 | if err != nil { 96 | t.Fatal(err) 97 | } 98 | if len(b) != 1024*1024 { 99 | t.Fatalf("wrong length: %d", len(b)) 100 | } 101 | return nil 102 | }); err != nil { 103 | t.Fatal(err) 104 | } 105 | if len(openedLinks) >= fullLen { 106 | 
t.Fatal("should not have accessed full file on a partial traversal.") 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /testutil/directory.go: -------------------------------------------------------------------------------- 1 | package testutil 2 | 3 | import ( 4 | "context" 5 | "crypto/sha256" 6 | "encoding/hex" 7 | "fmt" 8 | "io" 9 | "testing" 10 | 11 | "github.com/ipfs/go-cid" 12 | dagpb "github.com/ipld/go-codec-dagpb" 13 | "github.com/ipld/go-ipld-prime" 14 | "github.com/ipld/go-ipld-prime/datamodel" 15 | "github.com/ipld/go-ipld-prime/linking" 16 | cidlink "github.com/ipld/go-ipld-prime/linking/cid" 17 | "github.com/ipld/go-ipld-prime/node/basicnode" 18 | "github.com/ipld/go-ipld-prime/traversal" 19 | "github.com/stretchr/testify/require" 20 | ) 21 | 22 | // DirEntry represents a flattened directory entry, where Path is from the 23 | // root of the directory and Content is the file contents. It is intended 24 | // that a DirEntry slice can be used to represent a full-depth directory without 25 | // needing nesting. 26 | type DirEntry struct { 27 | Path string 28 | Content []byte 29 | Root cid.Cid 30 | SelfCids []cid.Cid 31 | TSize uint64 32 | Children []DirEntry 33 | } 34 | 35 | func (de DirEntry) Size() (int64, error) { 36 | return int64(de.TSize), nil 37 | } 38 | 39 | func (de DirEntry) Link() ipld.Link { 40 | return cidlink.Link{Cid: de.Root} 41 | } 42 | 43 | // ToDirEntry takes a LinkSystem containing UnixFS data and builds a DirEntry 44 | // tree representing the file and directory structure it finds starting at the 45 | // rootCid. If expectFull is true, it will error if it encounters a UnixFS 46 | // node that it cannot fully load. If expectFull is false, it will ignore 47 | // errors and return nil for any node it cannot load. 
48 | func ToDirEntry(t *testing.T, linkSys linking.LinkSystem, rootCid cid.Cid, expectFull bool) DirEntry { 49 | return ToDirEntryFrom(t, linkSys, rootCid, "", expectFull) 50 | } 51 | 52 | // ToDirEntryFrom is the same as ToDirEntry but allows specifying a rootPath 53 | // such that the resulting DirEntry tree will all have that path as a prefix. 54 | // This is useful when representing a sub-DAG of a larger DAG where you want 55 | // to make direct comparisons. 56 | func ToDirEntryFrom(t *testing.T, linkSys linking.LinkSystem, rootCid cid.Cid, rootPath string, expectFull bool) DirEntry { 57 | var proto datamodel.NodePrototype = dagpb.Type.PBNode 58 | isDagPb := rootCid.Prefix().Codec == cid.DagProtobuf 59 | if !isDagPb { 60 | proto = basicnode.Prototype.Any 61 | } 62 | node, err := linkSys.Load(linking.LinkContext{Ctx: context.TODO()}, cidlink.Link{Cid: rootCid}, proto) 63 | if expectFull { 64 | require.NoError(t, err) 65 | } else if err != nil { 66 | if e, ok := err.(interface{ NotFound() bool }); ok && e.NotFound() { 67 | return DirEntry{} 68 | } 69 | require.NoError(t, err) 70 | } 71 | 72 | if node.Kind() == ipld.Kind_Bytes { // is a file 73 | byts, err := node.AsBytes() 74 | require.NoError(t, err) 75 | return DirEntry{ 76 | Path: rootPath, 77 | Content: byts, 78 | Root: rootCid, 79 | } 80 | } 81 | 82 | children := make([]DirEntry, 0) 83 | if isDagPb { 84 | // else is likely a directory 85 | for itr := node.MapIterator(); !itr.Done(); { 86 | k, v, err := itr.Next() 87 | require.NoError(t, err) 88 | childName, err := k.AsString() 89 | require.NoError(t, err) 90 | childLink, err := v.AsLink() 91 | require.NoError(t, err) 92 | child := ToDirEntryFrom(t, linkSys, childLink.(cidlink.Link).Cid, rootPath+"/"+childName, expectFull) 93 | children = append(children, child) 94 | } 95 | } else { 96 | // not a dag-pb node, let's pretend it is but using IPLD pathing rules 97 | err := traversal.WalkLocal(node, func(prog traversal.Progress, n ipld.Node) error { 98 | if 
n.Kind() == ipld.Kind_Link { 99 | l, err := n.AsLink() 100 | if err != nil { 101 | return err 102 | } 103 | child := ToDirEntryFrom(t, linkSys, l.(cidlink.Link).Cid, rootPath+"/"+prog.Path.String(), expectFull) 104 | children = append(children, child) 105 | } 106 | return nil 107 | }) 108 | require.NoError(t, err) 109 | } 110 | 111 | return DirEntry{ 112 | Path: rootPath, 113 | Root: rootCid, 114 | Children: children, 115 | } 116 | } 117 | 118 | // CompareDirEntries is a safe, recursive comparison between two DirEntry 119 | // values. It doesn't strictly require child ordering to match, but it does 120 | // require that all children exist and match, in some order. 121 | func CompareDirEntries(t *testing.T, a, b DirEntry) { 122 | // t.Log("CompareDirEntries", a.Path, b.Path) // TODO: remove this 123 | require.Equal(t, a.Path, b.Path) 124 | require.Equal(t, a.Root.String(), b.Root.String(), a.Path+" root mismatch") 125 | hashA := sha256.Sum256(a.Content) 126 | hashB := sha256.Sum256(b.Content) 127 | require.Equal(t, hex.EncodeToString(hashA[:]), hex.EncodeToString(hashB[:]), a.Path+"content hash mismatch") 128 | require.Equal(t, len(a.Children), len(b.Children), fmt.Sprintf("%s child length mismatch %d <> %d", a.Path, len(a.Children), len(b.Children))) 129 | for i := range a.Children { 130 | // not necessarily in order 131 | var found bool 132 | for j := range b.Children { 133 | if a.Children[i].Path == b.Children[j].Path { 134 | found = true 135 | CompareDirEntries(t, a.Children[i], b.Children[j]) 136 | } 137 | } 138 | require.True(t, found, fmt.Sprintf("@ path [%s], a's child [%s] not found in b", a.Path, a.Children[i].Path)) 139 | } 140 | } 141 | 142 | // WrapContent embeds the content we want in some random nested content such 143 | // that it's fetchable under the provided path. If exclusive is true, the 144 | // content will be the only thing under the path. If false, there will be 145 | // content before and after the wrapped content at each point in the path. 
146 | func WrapContent(t *testing.T, rndReader io.Reader, lsys *ipld.LinkSystem, content DirEntry, wrapPath string, exclusive bool) DirEntry { 147 | want := content 148 | ps := datamodel.ParsePath(wrapPath) 149 | for ps.Len() > 0 { 150 | de := []DirEntry{} 151 | if !exclusive { 152 | before := GenerateDirectory(t, lsys, rndReader, 4<<10, false) 153 | before.Path = "!before" 154 | de = append(de, before) 155 | } 156 | want.Path = ps.Last().String() 157 | de = append(de, want) 158 | if !exclusive { 159 | after := GenerateDirectory(t, lsys, rndReader, 4<<11, true) 160 | after.Path = "~after" 161 | de = append(de, after) 162 | } 163 | want = BuildDirectory(t, lsys, de, false) 164 | ps = ps.Pop() 165 | } 166 | return want 167 | } 168 | -------------------------------------------------------------------------------- /testutil/doc.go: -------------------------------------------------------------------------------- 1 | // Package testutil provides utilities for writing tests that require 2 | // nontrivial UnixFS data of various forms 3 | package testutil 4 | -------------------------------------------------------------------------------- /testutil/namegen/namegen.go: -------------------------------------------------------------------------------- 1 | package namegen 2 | 3 | import ( 4 | "encoding/binary" 5 | "io" 6 | "strings" 7 | ) 8 | 9 | var words = strings.Fields(wordData) 10 | var extensions = []string{"", ".txt", ".pdf", ".docx", ".png", ".jpg", ".csv", ".json", ".xml"} 11 | 12 | func getRandomIndex(r io.Reader, max int) (int, error) { 13 | var n uint32 14 | err := binary.Read(r, binary.BigEndian, &n) 15 | if err != nil { 16 | return 0, err 17 | } 18 | return int(n % uint32(max)), nil 19 | } 20 | 21 | // RandomDirectoryName returns a random directory name from the provided word list. 
22 | func RandomDirectoryName(r io.Reader) (string, error) { 23 | index, err := getRandomIndex(r, len(words)) 24 | if err != nil { 25 | return "", err 26 | } 27 | return words[index], nil 28 | } 29 | 30 | // RandomFileName returns a random file name with an extension from the provided word list and common extensions. 31 | func RandomFileName(r io.Reader) (string, error) { 32 | wordIndex, err := getRandomIndex(r, len(words)) 33 | if err != nil { 34 | return "", err 35 | } 36 | ext, err := RandomFileExtension(r) 37 | if err != nil { 38 | return "", err 39 | } 40 | return words[wordIndex] + ext, nil 41 | } 42 | 43 | // RandomFileExtension returns a random file extension, including '.'. This may 44 | // also return an empty string. 45 | func RandomFileExtension(r io.Reader) (string, error) { 46 | index, err := getRandomIndex(r, len(extensions)) 47 | if err != nil { 48 | return "", err 49 | } 50 | return extensions[index], nil 51 | } 52 | 53 | const wordData = `jabberwocky Snark whiffling borogoves mome raths brillig slithy toves outgrabe 54 | Tumtum Frabjous Bandersnatch Jubjub Callay slumgullion snicker-snack brobdingnagian Jabberwock 55 | tree Poglorian Binkleborf Wockbristle Zizzotether dinglewock Flumgurgle Glimperwick RazzleDazzle8 56 | gyre tortlewhack whispyfangle Crumplehorn Higgledy7 Piggledy3 flibberwocky Zamborot Flizzleflink 57 | gimble Shakespearean Macbeth Othello Hamlet soliloquy iambic pentameter Benvolio Capulet Montague 58 | Puck Malvolio Beatrice Prospero Iago Falstaff Rosencrantz Guildenstern Cordelia Polonius 59 | Titania Oberon Tybalt Caliban Mercutio Portia Brabantio 4Lear Desdemona Lysander 60 | YossarianScar Jujimufu9 Gorgulon Oozyboozle Razzmatazz8 BlinkenWoggle Flibbertigibbet Quixotic2 61 | Galumphing Widdershins Pecksniffian Bandicoot11 Flapdoodle Fandango Whippersnapper Grandiloquent 62 | Lollygag Persnickety Gibberish Codswallop Rigmarole Nincompoop Flummox Snollygoster Poppycock 63 | Kerfuffle Balderdash Gobbledygook Fiddle-faddle 
Antidisestablishmentarianism 64 | Supercalifragilisticexpialidocious Rambunctious9 Lickety-split Hullabaloo Skullduggery Ballyhoo 65 | Flabbergasted Discombobulate Pernicious Bumfuzzle Bamboozle Pandemonium Tomfoolery Hobbledehoy7 66 | Claptrap Cockamamie Hocus-pocus8 Higgledy-piggledy Dodecahedron Nonsensical Contraption Quizzical 67 | Snuffleupagus Ostentatious Serendipity Ephemeral Melancholy Sonorous Plethora Brouhaha Absquatulate 68 | Gobbledygook3 Lilliputian Chortle Euphonious Mellifluous Obfuscate Perspicacious Prevaricate 69 | Sesquipedalian Tintinnabulation Quibble9 Umbrageous Quotidian Flapdoodle5 NoodleDoodle 70 | Zigzagumptious Throttlebottom WuzzleWump Canoodle Hodgepodge Blatherskite7 Hornswoggle 71 | BibbidiBobbidiBoo Prestidigitation Confabulate Abscond8 Lickspittle Ragamuffin Taradiddle 72 | Widdershins4 Boondoggle Snuffleupagus9 Gallivant Folderol Malarkey Skedaddle Hobgoblin 73 | BlubberCrumble ZibberZap Snickerdoodle Mooncalf LicketySplit8 Whatchamacallit Thingamajig 74 | Thingamabob GibbleGabble FuddleDuddle LoopyLoo Splendiferous Bumbershoot Catawampus Flibbertigibbet5 75 | Gobbledygook7 Whippersnapper9 Ragamuffin8 Splendiferous 76 | ætheling witan ealdorman leofwyrd swain bēorhall beorn mēarh scōp cyning hēahgerefa 77 | sceadugenga wilweorc hildoræswa þegn ælfscyne wyrmslaga wælwulf fyrd hrēowmōd dēor 78 | ealdorleornung scyldwiga þēodcwealm hāligbōc gūþweard wealdend gāstcynn wīfmann 79 | wīsestōw þrēatung rīcere scealc eorþwerod bealucræft cynerīce sceorp ættwer 80 | gāsthof ealdrīce wæpnedmann wæterfōr landgemære gafolgelda wīcstede mægenþrymm 81 | æscwiga læcedōm wīdferhþ eorlgestrēon brimrād wæterstede hūslēoþ searocraeft 82 | þegnunga wælscenc þrīstguma fyrdrinc wundorcræft cræftleornung eorþbūend 83 | sǣlācend þunorrad wætergifu wæterscipe wæterþenung eorþtilþ eorþgebyrde 84 | eorþhæbbend eorþgræf eorþbærn eorþhūs eorþscearu eorþsweg eorþtæfl eorþweorc 85 | eorþweall eorþwaru eorþwela eorþwīs eorþworn eorþyþ eorþweg eorþwīse 
eorþwyrhta 86 | eorþwīn eorþsceaða eorþsweart eorþscræf eorþscrūd eorþswyft eorþscīr eorþscūa 87 | eorþsēoc eorþsele eorþhūsl eorþsted eorþswyn eorþsittend eorþsniþ eorþscearp 88 | eorþscyld eorþsceaft eorþstapol eorþstede eorþsmitta eorþscēawere 89 | velociraptorious chimeraesque bellerophontic serendipitastic transmogrification ultracrepidarian 90 | prestidigitationary supraluminescence hemidemisemiquaver unquestionability intercontinentalism 91 | antediluvianistic disproportionately absquatulationism automagicalization 92 | floccinaucinihilipilification quintessentiality incomprehensibility juxtapositionally 93 | perpendicularitude transubstantiation synchronicityverse astronomicalunit thermodynamicness 94 | electromagnetismal procrastinatorily disenfranchisement neutrinooscillation hyperventilatingly 95 | pneumonoultramicroscopicsilicovolcanoconiosis supercalifragilisticexpialidocious thaumaturgeonomics 96 | idiosyncratically unencumberedness phantasmagoricity extraterrestrialism philanthropistastic 97 | xenotransplantation incontrovertibility spontaneityvolution teleportationally labyrinthinean 98 | megalomaniaction cryptozoologician ineffablemystique multiplicativity sisypheanquandary 99 | overenthusiastically irrefutablenotion exceptionalitysphere 100 | blibby ploof twindle zibbet jinty wiblo glimsy snaft trindle quopp vistly chark plizet snibber frint 101 | trazzle buvvy skipple flizz dworp grindle yipple zarfle clippet swazz mibber brackle tindle grozz 102 | vindle plazz freggle twazz snuzzle gwippet whindle juzzle krazz yazzle flippet skindle zapple prazz 103 | buzzle chazz gripple snozzle trizz wazzle blikket zib glup snof yipr tazz vlim frub dwex klop 104 | aa ab ad ae ag ah ai al am an as at aw ax ay ba be bi bo by de do ed ef eh el em en er es et ex fa 105 | fe go ha he hi hm ho id if in is it jo ka ki la li lo ma me mi mm mo mu my na ne no nu od oe of oh 106 | oi om on op or os ow ox oy pa pe pi qi re sh si so ta ti to uh um un up us ut we wo xi xu 
ya ye yo 107 | za zo 108 | hĕlłø cąfѐ ŝmîłe þřęê ċỏẽxist ǩāŕáōķê ŧrävèl кυгiοsity ŭпịςørn мëĺōđỳ ğħōšţ ŵăνę ẓẽṕhýr ғụzzlę 109 | пåŕŧy 僃êct ԁяêåм љúвïĺëë ѓåḿъḽë ţęmƿęşţ říše čajovna želva štěstí ýpsilon ďábel ňadraží ťava 110 | h3ll0 w0rld c0d1ng 3x3mpl3 pr0gr4mm1ng d3v3l0p3r 5cr4bbl3 3l3ph4nt 4pp 5y5t3m 1nput 0utput 3rr0r 111 | 5t4ck0v3rfl0w 5tr1ng 5l1c3 5h4k35p34r3 5t4nd4rd 3ncrypt10n 5h3ll 5cr1pt 5t4ck 5qu4r3 r3ct4ngl3 112 | tr14ngl3 c1rc13 5ph3r3 5qu4r3r00t 3xpr35510n 5t4t15t1c5 5t4t3m3nt 5ynt4x 5ugg35t10n 5y5t3m4t1c 113 | 5h0rtcut 5h4d0w 5h4r3d 114 | 1 2 3 4 5 6 7 8 9 0 115 | a b c d e f g h i j k l m n o p q r s t u v w x y z 116 | A B C D E F G H I J K L M N O P Q R S T U V W X Y Z` 117 | -------------------------------------------------------------------------------- /utils/utils.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import dagpb "github.com/ipld/go-codec-dagpb" 4 | 5 | // Lookup finds a name key in a list of dag pb links 6 | func Lookup(links dagpb.PBLinks, key string) dagpb.Link { 7 | li := links.Iterator() 8 | for !li.Done() { 9 | _, next := li.Next() 10 | name := "" 11 | if next.FieldName().Exists() { 12 | name = next.FieldName().Must().String() 13 | } 14 | if key == name { 15 | return next.FieldHash() 16 | } 17 | } 18 | return nil 19 | } 20 | -------------------------------------------------------------------------------- /version.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "v1.10.1" 3 | } 4 | --------------------------------------------------------------------------------