├── .github └── workflows │ └── go.yml ├── .gitignore ├── .gitmodules ├── FUNDING.json ├── LICENSE ├── Makefile ├── README.md ├── chunk.go ├── cmd └── graphsplit │ └── main.go ├── commp.go ├── commp_test.go ├── dataset ├── import.go └── types.go ├── doc └── README.md ├── go.mod ├── go.sum ├── restore.go └── utils.go /.github/workflows/go.yml: -------------------------------------------------------------------------------- 1 | name: Go 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | 11 | build: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v2 15 | 16 | - name: Set up Go 17 | uses: actions/setup-go@v2 18 | with: 19 | go-version: 1.18 20 | 21 | - name: Prepare 22 | run: sudo apt-get update && sudo apt-get install ocl-icd-opencl-dev libhwloc-dev 23 | 24 | - name: Build ffi 25 | run: git submodule update --init --recursive && make ffi 26 | 27 | - name: Build 28 | run: make 29 | 30 | - name: Test 31 | run: go test -v ./... 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /graphsplit 2 | /commp 3 | 4 | # Binaries for programs and plugins 5 | *.exe 6 | *.exe~ 7 | *.dll 8 | *.so 9 | *.dylib 10 | 11 | # Test binary, built with `go test -c` 12 | *.test 13 | 14 | # Output of the go coverage tool, specifically when used with LiteIDE 15 | *.out 16 | 17 | # Dependency directories (remove the comment below to include it) 18 | # vendor/ 19 | 20 | .idea 21 | .vscode 22 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "extern/filecoin-ffi"] 2 | path = extern/filecoin-ffi 3 | url = https://github.com/filecoin-project/filecoin-ffi.git 4 | -------------------------------------------------------------------------------- /FUNDING.json: -------------------------------------------------------------------------------- 1 | { 2 | "drips": { 3 | "filecoin": { 4 | "ownedBy": "0x23F7FdEbB761609B70acA148052A2E3267Ce8e9B" 5 | } 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 filedrive-team 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | build: 2 | rm -rf ./graphsplit 3 | go build -ldflags "-s -w" -o graphsplit ./cmd/graphsplit/main.go 4 | .PHONY: build 5 | 6 | ## FFI 7 | 8 | ffi: 9 | ./extern/filecoin-ffi/install-filcrypto 10 | .PHONY: ffi 11 | 12 | test: 13 | go test -v ./... 14 | .PHONY: test 15 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Go-graphsplit 2 | ================== 3 | [![](https://img.shields.io/github/go-mod/go-version/filedrive-team/go-graphsplit)]() 4 | [![](https://goreportcard.com/badge/github.com/filedrive-team/go-graphsplit)](https://goreportcard.com/report/github.com/filedrive-team/go-graphsplit) 5 | [![](https://github.com/filedrive-team/go-graphsplit/actions/workflows/go.yml/badge.svg)]() 6 | [![](https://img.shields.io/github/license/filedrive-team/go-graphsplit)](https://github.com/filedrive-team/go-graphsplit/blob/main/LICENSE) 7 | 8 | > A tool for splitting a large dataset into graph slices to make deals in the Filecoin Network 9 | 10 | 11 | When storing a large dataset, we need to split it into smaller pieces to fit the sector's size, which could generally be 32GiB or 64GiB. 12 | 13 | If we make these data into a large tarball, chunk it into small pieces, and then make storage deals with miners with these pieces, on the side of storage, it will be pretty efficient and allow us to store hundreds of TiB data in a month. However, this way will also bring difficulties for data retrieval. Even if we only needed to retrieve a small file, we would first have to retrieve and download all the pieces of this tarball, decompress it, and find the specific file we needed. 14 | 15 | Graphsplit can solve this problem. It takes advantage of IPLD protocol, follows the [Unixfs](https://github.com/ipfs/go-unixfs) format data structures, and regards the dataset or its sub-directory as a big graph, then cuts it into small graphs. Each small graph will keep its file system structure as possible as it used to be. After that, we only need to organize these small graphs into a car file. If one data piece has a complete file and we need to retrieve it, we only need to use payload CID to retrieve it through the lotus client, fetch it back, and get the file. Besides, Graphsplit will create a manifest.csv to save the mapping with graph slice name, payload CID, Piece CID, and the inner file structure. 16 | 17 | Another advantage of Graphsplit is it can perfectly match IPFS. Like if you build an IPFS website as your Deal UI website, the inner file structure of each data piece can be shown on it, and it is easier for users to retrieve and download the data they stored. 18 | 19 | 20 | ## Build 21 | ```sh 22 | git clone https://github.com/filedrive-team/go-graphsplit.git 23 | 24 | cd go-graphsplit 25 | 26 | # get submodules 27 | git submodule update --init --recursive 28 | 29 | # build filecoin-ffi 30 | make ffi 31 | 32 | make 33 | ``` 34 | 35 | ## Usage 36 | 37 | [See the work flow of graphsplit](doc/README.md) 38 | 39 | Splitting dataset: 40 | ```sh 41 | 42 | 43 | ./graphsplit chunk \ 44 | # car-dir: folder for splitted smaller pieces, in form of .car 45 | --car-dir=path/to/car-dir \ 46 | # slice-size: size for each pieces 47 | --slice-size=17179869184 \ 48 | # parallel: number goroutines run when building ipld nodes 49 | --parallel=2 \ 50 | # graph-name: it will use graph-name for prefix of smaller pieces 51 | --graph-name=gs-test \ 52 | # calc-commp: calculation of pieceCID, default value is false. Be careful, a lot of cpu, memory and time would be consumed if slice size is very large. 53 | --calc-commp=false \ 54 | # set true if want padding the car file to fit piece size 55 | --add-padding=false \ 56 | # set true if want using piececid to name the chunk file 57 | --rename=false \ 58 | # parent-path: usually just be the same as /path/to/dataset, it's just a method to figure out relative path when building IPLD graph 59 | --parent-path=/path/to/dataset \ 60 | /path/to/dataset 61 | ``` 62 | Notes: A manifest.csv will created to save the mapping with graph slice name, the payload cid and slice inner structure. As following: 63 | ```sh 64 | cat /path/to/car-dir/manifest.csv 65 | payload_cid,filename,detail 66 | ba...,graph-slice-name.car,inner-structure-json 67 | ``` 68 | If set --calc-commp=true, two another fields would be add to manifest.csv 69 | ```sh 70 | cat /path/to/car-dir/manifest.csv 71 | payload_cid,filename,piece_cid,piece_size,detail 72 | ba...,graph-slice-name.car,baga...,16646144,inner-structure-json 73 | ``` 74 | 75 | Import car file to IPFS: 76 | ```sh 77 | ipfs dag import /path/to/car-dir/car-file 78 | ``` 79 | 80 | Restore files: 81 | ```sh 82 | # car-path: directory or file, in form of .car 83 | # output-dir: usually just be the same as /path/to/output-dir 84 | # parallel: number goroutines run when restoring 85 | ./graphsplit restore \ 86 | --car-path=/path/to/car-path \ 87 | --output-dir=/path/to/output-dir \ 88 | --parallel=2 89 | ``` 90 | 91 | PieceCID Calculation for a single car file: 92 | 93 | 94 | ```shell 95 | # Calculate pieceCID for a single car file 96 | # 97 | ./graphsplit commP /path/to/carfile 98 | ``` 99 | 100 | ## Contribute 101 | 102 | PRs are welcome! 103 | 104 | 105 | ## License 106 | 107 | MIT 108 | 109 | -------------------------------------------------------------------------------- /chunk.go: -------------------------------------------------------------------------------- 1 | package graphsplit 2 | 3 | import ( 4 | "context" 5 | "encoding/csv" 6 | "fmt" 7 | "os" 8 | "path" 9 | "strconv" 10 | "time" 11 | 12 | ipld "github.com/ipfs/go-ipld-format" 13 | logging "github.com/ipfs/go-log/v2" 14 | "golang.org/x/xerrors" 15 | ) 16 | 17 | var log = logging.Logger("graphsplit") 18 | 19 | type GraphBuildCallback interface { 20 | OnSuccess(node ipld.Node, graphName, fsDetail string) 21 | OnError(error) 22 | } 23 | 24 | type commPCallback struct { 25 | carDir string 26 | rename bool 27 | addPadding bool 28 | } 29 | 30 | func (cc *commPCallback) OnSuccess(node ipld.Node, graphName, fsDetail string) { 31 | fmt.Println("xxxxx") 32 | commpStartTime := time.Now() 33 | carfilepath := path.Join(cc.carDir, node.Cid().String()+".car") 34 | cpRes, err := CalcCommP(context.TODO(), carfilepath, cc.rename, cc.addPadding) 35 | if err != nil { 36 | log.Fatal(err) 37 | } 38 | log.Infof("calculation of pieceCID completed, time elapsed: %s", time.Now().Sub(commpStartTime)) 39 | // Add node inof to manifest.csv 40 | manifestPath := path.Join(cc.carDir, "manifest.csv") 41 | _, err = os.Stat(manifestPath) 42 | if err != nil && !os.IsNotExist(err) { 43 | log.Fatal(err) 44 | } 45 | var isCreateAction bool 46 | if err != nil && os.IsNotExist(err) { 47 | isCreateAction = true 48 | } 49 | f, err := os.OpenFile(manifestPath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) 50 | if err != nil { 51 | log.Fatal(err) 52 | } 53 | defer f.Close() 54 | 55 | csvWriter := csv.NewWriter(f) 56 | csvWriter.UseCRLF = true 57 | defer csvWriter.Flush() 58 | if isCreateAction { 59 | csvWriter.Write([]string{ 60 | "playload_cid", "filename", "piece_cid", "payload_size", "piece_size", "detail", 61 | }) 62 | } 63 | 64 | if err := csvWriter.Write([]string{ 65 | node.Cid().String(), graphName, cpRes.Root.String(), strconv.FormatInt(cpRes.PayloadSize, 10), strconv.FormatUint(uint64(cpRes.Size), 10), fsDetail, 66 | }); err != nil { 67 | log.Fatal(err) 68 | } 69 | } 70 | 71 | func (cc *commPCallback) OnError(err error) { 72 | log.Fatal(err) 73 | } 74 | 75 | type csvCallback struct { 76 | carDir string 77 | } 78 | 79 | func (cc *csvCallback) OnSuccess(node ipld.Node, graphName, fsDetail string) { 80 | // Add node inof to manifest.csv 81 | manifestPath := path.Join(cc.carDir, "manifest.csv") 82 | _, err := os.Stat(manifestPath) 83 | if err != nil && !os.IsNotExist(err) { 84 | log.Fatal(err) 85 | } 86 | var isCreateAction bool 87 | if err != nil && os.IsNotExist(err) { 88 | isCreateAction = true 89 | } 90 | f, err := os.OpenFile(manifestPath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) 91 | if err != nil { 92 | log.Fatal(err) 93 | } 94 | defer f.Close() 95 | if isCreateAction { 96 | if _, err := f.Write([]byte("playload_cid,filename,detail\n")); err != nil { 97 | log.Fatal(err) 98 | } 99 | } 100 | if _, err := f.Write([]byte(fmt.Sprintf("%s,%s,%s\n", node.Cid(), graphName, fsDetail))); err != nil { 101 | log.Fatal(err) 102 | } 103 | } 104 | 105 | func (cc *csvCallback) OnError(err error) { 106 | log.Fatal(err) 107 | } 108 | 109 | type errCallback struct{} 110 | 111 | func (cc *errCallback) OnSuccess(ipld.Node, string, string) {} 112 | func (cc *errCallback) OnError(err error) { 113 | log.Fatal(err) 114 | } 115 | 116 | func CommPCallback(carDir string, rename, addPadding bool) GraphBuildCallback { 117 | return &commPCallback{carDir: carDir, rename: rename, addPadding: addPadding} 118 | } 119 | 120 | func CSVCallback(carDir string) GraphBuildCallback { 121 | return &csvCallback{carDir: carDir} 122 | } 123 | func ErrCallback() GraphBuildCallback { 124 | return &errCallback{} 125 | } 126 | 127 | func Chunk(ctx context.Context, sliceSize int64, parentPath, targetPath, carDir, graphName string, parallel int, cb GraphBuildCallback) error { 128 | var cumuSize int64 = 0 129 | graphSliceCount := 0 130 | graphFiles := make([]Finfo, 0) 131 | if sliceSize == 0 { 132 | return xerrors.Errorf("Unexpected! Slice size has been set as 0") 133 | } 134 | if parallel <= 0 { 135 | return xerrors.Errorf("Unexpected! Parallel has to be greater than 0") 136 | } 137 | if parentPath == "" { 138 | parentPath = targetPath 139 | } 140 | 141 | args := []string{targetPath} 142 | sliceTotal := GetGraphCount(args, sliceSize) 143 | if sliceTotal == 0 { 144 | log.Warn("Empty folder or file!") 145 | return nil 146 | } 147 | files := GetFileListAsync(args) 148 | for item := range files { 149 | fileSize := item.Info.Size() 150 | switch { 151 | case cumuSize+fileSize < sliceSize: 152 | cumuSize += fileSize 153 | graphFiles = append(graphFiles, item) 154 | case cumuSize+fileSize == sliceSize: 155 | cumuSize += fileSize 156 | graphFiles = append(graphFiles, item) 157 | // todo build ipld from graphFiles 158 | BuildIpldGraph(ctx, graphFiles, GenGraphName(graphName, graphSliceCount, sliceTotal), parentPath, carDir, parallel, cb) 159 | fmt.Printf("cumu-size: %d\n", cumuSize) 160 | fmt.Printf(GenGraphName(graphName, graphSliceCount, sliceTotal)) 161 | fmt.Printf("=================\n") 162 | cumuSize = 0 163 | graphFiles = make([]Finfo, 0) 164 | graphSliceCount++ 165 | case cumuSize+fileSize > sliceSize: 166 | fileSliceCount := 0 167 | // need to split item to fit graph slice 168 | // 169 | // first cut 170 | firstCut := sliceSize - cumuSize 171 | var seekStart int64 = 0 172 | var seekEnd int64 = seekStart + firstCut - 1 173 | fmt.Printf("first cut %d, seek start at %d, end at %d", firstCut, seekStart, seekEnd) 174 | fmt.Printf("----------------\n") 175 | graphFiles = append(graphFiles, Finfo{ 176 | Path: item.Path, 177 | Name: fmt.Sprintf("%s.%08d", item.Info.Name(), fileSliceCount), 178 | Info: item.Info, 179 | SeekStart: seekStart, 180 | SeekEnd: seekEnd, 181 | }) 182 | fileSliceCount++ 183 | // todo build ipld from graphFiles 184 | BuildIpldGraph(ctx, graphFiles, GenGraphName(graphName, graphSliceCount, sliceTotal), parentPath, carDir, parallel, cb) 185 | fmt.Printf("cumu-size: %d\n", cumuSize+firstCut) 186 | fmt.Printf(GenGraphName(graphName, graphSliceCount, sliceTotal)) 187 | fmt.Printf("=================\n") 188 | cumuSize = 0 189 | graphFiles = make([]Finfo, 0) 190 | graphSliceCount++ 191 | for seekEnd < fileSize-1 { 192 | seekStart = seekEnd + 1 193 | seekEnd = seekStart + sliceSize - 1 194 | if seekEnd >= fileSize-1 { 195 | seekEnd = fileSize - 1 196 | } 197 | fmt.Printf("following cut %d, seek start at %d, end at %d", seekEnd-seekStart+1, seekStart, seekEnd) 198 | fmt.Printf("----------------\n") 199 | cumuSize += seekEnd - seekStart + 1 200 | graphFiles = append(graphFiles, Finfo{ 201 | Path: item.Path, 202 | Name: fmt.Sprintf("%s.%08d", item.Info.Name(), fileSliceCount), 203 | Info: item.Info, 204 | SeekStart: seekStart, 205 | SeekEnd: seekEnd, 206 | }) 207 | fileSliceCount++ 208 | if seekEnd-seekStart == sliceSize-1 { 209 | // todo build ipld from graphFiles 210 | BuildIpldGraph(ctx, graphFiles, GenGraphName(graphName, graphSliceCount, sliceTotal), parentPath, carDir, parallel, cb) 211 | fmt.Printf("cumu-size: %d\n", sliceSize) 212 | fmt.Printf(GenGraphName(graphName, graphSliceCount, sliceTotal)) 213 | fmt.Printf("=================\n") 214 | cumuSize = 0 215 | graphFiles = make([]Finfo, 0) 216 | graphSliceCount++ 217 | } 218 | } 219 | 220 | } 221 | } 222 | if cumuSize > 0 { 223 | // todo build ipld from graphFiles 224 | BuildIpldGraph(ctx, graphFiles, GenGraphName(graphName, graphSliceCount, sliceTotal), parentPath, carDir, parallel, cb) 225 | fmt.Printf("cumu-size: %d\n", cumuSize) 226 | fmt.Printf(GenGraphName(graphName, graphSliceCount, sliceTotal)) 227 | fmt.Printf("=================\n") 228 | } 229 | return nil 230 | } 231 | -------------------------------------------------------------------------------- /cmd/graphsplit/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "os" 7 | 8 | "github.com/filedrive-team/go-graphsplit" 9 | "github.com/filedrive-team/go-graphsplit/dataset" 10 | logging "github.com/ipfs/go-log/v2" 11 | "github.com/urfave/cli/v2" 12 | "golang.org/x/xerrors" 13 | ) 14 | 15 | var log = logging.Logger("graphsplit") 16 | 17 | func main() { 18 | logging.SetLogLevel("*", "INFO") 19 | local := []*cli.Command{ 20 | chunkCmd, 21 | restoreCmd, 22 | commpCmd, 23 | importDatasetCmd, 24 | } 25 | 26 | app := &cli.App{ 27 | Name: "graphsplit", 28 | Flags: []cli.Flag{}, 29 | Commands: local, 30 | } 31 | 32 | if err := app.Run(os.Args); err != nil { 33 | fmt.Println("Error: ", err) 34 | os.Exit(1) 35 | } 36 | } 37 | 38 | var chunkCmd = &cli.Command{ 39 | Name: "chunk", 40 | Usage: "Generate CAR files of the specified size", 41 | Flags: []cli.Flag{ 42 | &cli.Uint64Flag{ 43 | Name: "slice-size", 44 | Value: 17179869184, // 16G 45 | Usage: "specify chunk piece size", 46 | }, 47 | &cli.UintFlag{ 48 | Name: "parallel", 49 | Value: 2, 50 | Usage: "specify how many number of goroutines runs when generate file node", 51 | }, 52 | &cli.StringFlag{ 53 | Name: "graph-name", 54 | Required: true, 55 | Usage: "specify graph name", 56 | }, 57 | &cli.StringFlag{ 58 | Name: "car-dir", 59 | Required: true, 60 | Usage: "specify output CAR directory", 61 | }, 62 | &cli.StringFlag{ 63 | Name: "parent-path", 64 | Value: "", 65 | Usage: "specify graph parent path", 66 | }, 67 | &cli.BoolFlag{ 68 | Name: "save-manifest", 69 | Value: true, 70 | Usage: "create a mainfest.csv in car-dir to save mapping of data-cids and slice names", 71 | }, 72 | &cli.BoolFlag{ 73 | Name: "calc-commp", 74 | Value: false, 75 | Usage: "create a mainfest.csv in car-dir to save mapping of data-cids, slice names, piece-cids and piece-sizes", 76 | }, 77 | &cli.BoolFlag{ 78 | Name: "rename", 79 | Value: false, 80 | Usage: "rename carfile to piece", 81 | }, 82 | &cli.BoolFlag{ 83 | Name: "add-padding", 84 | Value: false, 85 | Usage: "add padding to carfile in order to convert it to piece file", 86 | }, 87 | }, 88 | Action: func(c *cli.Context) error { 89 | ctx := context.Background() 90 | parallel := c.Uint("parallel") 91 | sliceSize := c.Uint64("slice-size") 92 | parentPath := c.String("parent-path") 93 | carDir := c.String("car-dir") 94 | if !graphsplit.ExistDir(carDir) { 95 | return xerrors.Errorf("Unexpected! The path of car-dir does not exist") 96 | } 97 | graphName := c.String("graph-name") 98 | if sliceSize == 0 { 99 | return xerrors.Errorf("Unexpected! Slice size has been set as 0") 100 | } 101 | 102 | targetPath := c.Args().First() 103 | var cb graphsplit.GraphBuildCallback 104 | if c.Bool("calc-commp") { 105 | cb = graphsplit.CommPCallback(carDir, c.Bool("rename"), c.Bool("add-padding")) 106 | } else if c.Bool("save-manifest") { 107 | cb = graphsplit.CSVCallback(carDir) 108 | } else { 109 | cb = graphsplit.ErrCallback() 110 | } 111 | return graphsplit.Chunk(ctx, int64(sliceSize), parentPath, targetPath, carDir, graphName, int(parallel), cb) 112 | }, 113 | } 114 | 115 | var restoreCmd = &cli.Command{ 116 | Name: "restore", 117 | Usage: "Restore files from CAR files", 118 | Flags: []cli.Flag{ 119 | &cli.StringFlag{ 120 | Name: "car-path", 121 | Required: true, 122 | Usage: "specify source car path, directory or file", 123 | }, 124 | &cli.StringFlag{ 125 | Name: "output-dir", 126 | Required: true, 127 | Usage: "specify output directory", 128 | }, 129 | &cli.IntFlag{ 130 | Name: "parallel", 131 | Value: 4, 132 | Usage: "specify how many number of goroutines runs when generate file node", 133 | }, 134 | }, 135 | Action: func(c *cli.Context) error { 136 | parallel := c.Int("parallel") 137 | outputDir := c.String("output-dir") 138 | carPath := c.String("car-path") 139 | if parallel <= 0 { 140 | return xerrors.Errorf("Unexpected! Parallel has to be greater than 0") 141 | } 142 | 143 | graphsplit.CarTo(carPath, outputDir, parallel) 144 | graphsplit.Merge(outputDir, parallel) 145 | 146 | fmt.Println("completed!") 147 | return nil 148 | }, 149 | } 150 | 151 | var commpCmd = &cli.Command{ 152 | Name: "commP", 153 | Usage: "PieceCID and PieceSize calculation", 154 | Flags: []cli.Flag{ 155 | &cli.BoolFlag{ 156 | Name: "rename", 157 | Value: false, 158 | Usage: "rename carfile to piece", 159 | }, 160 | &cli.BoolFlag{ 161 | Name: "add-padding", 162 | Value: false, 163 | Usage: "add padding to carfile in order to convert it to piece file", 164 | }, 165 | }, 166 | Action: func(c *cli.Context) error { 167 | ctx := context.Background() 168 | targetPath := c.Args().First() 169 | 170 | res, err := graphsplit.CalcCommP(ctx, targetPath, c.Bool("rename"), c.Bool("add-padding")) 171 | if err != nil { 172 | return err 173 | } 174 | 175 | fmt.Printf("PieceCID: %s, PieceSize: %d\n", res.Root, res.Size) 176 | return nil 177 | }, 178 | } 179 | 180 | var importDatasetCmd = &cli.Command{ 181 | Name: "import-dataset", 182 | Usage: "import files from the specified dataset", 183 | Flags: []cli.Flag{ 184 | &cli.StringFlag{ 185 | Name: "dsmongo", 186 | Required: true, 187 | Usage: "specify the mongodb connection", 188 | }, 189 | }, 190 | Action: func(c *cli.Context) error { 191 | ctx := context.Background() 192 | 193 | targetPath := c.Args().First() 194 | if !graphsplit.ExistDir(targetPath) { 195 | return xerrors.Errorf("Unexpected! The path to dataset does not exist") 196 | } 197 | 198 | return dataset.Import(ctx, targetPath, c.String("dsmongo")) 199 | }, 200 | } 201 | -------------------------------------------------------------------------------- /commp.go: -------------------------------------------------------------------------------- 1 | package graphsplit 2 | 3 | import ( 4 | "bufio" 5 | "context" 6 | "fmt" 7 | "io" 8 | "os" 9 | "path" 10 | 11 | "github.com/filecoin-project/go-commp-utils/ffiwrapper" 12 | "github.com/filecoin-project/go-padreader" 13 | "github.com/filecoin-project/go-state-types/abi" 14 | "github.com/filedrive-team/filehelper/carv1" 15 | "github.com/ipfs/go-cid" 16 | "github.com/ipld/go-car" 17 | "golang.org/x/xerrors" 18 | ) 19 | 20 | type CommPRet struct { 21 | Root cid.Cid 22 | PayloadSize int64 23 | Size abi.UnpaddedPieceSize 24 | } 25 | 26 | // almost copy paste from https://github.com/filecoin-project/lotus/node/impl/client/client.go#L749-L770 27 | func CalcCommP(ctx context.Context, inpath string, rename, addPadding bool) (*CommPRet, error) { 28 | dir, _ := path.Split(inpath) 29 | // Hard-code the sector type to 32GiBV1_1, because: 30 | // - ffiwrapper.GeneratePieceCIDFromFile requires a RegisteredSealProof 31 | // - commP itself is sector-size independent, with rather low probability of that changing 32 | // ( note how the final rust call is identical for every RegSP type ) 33 | // https://github.com/filecoin-project/rust-filecoin-proofs-api/blob/v5.0.0/src/seal.rs#L1040-L1050 34 | // 35 | // IF/WHEN this changes in the future we will have to be able to calculate 36 | // "old style" commP, and thus will need to introduce a version switch or similar 37 | arbitraryProofType := abi.RegisteredSealProof_StackedDrg32GiBV1_1 38 | 39 | st, err := os.Stat(inpath) 40 | if err != nil { 41 | return nil, err 42 | } 43 | 44 | if st.IsDir() { 45 | return nil, fmt.Errorf("path %s is dir", inpath) 46 | } 47 | payloadSize := st.Size() 48 | 49 | rdr, err := os.OpenFile(inpath, os.O_RDWR, 0644) 50 | if err != nil { 51 | return nil, err 52 | } 53 | defer rdr.Close() //nolint:errcheck 54 | 55 | stat, err := rdr.Stat() 56 | if err != nil { 57 | return nil, err 58 | } 59 | carSize := stat.Size() 60 | // check that the data is a car file; if it's not, retrieval won't work 61 | _, err = car.ReadHeader(bufio.NewReader(rdr)) 62 | if err != nil { 63 | return nil, xerrors.Errorf("not a car file: %w", err) 64 | } 65 | 66 | if _, err := rdr.Seek(0, io.SeekStart); err != nil { 67 | return nil, xerrors.Errorf("seek to start: %w", err) 68 | } 69 | 70 | pieceReader, pieceSize := padreader.New(rdr, uint64(carSize)) 71 | commP, err := ffiwrapper.GeneratePieceCIDFromFile(arbitraryProofType, pieceReader, pieceSize) 72 | if err != nil { 73 | return nil, xerrors.Errorf("computing commP failed: %w", err) 74 | } 75 | 76 | if padreader.PaddedSize(uint64(payloadSize)) != pieceSize { 77 | return nil, xerrors.Errorf("assert car(%s) file to piece fail payload size(%d) piece size (%d)", inpath, payloadSize, pieceSize) 78 | } 79 | if addPadding { 80 | // make sure fd point to the end of file 81 | // better to check within carv1.PadCar, for now is a workaround 82 | if _, err := rdr.Seek(carSize, io.SeekStart); err != nil { 83 | return nil, xerrors.Errorf("seek to start: %w", err) 84 | } 85 | if err := carv1.PadCar(rdr, carSize); err != nil { 86 | return nil, xerrors.Errorf("failed to pad car file: %w", err) 87 | } 88 | } 89 | if rename { 90 | piecePath := path.Join(dir, commP.String()) 91 | err = os.Rename(inpath, piecePath) 92 | if err != nil { 93 | return nil, xerrors.Errorf("rename car(%s) file to piece %w", inpath, err) 94 | } 95 | } 96 | return &CommPRet{ 97 | Root: commP, 98 | Size: pieceSize, 99 | PayloadSize: payloadSize, 100 | }, nil 101 | } 102 | -------------------------------------------------------------------------------- /commp_test.go: -------------------------------------------------------------------------------- 1 | package graphsplit 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "encoding/base64" 7 | "io" 8 | "io/ioutil" 9 | "os" 10 | "testing" 11 | ) 12 | 13 | func TestCalcCommP(t *testing.T) { 14 | logo := "OKJlcm9vdHOB2CpYIwASIGXF0MT5o2y08uzjZs3zUPTeSL1Vfny7WPy/PqKWVzshZ3ZlcnNpb24BUhIgZcXQxPmjbLTy7ONmzfNQ9N5IvVV+fLtY/L8+opZXOyESKgoiEiAwQIn2XJ17hEKaD4q2CSTpipS3R390WfxP/mjU/yswwRIBLhjTSQoCCAFeEiAwQIn2XJ17hEKaD4q2CSTpipS3R390WfxP/mjU/yswwRI2CiISIJd0bLclaExyM6dhdmj78a6JqZ4tATiXJNtmxveTVy+ZEg1maWxlZHJpdmUucG5nGJdJCgIIAblJEiCXdGy3JWhMcjOnYXZo+/GuiameLQE4lyTbZsb3k1cvmQqUSQgCEoxJiVBORw0KGgoAAAANSUhEUgAAANAAAADMCAMAAAA1bt2mAAABxVBMVEUAAAAOjv8bif8alP8hkP8hkv8hkf8hkv8hkf8ikf8hkP8hkf8hkP8hjv8hkf8ikf4fj/8hkf8hkf8hkP8hkf8hk/8hkf8hkf8hkP8kj/8hkv8hkf8hkf8hkf8hkf8hkf8hkP8hkf8hkf8hkf8ikf8hkf8hkP8fkv8hkP8hkf8hkf8gkP8hkf8hkf4ikf8hkP8hj/8hkf8hkP8hkf8ikP8hkv8hkf8hj/0hj/4gj/7///8hkf8hkv8djv4cjf4hk/8fjv78/v8bjP4ilf/5/P8pk/76/f/3+//y8vIjkP7+/v/x+P8lkf4Yi/71+v+73f/z+f/L5f+Oxv4xl/7v9/9utv4ulv4mkv7k8f+Av/5Fov7h8P/e7v/G4v5BoP632/+fz//D4P5TqP7b7f/T6f+s1f6Hwv5zuf4rlf7W6v+Tyf57vf52uv47nP40mf7s9v+czf+Ewf5Jo/7q9f+/3/9nsv5hsP5brP5Mpf4+nv43m/6v1/+y2P7Y7P+l0v/P5v5Yq/5Vqv5drv5Qp/7o9P94u/5qtP2ZzP7m8/+p1P5ksf6LxP6Xyv+j0Py02v/18/Ly8/Ts7/Pp8/38+/v5+fn29fTf6vOC6jXrAAAAN3RSTlMAAgYEH/vo99oW8MwqErsMCtZB+Xga9OBMDzww6+TDjkbSk4hybGIUoJp9JaWCZ1hSta+rXTbHIlwyXwAAIj9JREFUeNrUmulW2zAQhbOTAg1Q1kIhbKEshRa63KsjneMf+cX7v1CRRkuCHSdOw1KVxLK+I3nGlmauldZeqDQ/rO7/uLvuX94frWx0dzud3e7JytHDZX/rYu/3zddG7f8pzY83P66/HW10NodPJcuM0frpz34Zk9mmncOT3uXW3s3nZu29l+bN3tbD7uajsY5oqqdCBQKKiiQUFKm0yZ4wdza+nR+33q9TzdZef+WwPcy0orJeWHegAFena+AToWBqmmy4uXt0drz9Dp1q/L6+P9TGGDr7SVpPaI8k7Idw7jzD2mSmvfvz/KBee0elcXB+tGyGxhpLgkq5P/eM3DSTlkkYetju3A9W12vvotRb573lodGk2Gk/ti6Pg3TtBMkSTJ1lXx7utmtvXtaOrzp6qMU+uADgZ5OtEUqBijNhnbW7/f212luWD3d/lpw3EEtJZW2UkOAfDTCGWYZNttP79bH2FkXm2omWkKbEJ9qKkjUjH1dnxMzh5711trkyuKm9QanfnO1mxk0ZRcTJ4+yPhlfHoDLtjdtXdUnc6R9mmgoSq2SFWBPlI8tlHuwc1e3ds9XaK5b69u3h0IgBY1IAXgqQcJaFuCwYRfhZb8hTNO3uK66ltcFGZtzCRpAC7mPNjKa6s+rYQofN8sPx6ySm5nFPG/isA7l8vOfSzvkxqZRgbTqnr6Af6q3TnSzmevVMCjBKATG8Gg5xXc5BbTYGH2ovWxrfT4aaICShQPmJEiaONAXM6hijWJmdh/2XfHOqr/5sa1UiBRClAObEdJjxWZrD21btpUrjYnc4LgUwRQogYpRjBoyAZdqBj0v3+y/zdlFvXbbNMynAMSng3HPYfQumxYgYihELGBcSjIOHoKF1d/C1tvjS/HQyzOV6VJUCnEEpJAz6CH65+DTbuOsYd4cZ537K+eHieIZZHSvBUhEPQa2PjhccGz6ctnWBFECBFKAriNMJrIIxLiQsd/quO/i40OjWGzJOdwV6t3yos8BjjGHYEt/AizAFp8EZMeioXIbmy1lrgctnIyM5IddjPNf7mx2x5CuHWYSV4KgU0uBygKw06KVvB4taPr86WRhZpVw/WSmAguWheIxRjIBzvS1SwRNpEwzd7n2qL8SfrSXtpk7K9fMrBVZRCvbgW9y3+bO3gIy0ftvWVXI9RjACxgjGNKUgOEUOabS1x5Uf/xzs1k61lmiGmMpBUhrchCMtVkxYsoucVMQqDU5/L+JbOvm48Wv9H/250mHAEFIrSIE5MBP2zlBJu2w5dO/W/8mfvtHhpoKu4q8abrEPzsEowRAmoBpWaXBQKOioqAwjHs0/33ySCRloilKQItjCgMFSzIijUoiqBIRc27tkuhdze7TeNzptGrAg16NQCqBICiDhKUohYSUXBgWLEfN71LjVOkmBYqUwRQpEjHJMjycqhSjKSZiT7825/BksaUkUIgWiUiiUAkC4nYLBiFmOU2/xwA8uDUkpiLsW06zs1efQO9+XNeIY8daGBSvZKFkAFmBLIkYpzvcOCSgtJR/voHVvf469nY5JUmCaUkiYEctXHiNi5nGJUkh/ZPvnalV9fdA1M+8KICcFkgmK1fBkpSBtEsD10tVNxfef3tC/kqQtWo4pBZGVY7keAQPPpQDLceqtkpAQpSDnUdWKjqD+cv25UkA4zXKbBqok1xPgGHZAMCIGczjXm6ODgxxVCvReiWS4aFTZ3mnrAimgcrleFUoBBCYgTbPQWK4UEh5XCl7lujNS/5k91DX3O8ZLAQmtxUrBejtVCiAuPMwkJBhxUgrihn9KjpCVAsP2UVbwi0eQOO6f4FBT4TEKTE4UYQqe0Fv5RnEGY2ohBD24ql4++zrjhOtns24aMC8FoqlRCsTpJdjW8jivFISKF0kpJIMw6zJqXGwSKv8DgRxSrnftZVKAUSlwdiEBwZOVgmuUO2yOPs3021zXKNKnGyYpUKgUJkkBf+KPFk7CsfdEpSBcrsSwCF1c2LxqzSCxr4aKarIUUBWkAIgijJLeSIOTk5SCLCJSHw4a0yVcW6uwmcioAmRVhP+p45MJGI0Mq5qIGGBonxU7mAanP1jIMEfFMv/Sbnr7UyfcSWYHjJsGxUoBAqDKpADTfohg5HC+N9PgxGSl4NeCXjptTYkIZ0PnRcHvJZJZ4w8iYgsEpyRp8WgNqZRjKvmkwd1pTEFSofgcMiNM96L83Wh/WUuH2TYN1BQpUB2rUUw6SqmMKwUfGnR5em38JeVMmNo2ojg+7UzP9G4nvZve7fS+d9eyZFnYOL4wGLDB2BzmMJc53GDAkIBNgAAhKWnTz1u9PYUk26VVJ7asvyT2aXff++3uU38MtZ00wAoFvKTgRQHkIgXUWcbITQqoMylwv6G990anAflbT2ntciN4Q7iMAqQTCiDpbBGXhelC5lcjDykgNykgLykwkzr7hVeuBX2WF9zD/vYogPxQADll1EGGG8un4iIFaoCLFNiedr0DAX0GLpuPT0ibzArmbuViiJSxU0ZMJpdk1Fnm9ShuzkyQQVVWj1rzYzrRrr3Vzp4XwWVz0BEo0IUUuqPA1WVFCshNCvRM4X/FEOn6jXbJDN+FxHoJ8VkvgQcsb83qhuGqaCJijk3KCF1dVjcXTMWQ2JW1AbuiQwTbVdGLrwbZUowiBeH225OCPwogLwrQHZfsvboLKXB7ZXBlOTSvf+yf2vl5iMOOhxRwO1Jgp3dGAfGQpYzagcQVSEHWJPwLfvSD70T2ByHhxBykgNqTApziRQF6XBrnlHFnWV3N9tykgPxIgSG4v6P7QVPLCw5SQD6kgDqTgvrpkbGS/y8pQMm47jswevKlEEGdMiuwXIvDAvIIAyHp0BXcswcpZfjsJMPVopzy5oLu5Mw+H5zJ7HzpF7Tnv3rZS3HXNVcWpUQBRQoY+ZOCFwW8pNBdFn3WSQq4CykQNhr34sITN3qo9SKyedZL5KNVU81OWY6J1Ic86JaRlHm9OWV1cwyy9NxcpRuziBsD+9r73z3hnip9mmH2/yEFkAQKsGL6yEjK2CV3JQXkQwoCgL582009msBmZrQPKbhjvT8pyOPYgQLKFbSTvTfHqBspIJnHhbRrX7tcAkzFYexDCvYxRQruYO4iBWd7Iqz8QsYu+b+SAkIeUoAyatddC/6/XofVbnOloXNSEIDdgRQQyF4UQER54avJTlIQcyPtSAErUqAHgi5aeKMHRKO8kWaPwpMbwfqPaToXRGRcko5ZdW1XNEUymirN/2p1cywjOXb6BC6J1XLOd8HfLmUQP3lNgzsZzVlM2pNCfHB7RU87ZwWQHwpgWXh8JdlLCsywLqTAvK32/PfOketrr2PQjcLIls5O81teIPnN891KzjBU/++KAkjKyFe+OinAbxcpgJHBG842dyNEa06vjjxQpCAnqiQpEKv6IFw8rhk6/PSJ9eL2qgaEjB0ykrL7aowcK6//nhRgL3jtXcfQ+4Me5gripVjF6kAKxIzOBgJDa1VLxKbuKMCXZMSz7gwSihSQmxTQZVJATlIA/HnP4efefV5jF6cnE2NlsAj7koL9qVcXAoHEWMU0nbIXBVQ0UXvuzR8k/EgBtSMFrhNYL/rqOTUS6uHja6MVSYxskLQuEvK8pECWAvYWO46iVJqgjiggaxj2pYylDKLv1VcmBVB7flax9eegWC/J7QUCkZPNQcOyLINtuq5LBoXq3h4Di3pPcs163lIYCV+yLpzFF1s7WVwtj8O3XIhRvlvWkIitck2b89yzr4lJ1Lff12RWwHIYSju2NzUxO3l/7nCjWdheycXTaUMOJ6EXwTmTjZ2Bfd3EfqSgnDpRe+7WJmR19X8kBQE034kxxIeQA8PnRgcHAq5taGwvcz7XxLrB1zEW2fHY0vaj5PGg1R0FHPzWHST8SQEpUlCUL8b+XFWw8FVQZVaYp30Bny1xsZct6AYtRJYfGimMBiITVctLCu5w6dpzye6rO5ECakMK8BF66QU+FPq5R00akHg24Lv9+fjifMvQMUmt8SO9J9VioLdUtTwooPZcm0fuMOUAhfYnBRHsnKRgf2lvvsPo54uPgo4cTDI/4W/RXw8fX5zZLSz3QBy5W7NbX+RBzkTYVSjYVVTmMUnKXrdBTEPnMzW8kqTPIcImlokh81FFOob2/OesE71wnS+hcIrb3mtj0c2bj0tlqzomDkSydbt99mcNfxSAfS/8ICV7QYKYqFEbjxrIjxSQIgXsIQU4HPr2C7bkELqUg0n02oKvQYk/bt58+PdRSx4I7xWK9uHiFtLdZXZ1nX8tV9ceJZKz27rTYvjPQXWSExQxMFogPXwN+Y2QKwfTGJ/y70Y3bYsustmIOPB7C5pn32Rzy2AuS226iGMQ0dJpyzCF7A8S3J4duG1yMg/R4BIpIC8pYCcpwHeQe4Vvgu43M/WVE1+LHlOL1tfD/Hds9Awq6nZ5d9i6jALxQn2zfLBVq22V9+tHlUp9qxo1LUMtl/iBBMkv91Fvc6tqXI0UWNjnofWJj4LuHExi5kfv+vcisOjoWHSi4wqNVJW5u6cWdg4IojuxWF9/fzLZ3x+L9YYTkeRq8U5rxTBBds1FSDem59iNI5kVvQspSK+tSAER7RnKp88BJ3BS4PEYfE1hNubb5qAflTPccd85ha+++wfhYs1wtqltv24YmVnPWR6QUFMO+uA5PS+55iIFDspyeOFPCva/z95mqO3OrIBbGag5W3SVKPwHs+i8vMoM2mmB1bGdrdXIRM50eLbmSMSvzcYyZXdkddSYOX8Whr8ys2Uwc7skXohUPBU1PnkOwOd1zZODSVcADKOxuVi6lwyrAjGDbl4cZZmBu5tJ2kTKxcDQ8nxOlw0qt3WQTTDfODSzMDIk7pEoDhv+pEALXcjEwslMmZ9yRVKA2Tlwc78ENex+W4P/3110Sye5g8rc2k5mam9m4XaRG/RwanyaGdQCg3qnygv2R/UwZ8oZOssq/M484aaOUoXDTD+3aKZsudMEsQpdg/VKM2fQAqrlFBVTRTYNEzHX5atVoR9foBM+zrc1uK08TRIUI217XZyaz+Vy9T8eM5Mu6qO0yc22ItSgZtFGu825+w4UiNdWqc0zUXDi6VQLwjU9edtQzOAmBd1KW8RLCrJxdiIFgtkw/EbPpeUFvzkFgFbT1I39SCL8518QX2fLQ3QIccq8UvNRwG5zldVTS/q5+H6Yarsp+tPUxwd48JrzgoTiDFZ7ihSwDykgf1JAOPgq0NyXPd63NZBfZgUx66zV2Cb9PV6i7FMJUKfQjAECHfXemjdFoVLD9OS+RVFwc595knCpYSpwMFlU1S3FCyYRs84M7KhFJjEsw0EKJm04OnGSAg6+CW+H/hTyvK3Rbk7hSHq7i+E1KG0lSwu9tsE+w7/XJXrn55gLXpZHorvs6pGKhXTdtiSeis5HqVX5rc15AwofT+Wj0RS00bSO8/MrhfE8wkg34tHq5tImBQgomqHnmqP3s5VxZDpIAWlPQyD6IOR9WwNdzsEUJHEo/ffD88OEXbQyLePQaBYMOj4K961JrxBlo4zVzbSojPhyLw8z1uDS8dr57MTUwO3ShoXM8ZP+kfXW/ePJk91MaWZhMY8b9cPF2VIxGZk60Em8tr6zYDvbgX2TeQK9MToToUOybMOUr3pANvcvNm+/GnTlYArmcOVgYhKfUyF297TX7vCNBfrI9zNQzrlKIDyQEp57foK56ZqFxLZ/j7XCk+iBjHD3ltKoXLK74zQN1rzb5Q/FMHPm1DRzk2HefOdNjLEZr5dElEsuDuqSFLD2zHd2ZH1f4x3NkVnhgice1FOLDtCu9NkjvBoAUu9Ac4QBUCC8EBX1kRug5w00dGlQbYAVeKJR/13EJTuIbpVouJJscXddj8pHVxxGKLo8zX7cWSEI6alhB4bstQwkGJVoT31qv4/8nka8pODNwYRuAWQixt+HyUDs8CgWAG+wFLaPTO9P2AbNS/i5zQrvgLfxW8xxl8a3K+s7SabnGyd0Z6CyNDlC9dv76Wh9cvJ2hPrEJSjpxgxVJgo6MvPLRaitsTGKZmPLKaJIQXvDNsgmH19ScIRjXqXRHfVkVpeHAn0Hc2Db2GmGInLtHq0hvh3Q0sUm00gZNCEikWVYg3eYD8+dRVjHQlZ+SbA2xKM6NaJ/PY6QPp+l5j8YN0l8GKprenSlegzH7o2mFCnYBtlDvGc0bw4m8b6tAQbtKoP6l4cS04Vd+pDrfXDg7DBsP/w44X67laTlHLUU4xSmWJO71TBssqbtt3jaYh1rZNkgqY0I3OhOCv6aWZhIQE1k81DgYaiVxJ0VXa9l4OzDuGFtTNMmSYgkBS30lW3QU9ifFLxvazgNilVWw5nCGPSdpckAtLhyBjydBIDhXlqRpw5029pjl04M6jZZr8FVe3W7wYX7EoGFAwvlK7TS1i1avMZ5BE6+nyMI69SgobOokVvsB8qHhbllaAOlsqFIAffAdLB26W0N7M2sEPJlg5Yf9a41odsvbE7Dc12E+a9VEYcIGmUdvKArgw7gRKgCZPLBaSwzOhSeXmqV7CGQgXJntNLqBv3D0SzgX2RyxcSEDc8WNnSysQC97MgkxvZsr21jNkrLLKaDv7UN+oeXa/1NIgjiJho1fjAaHzHxkeg3E42PD94tsOcdchLsKYXrlccJ5S0CpeUlVCpSoVgt7Qf/Ym92t7d3FgU1SpoCt1zZ2Z2d+c3MbyqIiyIF9Ytzh+LBWgWmN60qINbIOiAcKQiqNQbBrC5zgYbf6PHbB6mbWVjztbBS6L4sV8NvMEL1DLFbW5h5vW8k5FrHIq5nrN1S+lv+1ekrgJDWIUwPrFeBYho7kAIiO3TGuyhS0KZcoFdGvLUTAXPXITOrlWC9DNvp6EWK8lQuT8gIeOi5kQDq5WFZzGWzA+gby0iQt8JwaVtl0QTZDAUyJvJOirisEO61iccL6dWCYi3mlzR2IgWRnKFz3oWRQsNhFIz4h50WnPDJK0BzwzGYBz7991kidsaRp4999jDXJAtI+7rEYEMIHRGEkwXYv0pIJsoudyMkQrHmHEq0qNGm58aMNj4GwWkndOxoCkXMyt3wLooUkOHIeE++FQ3Fem7AqS4M29bvfM8PcnM3xI0cPNQsvfOzBm92mXRJya457JhgwAyJ5qtwLwxmwxIDJw8Vy1gkYpK+GYR9D8Cd5jSJsCunIFI/tDhSwMMA90OTjUYUhJjsWd/1Zh8uGX5e6U+ukMl3HG4oaVK/2QUh9QpDPtxoaJMNGM6xEARvEbcXbkqhmkk3yLr0nCnIx1JaxoiomgMphAApXFwYKQhpnmQYT8adIuhU1Po1mCzBbjhTb1T2vR2/jRRiNPv1qg96id73WX7HNutITcDJT6VZQgHXB3BDu/dyq6iAowohqRl+AXflN7urGKNjOQXvGciSPPFRHYTRGRxMTrywlQY0Yc94Czo+rn20cN2ngiVPA9P9obDPUIiRSzOFIkrINgiyczIN98YJHhHKxNUGD1ehwgbv9c9BIhA4IagMYCTlUnBXduSXCCTFEnKUU6DpC9D2o6eLIQWIqjZtgcxaahS3kEEj4GkZB5Y8VZkpqCMaUiL60a6hWMUZr4qj12R8He5iqk5A3XIGC1vJkDUZuRxdAYGa6TWFoG5BkIYRKpBMwvV0dLcsO3MKvtMPL0DEuihSEIR3gSOB8tW1UdCz/DlsoYPdAOibs7yAYgMKy7jN3l2iTpXWKrROi+BqnlNgyKjVEAyz0JSEI2/azlVb1npFy5Ba6xF4m5/AdwDAM3c1R06BRax3nsHwfKQAYzpVGzAGjX7uuWdcbT1PdfMWkHMzK1A9RQViF7BgbDCjRnVMrQapAXAk6nOwafGEZaJNAyNYgzgIVFuz1s8yDSBDuq+AWm7qGEla1fQog/eyAyn4zkJO4fKiOQVR4CGe8na/mFOUSNSz1znwLJfc5QUkATQlZ+glpOrldGaJCBgZYjJurS6tXvh59UEjGDewF/CsVFRi5giuNgsrAEFpWqKeobampMeSm2NQROTKKdy+TtnNC/V1EpZCc48JVCgVh8rz/rZl4YLL+4w6wSyOhHciLPjMGrlR900mzlAptRFwXgYsUOC8DLVK41KQB8NCSl9pIAeYV8c0mHlj0r9rflh5ASBKlUWeMWZFyWuXvHOQAu/rROL0SKDq9iclkCmY7yLLUVe3hqga4Q0H6LMzr8GpLrnOS3C7zPM/SJ8yD0flQYIfjKeH+iJMi8R4vUjzseyTddmFFHz3TgFV7pyXl1NcFE1hBvFiPc8EMta6ynImftAIDjTsylSnZ9bLlA8JzS6nhHIk6hw4yinye0B/JGKTqbuUjvY5b3A/b9Dvp4q3il3lFNFLCExXbnpdSOHXFE3cCXpomJrqLo8PzMSHlM7lIT9Dd9mCefbGez+vopBQLpgdStzIIRIgBT52ZLZrojRKURWMwkbS04zs/MJS+FNM5kiBqhEAhRMnL/p+ghTQj0gBxrR9OsNJuPN2XIxkWoBk7GEy2cjhwTSaSJQqB4eR10srZuRLYqT5naWhULO4Vvm0CvIg+1qvPY406pK9MKgezZtvI9noOr8mykKz8rEVT22+UyX0A1IQLz0g/KXHPqqD85EC2Ts1Q47l/qC0vbHdjnyB1XMxK9SyRhLiVpZaK+v1ul4WXtpCI/ZhLYZf4h94GWq9jDAZZuqu6fpqWQ25iReSXK7rKpLQj8QLq65PyTH3n9rVhzlIQSDDKoQBSnY3vL9y2M/3yPLxbg2mm4y3LMtYphJzlSTDsqv6wO7G9n3sZMDd6Fj1AS6iGdWHp7coTfPhU0e3hvizvk7OncSxxgakY7Kl1+3NviYIv0e8gMc/IV6Iz6A8BCW8S95jSEGcjRTYd+JR+7kSyRSzqUTVvwgHk899HoNzNkVTmE/RJDwFypi7ctY3v5zChlkftRR7kwochgfZ2icg1S5AvBBdezObwckpmn9GvLgKiWAogz9+tiBS4H2dyB/LTTPF7UTPz0mWnIPprmM5xRGdz24Gp30c/oiiKSIfswlgFbxOpDD/P0CQ15KkpWuldezQq2Mdgu6T88thd2MvyMgXY3bbJ7nsQAq+W4yNBeCHTvRYM4f4y2YOhAVdPU6yFN0UTScrXZwx/PcUTXbVd/cU70xxI4XFmznmdWu4dW9+M4f9HjlkhJezmzlcSMEL8Td9nPcx4sXPmzmoLIs2c9CV/r8UTd/N79SdXUvDMBiFLxQrDsF5o96I4tcUP1AY7wkJ9KL//z9JaDlNujdZRraxXS3skG5QthzaPE9HW+1LFzUFk20KaZhj/1s0Q5Va+zdy+8sGJbdTfFwIc0gO5qjYoon0xov2ady3fTdr2RRKsM9ymEO0WKphDqUpxDTHewsYrGkK4DJ5iDCHnS8jjwKyTQGTpoADhDkiJc7VubdLbop9ih/tCubAZk0BzWuEsj51Iuuxz/5lPfYphdhnPcxhhjHs4zICP68bKzvAPjWYY4y3iH3CLU5jsn3WHjf22fA/jrhAsingCLBPN5/acE4vbN8UqrBPU4F9QsE+iwQR/g37eTllwRcdb9glsM+cB1PHPvNU6ObYJ1JNwZ09nyhKH6kURHCtz2OfWxdE6EKFu5nzc/gwwZwxmx5M2pj5nYe4z0gehrEwho/C2A94cFo0YRSYo/9x9xIYoNGUF99ul4IIs0tBhO6NuXx0RyqIsI2ujXnu4qZgJk0BRYII1v+UAYJxP7tGEGH6M2nnP5ry1F+1r2wKjGmASBeJepUUc7tIWPM+umDjBaYPz4DiwRyNpP2I/kU2DS0WxtPZEOHBQ5iDS6uGfTqeoJVTdNMeoUoq59L8ank7pUqoLYKwgEUxGBMkD2MJddxFMIed/yR1bG/nNtUUoBqzMTYFxqVNAYzD2di4KahmLELhDiiXTkosnZS9Syd94u4f+AvS/Fid4bplkG4KFN7mhdrCWPIxZwsPHvm2fUjj19BmBgXyV9YkftvYfFPgR7ApDCXAFFxy4PWaMI5nCyZNwQRNwaw2Bfu7zIt1/6s736Y0YiCM3ykc/0RqQShKKwK2UlGLVHYzm5m8uFf9/l+oZoT1ejkuOeK1dEcYx8cw7JBLnkvC/m5jWxVN3K+KJpZRRdNeXDe8HMgN4CgB6UIAO5nDcckBgWXMcQpil1MQSadA1cWJrTj1TDpW0cSMitm2Kpr2etvgXm8bAXXBSVsMn5Qoo942GrJTvW3Mqbct5KhnZzqHzxGByMB5AhoUruTmRMZcj5lOIVfmeyV+YoYXJqHi+lfucNZOR571tn1kQJZRWJwCdzhLDBcKXXGerugNcCZzsJxH5ni9gKJZxxHjV9eGIY/MAea5DC8yBxpkDnu9bcGm1BrHP/qEf4vMAW5kDkiRObjqsSNKSeLBkznOfjphlXjsfj8yB5ZB5qBoNiwEk71WW7ugP18np8BynhXYKScZXvlOQfcd6l+dF+RhRoTClcxhL8dtgjusDC9OXwhMOwWqri4LEzFrxE7Bm+HlLoOd4QWI+zCvjnpVcSAML0gxvJD2opJ9mCr0ZHghFpSFE8NL1vUAVzyGc4WuZG9/hped9rl1CjR46OwJMl6o1zksk/aJJu3TBuli2UL7zGV40edee28085PudcZ3PYrSPrW2J+0TU7RPBIr2zycIz58UiVzaJ5RC+4TdtM+R/t6gR0ZLVZD2iQVon0ZrtNE+KeJKjD4ZCYHWcxkA4Ib4ym8ttjJPQiJh5GTU86bSf7uVebRP65KDq4wOtE9ZvxkG3jGcSAJh4YKDhQsOhbjgkMEF1/mcaRKrf3z4XqM3IyZEAafgI+uf5KUkqPV49G6kc+nHBbfLFqeAQL88OOcmy7Su+PS30A8T8QUZ4A5+++4y8IsnVp4B0ZNEby4zLBVxCpmIL2HAQPl/rbKdCy6jaSV412hPurRFLAnI2G0Bdl5vsoDCMiRk3k5BOnswhjf/C+mz2pofgwtuGgmtAaRkyJeRnUJiLkYAqi65au47Rnh5KintFNCY65MygrtsOoXt/rXS3a2UGN431U7EFwBuJHcjYbY2nQLR+rETlBTH47UiFJAFA8UMI6HFtBVIykZrwykI2ZyfBCVGYzqKSeDucxn6AVtZFJfhD5mqra/toNQ4vlhKCch3kbxW5uUUNq15m2EzC6no6i4oPTqzuiJjt2UDeE8ejnhRUjLmyrxEvHlx2T/V1QTKj7Bypfvdq9HiyRwQMW0kRHEZN9kgdddfnW8V/Efw25oiFMhLDjpgayR4rocdRsKUufVmvCA16vmO1QVXVp/6MYmEU8AsK1BcBi2jai5sU2kJKa1qMQHmLDmwFXCVN8V54tHio8eNgkdKi5GSyOfXzAOe+qmgjFJFc490fK+lq88UU2KutxgJiwykqmf3J2Hw7+L4rvelqyTC9jARn6J7G9YFuMkkVf/0phL86+hcTOrVmAlgug/lOwVTRhBIce16+vwhOIQIGz8mg19K0ea0NxsJkTISu2SSqnt2NW4EhxMvOd1e92NFfDgi2wqYMpFSzfVk3A6DA4uwfTFbRl0ZKxIibQUQEisSLJOKVW2wenhuBwcaYWP8fXXdV0pJSWRsxuD28BsRSRWrZmvR+3h4H42R1OWn3qI1GNUofgkliXRuQDpkrF7+JvujwXo+G58c7CdjRvjt5OPjbDpfnbYG0ajZBag1R1G9dbqa3988XvxPqfwZR51247xSqdzdVSrn541256jsHvYbNMZ4e46wP3cAAAAASUVORK5CYIIYjEk=" 15 | logob, err := base64.StdEncoding.DecodeString(logo) 16 | if err != nil { 17 | t.Fatalf("base64 decode error: %v", err) 18 | } 19 | tempDir := os.TempDir() 20 | f, err := ioutil.TempFile(tempDir, "test_car") 21 | if err != nil { 22 | t.Fatalf("create tempfile error: %v", err) 23 | } 24 | defer f.Close() 25 | 26 | tempfile := f.Name() 27 | _, err = io.Copy(f, bytes.NewReader(logob)) 28 | if err != nil { 29 | t.Fatal(err) 30 | } 31 | 32 | res, err := CalcCommP(context.TODO(), tempfile, false, false) 33 | if err != nil { 34 | t.Fatal(err) 35 | } 36 | if res.Root.String() != "baga6ea4seaqjri25o5pmj2aa7mrwd2kpadzr66ee7roj32savs5jvvunbdjboli" { 37 | t.Fatal("Unexpected piece CID") 38 | } 39 | if res.Size != 16256 { 40 | t.Fatal("Unexpected piece size") 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /dataset/import.go: -------------------------------------------------------------------------------- 1 | package dataset 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "fmt" 7 | "io/ioutil" 8 | "os" 9 | "path" 10 | 11 | dsrpc "github.com/beeleelee/go-ds-rpc" 12 | dsmongo "github.com/beeleelee/go-ds-rpc/ds-mongo" 13 | "github.com/filedrive-team/go-graphsplit" 14 | "github.com/ipfs/go-blockservice" 15 | dss "github.com/ipfs/go-datastore/sync" 16 | bstore "github.com/ipfs/go-ipfs-blockstore" 17 | offline "github.com/ipfs/go-ipfs-exchange-offline" 18 | logging "github.com/ipfs/go-log/v2" 19 | "github.com/ipfs/go-merkledag" 20 | ) 21 | 22 | var log = logging.Logger("graphsplit/dataset") 23 | 24 | func Import(ctx context.Context, target, mongouri string) error { 25 | recordPath := path.Join(target, record_json) 26 | // check if record.json has data 27 | records, err := readRecords(recordPath) 28 | if err != nil { 29 | return err 30 | } 31 | 32 | // go-ds-rpc dsmongo 33 | client, err := dsmongo.NewMongoStoreClient(mongouri) 34 | if err != nil { 35 | return err 36 | } 37 | ds, err := dsrpc.NewDataStore(client) 38 | if err != nil { 39 | return err 40 | } 41 | 42 | bs2 := bstore.NewBlockstore(dss.MutexWrap(ds)) 43 | dagServ := merkledag.NewDAGService(blockservice.New(bs2, offline.Exchange(bs2))) 44 | 45 | // cidbuilder 46 | cidBuilder, err := merkledag.PrefixForCidVersion(0) 47 | if err != nil { 48 | return err 49 | } 50 | 51 | // read files 52 | allfiles, err := graphsplit.GetFileList([]string{target}) 53 | if err != nil { 54 | return err 55 | } 56 | totol_files := len(allfiles) 57 | var ferr error 58 | files := graphsplit.GetFileListAsync([]string{target}) 59 | for item := range files { 60 | // ignore record_json 61 | if item.Name == record_json { 62 | totol_files -= 1 63 | continue 64 | } 65 | 66 | // ignore file which has been imported 67 | if _, ok := records[item.Path]; ok { 68 | continue 69 | } 70 | log.Infof("import file: %s", item.Path) 71 | fileNode, err := graphsplit.BuildFileNode(item, dagServ, cidBuilder) 72 | if err != nil { 73 | ferr = err 74 | break 75 | } 76 | records[item.Path] = &MetaData{ 77 | Path: item.Path, 78 | Name: item.Name, 79 | Size: item.Info.Size(), 80 | CID: fileNode.Cid().String(), 81 | } 82 | err = saveRecords(records, recordPath) 83 | if err != nil { 84 | ferr = err 85 | break 86 | } 87 | } 88 | fmt.Printf("total %d files, imported %d files, %.2f %%\n", len(allfiles), len(records), float64(len(records))/float64(totol_files)*100) 89 | return ferr 90 | } 91 | 92 | func readRecords(path string) (map[string]*MetaData, error) { 93 | res := make(map[string]*MetaData) 94 | bs, err := ioutil.ReadFile(path) 95 | if err != nil { 96 | if os.IsNotExist(err) { 97 | return res, nil 98 | } 99 | return nil, err 100 | } 101 | 102 | err = json.Unmarshal(bs, &res) 103 | if err != nil { 104 | return nil, err 105 | } 106 | return res, nil 107 | } 108 | func saveRecords(records map[string]*MetaData, path string) error { 109 | bs, err := json.Marshal(records) 110 | if err != nil { 111 | return err 112 | } 113 | return ioutil.WriteFile(path, bs, 0666) 114 | } 115 | -------------------------------------------------------------------------------- /dataset/types.go: -------------------------------------------------------------------------------- 1 | package dataset 2 | 3 | const record_json = "record.json" 4 | 5 | type MetaData struct { 6 | Path string `json:"path"` 7 | Name string `json:"name"` 8 | Size int64 `json:"size"` 9 | CID string `json:"cid"` 10 | } 11 | -------------------------------------------------------------------------------- /doc/README.md: -------------------------------------------------------------------------------- 1 | ## Data Chunk Flow 2 | ![未命名文件.png](https://cdn.nlark.com/yuque/0/2021/png/1732647/1622514242871-5e4e62cd-9282-4210-aba0-6784814dfb13.png#align=left&display=inline&height=685&margin=%5Bobject%20Object%5D&name=%E6%9C%AA%E5%91%BD%E5%90%8D%E6%96%87%E4%BB%B6.png&originHeight=1414&originWidth=512&size=95888&status=done&style=none&width=248) 3 | 4 | 5 | Firstly, traverse the entire directory and calculate the size of the directory. According to the slice size, calculate the number of ouput files. 6 | 7 | 8 | Then traverse the directory to check whether there is any unprocessed file or not. 9 | 10 | 1. If yes, go on checking whether the size of cumulative files or file slices meet the specified slice size or not. 11 | - If no, append file path into the small graph and repeat the process of traversing the directory to check if there is any unprocessed file. 12 | - If yes, continue checking if the size of files is larger than the specified slice size. If so, chunk pieces, and record the start offset and end offset of the file. 13 | 2. If no, append the files' path into the small graph, and then traverse the file directory to check if there is any unprocessed files. 14 | 15 | 16 | 17 | At the end, process the rest of the file list, import files from one small graph and transfer them into IPLD nodes. Then build a Merkle Tree with these nodes, write to disk and save as a CAR file. 18 | 19 | ## Restore Flow 20 | ![未命名文件 (1).png](https://cdn.nlark.com/yuque/0/2021/png/1732647/1622525954870-0663e5b3-35bc-41b9-b963-1835c7f4c245.png#align=left&display=inline&height=598&margin=%5Bobject%20Object%5D&name=%E6%9C%AA%E5%91%BD%E5%90%8D%E6%96%87%E4%BB%B6%20%281%29.png&originHeight=804&originWidth=390&size=39540&status=done&style=none&width=290) 21 | 22 | Firstly, traverse the CAR file directory to check if there is any unprocessed file. 23 | 24 | 1. If yes, load CAR files into IPLD nodes. After processing, export files from IPLD nodes. And then traverse the CAR file directory again to check if there is any unprocessed files. 25 | 2. If no, traverse the output directory directly, search file slices according to special suffix and merge slices of the same file into one file. 26 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/filedrive-team/go-graphsplit 2 | 3 | go 1.18 4 | 5 | require ( 6 | github.com/beeleelee/go-ds-rpc v0.1.0 // this needs to be updated too https://github.com/beeleelee/go-ds-rpc/pull/3 7 | github.com/filecoin-project/go-commp-utils v0.1.3 8 | github.com/filecoin-project/go-padreader v0.0.1 9 | github.com/filecoin-project/go-state-types v0.10.0 10 | github.com/filedrive-team/filehelper v0.1.1 11 | github.com/ipfs/go-blockservice v0.4.0 12 | github.com/ipfs/go-cid v0.3.2 13 | github.com/ipfs/go-datastore v0.6.0 14 | github.com/ipfs/go-ipfs-blockstore v1.2.0 15 | github.com/ipfs/go-ipfs-chunker v0.0.5 16 | github.com/ipfs/go-ipfs-exchange-offline v0.3.0 17 | github.com/ipfs/go-ipld-format v0.4.0 18 | github.com/ipfs/go-libipfs v0.4.1 19 | github.com/ipfs/go-log/v2 v2.5.1 20 | github.com/ipfs/go-merkledag v0.8.1 21 | github.com/ipfs/go-unixfs v0.4.3 22 | github.com/ipld/go-car v0.4.0 23 | github.com/ipld/go-ipld-prime v0.16.0 24 | github.com/urfave/cli/v2 v2.6.0 25 | golang.org/x/xerrors v0.0.0-20220517211312-f3a8303e98df 26 | ) 27 | 28 | require ( 29 | github.com/alecthomas/units v0.0.0-20210927113745-59d0afb8317a // indirect 30 | github.com/cpuguy83/go-md2man/v2 v2.0.1 // indirect 31 | github.com/crackcomm/go-gitignore v0.0.0-20170627025303-887ab5e44cc3 // indirect 32 | github.com/filecoin-project/filecoin-ffi v0.30.4-0.20200910194244-f640612a1a1f // indirect 33 | github.com/filecoin-project/go-address v1.1.0 // indirect 34 | github.com/filecoin-project/go-fil-commcid v0.1.0 // indirect 35 | github.com/fxamacker/cbor/v2 v2.4.0 // indirect 36 | github.com/go-logr/logr v1.2.3 // indirect 37 | github.com/go-logr/stdr v1.2.2 // indirect 38 | github.com/go-stack/stack v1.8.0 // indirect 39 | github.com/gogo/protobuf v1.3.2 // indirect 40 | github.com/golang/protobuf v1.5.2 // indirect 41 | github.com/golang/snappy v0.0.1 // indirect 42 | github.com/google/uuid v1.3.0 // indirect 43 | github.com/hashicorp/golang-lru v0.5.4 // indirect 44 | github.com/ipfs/bbloom v0.0.4 // indirect 45 | github.com/ipfs/go-bitfield v1.1.0 // indirect 46 | github.com/ipfs/go-block-format v0.1.1 // indirect 47 | github.com/ipfs/go-ipfs-ds-help v1.1.0 // indirect 48 | github.com/ipfs/go-ipfs-exchange-interface v0.2.0 // indirect 49 | github.com/ipfs/go-ipfs-posinfo v0.0.1 // indirect 50 | github.com/ipfs/go-ipfs-util v0.0.2 // indirect 51 | github.com/ipfs/go-ipld-cbor v0.0.6 // indirect 52 | github.com/ipfs/go-ipld-legacy v0.1.1 // indirect 53 | github.com/ipfs/go-log v1.0.5 // indirect 54 | github.com/ipfs/go-metrics-interface v0.0.1 // indirect 55 | github.com/ipfs/go-verifcid v0.0.1 // indirect 56 | github.com/ipld/go-codec-dagpb v1.4.0 // indirect 57 | github.com/jbenet/goprocess v0.1.4 // indirect 58 | github.com/klauspost/compress v1.11.7 // indirect 59 | github.com/klauspost/cpuid/v2 v2.1.1 // indirect 60 | github.com/libp2p/go-buffer-pool v0.1.0 // indirect 61 | github.com/mattn/go-isatty v0.0.16 // indirect 62 | github.com/minio/blake2b-simd v0.0.0-20160723061019-3f5f724cb5b1 // indirect 63 | github.com/minio/sha256-simd v1.0.0 // indirect 64 | github.com/mr-tron/base58 v1.2.0 // indirect 65 | github.com/multiformats/go-base32 v0.1.0 // indirect 66 | github.com/multiformats/go-base36 v0.1.0 // indirect 67 | github.com/multiformats/go-multibase v0.1.1 // indirect 68 | github.com/multiformats/go-multihash v0.2.1 // indirect 69 | github.com/multiformats/go-varint v0.0.6 // indirect 70 | github.com/opentracing/opentracing-go v1.2.0 // indirect 71 | github.com/pkg/errors v0.9.1 // indirect 72 | github.com/polydawn/refmt v0.0.0-20201211092308-30ac6d18308e // indirect 73 | github.com/russross/blackfriday/v2 v2.1.0 // indirect 74 | github.com/spaolacci/murmur3 v1.1.0 // indirect 75 | github.com/whyrusleeping/cbor-gen v0.0.0-20220323183124-98fa8256a799 // indirect 76 | github.com/whyrusleeping/chunker v0.0.0-20181014151217-fe64bd25879f // indirect 77 | github.com/x448/float16 v0.8.4 // indirect 78 | github.com/xdg-go/pbkdf2 v1.0.0 // indirect 79 | github.com/xdg-go/scram v1.0.2 // indirect 80 | github.com/xdg-go/stringprep v1.0.2 // indirect 81 | github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d // indirect 82 | go.mongodb.org/mongo-driver v1.6.0 // indirect 83 | go.opentelemetry.io/otel v1.7.0 // indirect 84 | go.opentelemetry.io/otel/trace v1.7.0 // indirect 85 | go.uber.org/atomic v1.10.0 // indirect 86 | go.uber.org/multierr v1.8.0 // indirect 87 | go.uber.org/zap v1.23.0 // indirect 88 | golang.org/x/crypto v0.0.0-20220525230936-793ad666bf5e // indirect 89 | golang.org/x/net v0.0.0-20220920183852-bf014ff85ad5 // indirect 90 | golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4 // indirect 91 | golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab // indirect 92 | golang.org/x/text v0.3.7 // indirect 93 | google.golang.org/genproto v0.0.0-20200825200019-8632dd797987 // indirect 94 | google.golang.org/grpc v1.40.0 // indirect 95 | google.golang.org/protobuf v1.28.1 // indirect 96 | lukechampine.com/blake3 v1.1.7 // indirect 97 | ) 98 | 99 | replace github.com/filecoin-project/filecoin-ffi => ./extern/filecoin-ffi 100 | -------------------------------------------------------------------------------- /restore.go: -------------------------------------------------------------------------------- 1 | package graphsplit 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "io" 7 | "os" 8 | pa "path" 9 | "path/filepath" 10 | "strings" 11 | "sync" 12 | 13 | "github.com/ipfs/go-blockservice" 14 | "github.com/ipfs/go-cid" 15 | "github.com/ipfs/go-datastore" 16 | dss "github.com/ipfs/go-datastore/sync" 17 | bstore "github.com/ipfs/go-ipfs-blockstore" 18 | offline "github.com/ipfs/go-ipfs-exchange-offline" 19 | files "github.com/ipfs/go-libipfs/files" 20 | "github.com/ipfs/go-merkledag" 21 | unixfile "github.com/ipfs/go-unixfs/file" 22 | "github.com/ipld/go-car" 23 | "golang.org/x/xerrors" 24 | ) 25 | 26 | func Import(ctx context.Context, path string, st car.Store) (cid.Cid, error) { 27 | f, err := os.Open(path) 28 | if err != nil { 29 | return cid.Undef, err 30 | } 31 | defer f.Close() //nolint:errcheck 32 | 33 | stat, err := f.Stat() 34 | if err != nil { 35 | return cid.Undef, err 36 | } 37 | 38 | file, err := files.NewReaderPathFile(path, f, stat) 39 | if err != nil { 40 | return cid.Undef, err 41 | } 42 | 43 | result, err := car.LoadCar(ctx, st, file) 44 | if err != nil { 45 | return cid.Undef, err 46 | } 47 | 48 | if len(result.Roots) != 1 { 49 | return cid.Undef, xerrors.New("cannot import car with more than one root") 50 | } 51 | 52 | return result.Roots[0], nil 53 | } 54 | 55 | func NodeWriteTo(nd files.Node, fpath string) error { 56 | switch nd := nd.(type) { 57 | case *files.Symlink: 58 | return os.Symlink(nd.Target, fpath) 59 | case files.File: 60 | f, err := os.Create(fpath) 61 | if err != nil { 62 | return err 63 | } 64 | defer f.Close() 65 | _, err = io.Copy(f, nd) 66 | if err != nil { 67 | return err 68 | } 69 | return nil 70 | case files.Directory: 71 | if !ExistDir(fpath) { 72 | err := os.Mkdir(fpath, 0777) 73 | if err != nil && os.IsNotExist(err) { 74 | return err 75 | } 76 | } 77 | 78 | entries := nd.Entries() 79 | for entries.Next() { 80 | child := filepath.Join(fpath, entries.Name()) 81 | if err := NodeWriteTo(entries.Node(), child); err != nil { 82 | return err 83 | } 84 | } 85 | return entries.Err() 86 | default: 87 | return fmt.Errorf("file type %T at %q is not supported", nd, fpath) 88 | } 89 | } 90 | 91 | func ExistDir(path string) bool { 92 | s, err := os.Stat(path) 93 | if err != nil { 94 | return false 95 | } 96 | return s.IsDir() 97 | } 98 | 99 | func CarTo(carPath, outputDir string, parallel int) { 100 | ctx := context.Background() 101 | 102 | workerCh := make(chan func()) 103 | go func() { 104 | defer close(workerCh) 105 | err := filepath.Walk(carPath, func(path string, fi os.FileInfo, err error) error { 106 | if err != nil { 107 | return err 108 | } 109 | if fi.IsDir() { 110 | return nil 111 | } 112 | if strings.ToLower(pa.Ext(fi.Name())) != ".car" { 113 | log.Warn(path, ", it's not a CAR file, skip it") 114 | return nil 115 | } 116 | workerCh <- func() { 117 | bs2 := bstore.NewBlockstore(dss.MutexWrap(datastore.NewMapDatastore())) 118 | rdag := merkledag.NewDAGService(blockservice.New(bs2, offline.Exchange(bs2))) 119 | log.Info(path) 120 | root, err := Import(ctx, path, bs2) 121 | if err != nil { 122 | log.Error("import error, ", err) 123 | return 124 | } 125 | nd, err := rdag.Get(ctx, root) 126 | if err != nil { 127 | log.Error("dagService.Get error, ", err) 128 | return 129 | } 130 | file, err := unixfile.NewUnixfsFile(ctx, rdag, nd) 131 | if err != nil { 132 | log.Error("NewUnixfsFile error, ", err) 133 | return 134 | } 135 | defer file.Close() 136 | err = NodeWriteTo(file, outputDir) 137 | if err != nil { 138 | log.Error("NodeWriteTo error, ", err) 139 | } 140 | } 141 | return nil 142 | }) 143 | if err != nil { 144 | log.Error("Walk path failed, ", err) 145 | } 146 | }() 147 | 148 | limitCh := make(chan struct{}, parallel) 149 | wg := sync.WaitGroup{} 150 | func() { 151 | for { 152 | select { 153 | case taskFunc, ok := <-workerCh: 154 | if !ok { 155 | return 156 | } 157 | limitCh <- struct{}{} 158 | wg.Add(1) 159 | go func() { 160 | defer func() { 161 | <-limitCh 162 | wg.Done() 163 | }() 164 | taskFunc() 165 | }() 166 | } 167 | } 168 | }() 169 | wg.Wait() 170 | } 171 | 172 | func Merge(dir string, parallel int) { 173 | wg := sync.WaitGroup{} 174 | limitCh := make(chan struct{}, parallel) 175 | mergeCh := make(chan string) 176 | wg.Add(1) 177 | go func() { 178 | defer wg.Done() 179 | for { 180 | select { 181 | case fpath, ok := <-mergeCh: 182 | if !ok { 183 | return 184 | } 185 | limitCh <- struct{}{} 186 | wg.Add(1) 187 | go func() { 188 | defer func() { 189 | <-limitCh 190 | wg.Done() 191 | }() 192 | log.Info("merge to ", fpath) 193 | f, err := os.Create(fpath) 194 | if err != nil { 195 | log.Error("Create file failed, ", err) 196 | return 197 | } 198 | defer f.Close() 199 | for i := 0; ; i++ { 200 | chunkPath := fmt.Sprintf("%s.%08d", fpath, i) 201 | err := func(path string) error { 202 | chunkF, err := os.Open(path) 203 | if err != nil { 204 | if os.IsExist(err) { 205 | log.Error("Open file failed, ", err) 206 | } 207 | return err 208 | } 209 | defer chunkF.Close() 210 | _, err = io.Copy(f, chunkF) 211 | if err != nil { 212 | log.Error("io.Copy failed, ", err) 213 | } 214 | return err 215 | }(chunkPath) 216 | os.Remove(chunkPath) 217 | if err != nil { 218 | break 219 | } 220 | } 221 | }() 222 | } 223 | } 224 | }() 225 | err := filepath.Walk(dir, func(path string, fi os.FileInfo, err error) error { 226 | if err != nil { 227 | return err 228 | } 229 | if fi.IsDir() { 230 | return nil 231 | } 232 | matched, err := filepath.Match("*.00000000", fi.Name()) 233 | if err != nil { 234 | log.Error("filepath.Match failed, ", err) 235 | return nil 236 | } else if matched { 237 | mergeCh <- strings.TrimSuffix(path, ".00000000") 238 | } 239 | return nil 240 | }) 241 | if err != nil { 242 | log.Error("Walk path failed, ", err) 243 | } 244 | close(mergeCh) 245 | wg.Wait() 246 | } 247 | -------------------------------------------------------------------------------- /utils.go: -------------------------------------------------------------------------------- 1 | package graphsplit 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "fmt" 7 | "io" 8 | "io/ioutil" 9 | "os" 10 | "path" 11 | "runtime" 12 | "strings" 13 | "sync" 14 | "time" 15 | 16 | "github.com/ipfs/go-blockservice" 17 | "github.com/ipfs/go-cid" 18 | "github.com/ipfs/go-datastore" 19 | dss "github.com/ipfs/go-datastore/sync" 20 | bstore "github.com/ipfs/go-ipfs-blockstore" 21 | chunker "github.com/ipfs/go-ipfs-chunker" 22 | offline "github.com/ipfs/go-ipfs-exchange-offline" 23 | format "github.com/ipfs/go-ipld-format" 24 | "github.com/ipfs/go-merkledag" 25 | dag "github.com/ipfs/go-merkledag" 26 | "github.com/ipfs/go-unixfs" 27 | "github.com/ipfs/go-unixfs/importer/balanced" 28 | ihelper "github.com/ipfs/go-unixfs/importer/helpers" 29 | "golang.org/x/xerrors" 30 | 31 | ipld "github.com/ipfs/go-ipld-format" 32 | "github.com/ipld/go-car" 33 | ipldprime "github.com/ipld/go-ipld-prime" 34 | basicnode "github.com/ipld/go-ipld-prime/node/basic" 35 | "github.com/ipld/go-ipld-prime/traversal/selector" 36 | "github.com/ipld/go-ipld-prime/traversal/selector/builder" 37 | ) 38 | 39 | const UnixfsLinksPerLevel = 1 << 10 40 | const UnixfsChunkSize uint64 = 1 << 20 41 | 42 | type Finfo struct { 43 | Path string 44 | Name string 45 | Info os.FileInfo 46 | SeekStart int64 47 | SeekEnd int64 48 | } 49 | 50 | // file system tree node 51 | type fsNode struct { 52 | Name string 53 | Hash string 54 | Size uint64 55 | Link []fsNode 56 | } 57 | 58 | type FSBuilder struct { 59 | root *dag.ProtoNode 60 | ds ipld.DAGService 61 | } 62 | 63 | func NewFSBuilder(root *dag.ProtoNode, ds ipld.DAGService) *FSBuilder { 64 | return &FSBuilder{root, ds} 65 | } 66 | 67 | func (b *FSBuilder) Build() (*fsNode, error) { 68 | fsn, err := unixfs.FSNodeFromBytes(b.root.Data()) 69 | if err != nil { 70 | return nil, xerrors.Errorf("input dag is not a unixfs node: %s", err) 71 | } 72 | 73 | rootn := &fsNode{ 74 | Hash: b.root.Cid().String(), 75 | Size: fsn.FileSize(), 76 | Link: []fsNode{}, 77 | } 78 | if !fsn.IsDir() { 79 | return rootn, nil 80 | } 81 | for _, ln := range b.root.Links() { 82 | fn, err := b.getNodeByLink(ln) 83 | if err != nil { 84 | return nil, err 85 | } 86 | rootn.Link = append(rootn.Link, fn) 87 | } 88 | 89 | return rootn, nil 90 | } 91 | 92 | func (b *FSBuilder) getNodeByLink(ln *format.Link) (fn fsNode, err error) { 93 | ctx := context.Background() 94 | fn = fsNode{ 95 | Name: ln.Name, 96 | Hash: ln.Cid.String(), 97 | Size: ln.Size, 98 | } 99 | nd, err := b.ds.Get(ctx, ln.Cid) 100 | if err != nil { 101 | log.Warn(err) 102 | return 103 | } 104 | 105 | nnd, ok := nd.(*dag.ProtoNode) 106 | if !ok { 107 | err = xerrors.Errorf("failed to transformed to dag.ProtoNode") 108 | return 109 | } 110 | fsn, err := unixfs.FSNodeFromBytes(nnd.Data()) 111 | if err != nil { 112 | log.Warnf("input dag is not a unixfs node: %s", err) 113 | return 114 | } 115 | if !fsn.IsDir() { 116 | return 117 | } 118 | for _, ln := range nnd.Links() { 119 | node, err := b.getNodeByLink(ln) 120 | if err != nil { 121 | return node, err 122 | } 123 | fn.Link = append(fn.Link, node) 124 | } 125 | return 126 | } 127 | 128 | func BuildIpldGraph(ctx context.Context, fileList []Finfo, graphName, parentPath, carDir string, parallel int, cb GraphBuildCallback) { 129 | node, fsDetail, err := buildIpldGraph(ctx, fileList, parentPath, carDir, parallel) 130 | if err != nil { 131 | //log.Fatal(err) 132 | cb.OnError(err) 133 | return 134 | } 135 | cb.OnSuccess(node, graphName, fsDetail) 136 | } 137 | 138 | func buildIpldGraph(ctx context.Context, fileList []Finfo, parentPath, carDir string, parallel int) (ipld.Node, string, error) { 139 | bs2 := bstore.NewBlockstore(dss.MutexWrap(datastore.NewMapDatastore())) 140 | dagServ := merkledag.NewDAGService(blockservice.New(bs2, offline.Exchange(bs2))) 141 | 142 | cidBuilder, err := merkledag.PrefixForCidVersion(1) 143 | if err != nil { 144 | return nil, "", err 145 | } 146 | fileNodeMap := make(map[string]*dag.ProtoNode) 147 | dirNodeMap := make(map[string]*dag.ProtoNode) 148 | 149 | var rootNode *dag.ProtoNode 150 | rootNode = unixfs.EmptyDirNode() 151 | rootNode.SetCidBuilder(cidBuilder) 152 | var rootKey = "root" 153 | dirNodeMap[rootKey] = rootNode 154 | 155 | fmt.Println("************ start to build ipld **************") 156 | // build file node 157 | // parallel build 158 | cpun := runtime.NumCPU() 159 | if parallel > cpun { 160 | parallel = cpun 161 | } 162 | pchan := make(chan struct{}, parallel) 163 | wg := sync.WaitGroup{} 164 | lock := sync.Mutex{} 165 | for i, item := range fileList { 166 | wg.Add(1) 167 | go func(i int, item Finfo) { 168 | defer func() { 169 | <-pchan 170 | wg.Done() 171 | }() 172 | pchan <- struct{}{} 173 | fileNode, err := BuildFileNode(item, dagServ, cidBuilder) 174 | if err != nil { 175 | log.Warn(err) 176 | return 177 | } 178 | fn, ok := fileNode.(*dag.ProtoNode) 179 | if !ok { 180 | emsg := "file node should be *dag.ProtoNode" 181 | log.Warn(emsg) 182 | return 183 | } 184 | lock.Lock() 185 | fileNodeMap[item.Path] = fn 186 | lock.Unlock() 187 | fmt.Println(item.Path) 188 | log.Infof("file node: %s", fileNode) 189 | }(i, item) 190 | } 191 | wg.Wait() 192 | 193 | // build dir tree 194 | for _, item := range fileList { 195 | // log.Info(item.Path) 196 | // log.Infof("file name: %s, file size: %d, item size: %d, seek-start:%d, seek-end:%d", item.Name, item.Info.Size(), item.SeekEnd-item.SeekStart, item.SeekStart, item.SeekEnd) 197 | dirStr := path.Dir(item.Path) 198 | parentPath = path.Clean(parentPath) 199 | // when parent path equal target path, and the parent path is also a file path 200 | if parentPath == path.Clean(item.Path) { 201 | dirStr = "" 202 | } else if parentPath != "" && strings.HasPrefix(dirStr, parentPath) { 203 | dirStr = dirStr[len(parentPath):] 204 | } 205 | 206 | if strings.HasPrefix(dirStr, "/") { 207 | dirStr = dirStr[1:] 208 | } 209 | var dirList []string 210 | if dirStr == "" { 211 | dirList = []string{} 212 | } else { 213 | dirList = strings.Split(dirStr, "/") 214 | } 215 | fileNode, ok := fileNodeMap[item.Path] 216 | if !ok { 217 | panic("unexpected, missing file node") 218 | } 219 | if len(dirList) == 0 { 220 | dirNodeMap[rootKey].AddNodeLink(item.Name, fileNode) 221 | continue 222 | } 223 | //log.Info(item.Path) 224 | //log.Info(dirList) 225 | i := len(dirList) - 1 226 | for ; i >= 0; i-- { 227 | // get dirNodeMap by index 228 | var ok bool 229 | var dirNode *dag.ProtoNode 230 | var parentNode *dag.ProtoNode 231 | var parentKey string 232 | dir := dirList[i] 233 | dirKey := getDirKey(dirList, i) 234 | log.Info(dirList) 235 | log.Infof("dirKey: %s", dirKey) 236 | dirNode, ok = dirNodeMap[dirKey] 237 | if !ok { 238 | dirNode = unixfs.EmptyDirNode() 239 | dirNode.SetCidBuilder(cidBuilder) 240 | dirNodeMap[dirKey] = dirNode 241 | } 242 | // add file node to its nearest parent node 243 | if i == len(dirList)-1 { 244 | dirNode.AddNodeLink(item.Name, fileNode) 245 | } 246 | if i == 0 { 247 | parentKey = rootKey 248 | } else { 249 | parentKey = getDirKey(dirList, i-1) 250 | } 251 | log.Infof("parentKey: %s", parentKey) 252 | parentNode, ok = dirNodeMap[parentKey] 253 | if !ok { 254 | parentNode = unixfs.EmptyDirNode() 255 | parentNode.SetCidBuilder(cidBuilder) 256 | dirNodeMap[parentKey] = parentNode 257 | } 258 | if isLinked(parentNode, dir) { 259 | parentNode, err = parentNode.UpdateNodeLink(dir, dirNode) 260 | if err != nil { 261 | return nil, "", err 262 | } 263 | dirNodeMap[parentKey] = parentNode 264 | } else { 265 | parentNode.AddNodeLink(dir, dirNode) 266 | } 267 | } 268 | } 269 | 270 | for _, node := range dirNodeMap { 271 | //fmt.Printf("add node to store: %v\n", node) 272 | //fmt.Printf("key: %s, links: %v\n", key, len(node.Links())) 273 | dagServ.Add(ctx, node) 274 | } 275 | 276 | rootNode = dirNodeMap[rootKey] 277 | fmt.Printf("root node cid: %s\n", rootNode.Cid()) 278 | log.Infof("start to generate car for %s", rootNode.Cid()) 279 | genCarStartTime := time.Now() 280 | //car 281 | carF, err := os.Create(path.Join(carDir, rootNode.Cid().String()+".car")) 282 | if err != nil { 283 | return nil, "", err 284 | } 285 | defer carF.Close() 286 | selector := allSelector() 287 | sc := car.NewSelectiveCar(ctx, bs2, []car.Dag{{Root: rootNode.Cid(), Selector: selector}}) 288 | err = sc.Write(carF) 289 | // cario := cario.NewCarIO() 290 | // err = cario.WriteCar(context.Background(), bs2, rootNode.Cid(), selector, carF) 291 | if err != nil { 292 | return nil, "", err 293 | } 294 | log.Infof("generate car file completed, time elapsed: %s", time.Now().Sub(genCarStartTime)) 295 | 296 | fsBuilder := NewFSBuilder(rootNode, dagServ) 297 | fsNode, err := fsBuilder.Build() 298 | if err != nil { 299 | return nil, "", err 300 | } 301 | fsNodeBytes, err := json.Marshal(fsNode) 302 | if err != nil { 303 | return nil, "", err 304 | } 305 | //log.Info(dirNodeMap) 306 | fmt.Println("++++++++++++ finished to build ipld +++++++++++++") 307 | return rootNode, fmt.Sprintf("%s", fsNodeBytes), nil 308 | } 309 | 310 | func allSelector() ipldprime.Node { 311 | ssb := builder.NewSelectorSpecBuilder(basicnode.Prototype.Any) 312 | return ssb.ExploreRecursive(selector.RecursionLimitNone(), 313 | ssb.ExploreAll(ssb.ExploreRecursiveEdge())). 314 | Node() 315 | } 316 | 317 | func getDirKey(dirList []string, i int) (key string) { 318 | for j := 0; j <= i; j++ { 319 | key += dirList[j] 320 | if j < i { 321 | key += "." 322 | } 323 | } 324 | return 325 | } 326 | 327 | func isLinked(node *dag.ProtoNode, name string) bool { 328 | for _, lk := range node.Links() { 329 | if lk.Name == name { 330 | return true 331 | } 332 | } 333 | return false 334 | } 335 | 336 | type fileSlice struct { 337 | r *os.File 338 | offset int64 339 | start int64 340 | end int64 341 | fileSize int64 342 | } 343 | 344 | func (fs *fileSlice) Read(p []byte) (n int, err error) { 345 | if fs.end == 0 { 346 | fs.end = fs.fileSize - 1 347 | } 348 | if fs.offset == 0 && fs.start > 0 { 349 | _, err = fs.r.Seek(fs.start, 0) 350 | if err != nil { 351 | log.Warn(err) 352 | return 0, err 353 | } 354 | fs.offset = fs.start 355 | } 356 | //fmt.Printf("offset: %d, end: %d, start: %d, size: %d\n", fs.offset, fs.end, fs.start, fs.fileSize) 357 | if fs.end-fs.offset+1 == 0 { 358 | return 0, io.EOF 359 | } 360 | if fs.end-fs.offset+1 < 0 { 361 | return 0, xerrors.Errorf("read data out bound of the slice") 362 | } 363 | plen := len(p) 364 | leftLen := fs.end - fs.offset + 1 365 | if leftLen > int64(plen) { 366 | n, err = fs.r.Read(p) 367 | if err != nil { 368 | log.Warn(err) 369 | return 370 | } 371 | //fmt.Printf("read num: %d\n", n) 372 | fs.offset += int64(n) 373 | return 374 | } 375 | b := make([]byte, leftLen) 376 | n, err = fs.r.Read(b) 377 | if err != nil { 378 | return 379 | } 380 | //fmt.Printf("read num: %d\n", n) 381 | fs.offset += int64(n) 382 | 383 | return copy(p, b), io.EOF 384 | } 385 | 386 | func BuildFileNode(item Finfo, bufDs ipld.DAGService, cidBuilder cid.Builder) (node ipld.Node, err error) { 387 | var r io.Reader 388 | f, err := os.Open(item.Path) 389 | if err != nil { 390 | return nil, err 391 | } 392 | r = f 393 | 394 | // read all data of item 395 | if item.SeekStart > 0 || item.SeekEnd > 0 { 396 | r = &fileSlice{ 397 | r: f, 398 | start: item.SeekStart, 399 | end: item.SeekEnd, 400 | fileSize: item.Info.Size(), 401 | } 402 | } 403 | 404 | params := ihelper.DagBuilderParams{ 405 | Maxlinks: UnixfsLinksPerLevel, 406 | RawLeaves: false, 407 | CidBuilder: cidBuilder, 408 | Dagserv: bufDs, 409 | NoCopy: false, 410 | } 411 | db, err := params.New(chunker.NewSizeSplitter(r, int64(UnixfsChunkSize))) 412 | if err != nil { 413 | return nil, err 414 | } 415 | node, err = balanced.Layout(db) 416 | if err != nil { 417 | return nil, err 418 | } 419 | return 420 | } 421 | 422 | func GenGraphName(graphName string, sliceCount, sliceTotal int) string { 423 | if sliceTotal == 1 { 424 | return fmt.Sprintf("%s.car", graphName) 425 | } 426 | return fmt.Sprintf("%s-total-%d-part-%d.car", graphName, sliceTotal, sliceCount+1) 427 | } 428 | 429 | func GetGraphCount(args []string, sliceSize int64) int { 430 | list, err := GetFileList(args) 431 | if err != nil { 432 | panic(err) 433 | } 434 | var totalSize int64 = 0 435 | for _, path := range list { 436 | finfo, err := os.Stat(path) 437 | if err != nil { 438 | panic(err) 439 | } 440 | totalSize += finfo.Size() 441 | } 442 | if totalSize == 0 { 443 | return 0 444 | } 445 | count := (totalSize / sliceSize) + 1 446 | return int(count) 447 | } 448 | 449 | func GetFileListAsync(args []string) chan Finfo { 450 | fichan := make(chan Finfo, 0) 451 | go func() { 452 | defer close(fichan) 453 | for _, path := range args { 454 | finfo, err := os.Stat(path) 455 | if err != nil { 456 | log.Warn(err) 457 | return 458 | } 459 | // 忽略隐藏目录 460 | if strings.HasPrefix(finfo.Name(), ".") { 461 | continue 462 | } 463 | if finfo.IsDir() { 464 | files, err := ioutil.ReadDir(path) 465 | if err != nil { 466 | log.Warn(err) 467 | return 468 | } 469 | templist := make([]string, 0) 470 | for _, n := range files { 471 | templist = append(templist, fmt.Sprintf("%s/%s", path, n.Name())) 472 | } 473 | embededChan := GetFileListAsync(templist) 474 | if err != nil { 475 | log.Warn(err) 476 | return 477 | } 478 | 479 | for item := range embededChan { 480 | fichan <- item 481 | } 482 | } else { 483 | fichan <- Finfo{ 484 | Path: path, 485 | Name: finfo.Name(), 486 | Info: finfo, 487 | } 488 | } 489 | } 490 | }() 491 | 492 | return fichan 493 | } 494 | 495 | func GetFileList(args []string) (fileList []string, err error) { 496 | fileList = make([]string, 0) 497 | for _, path := range args { 498 | finfo, err := os.Stat(path) 499 | if err != nil { 500 | return nil, err 501 | } 502 | // 忽略隐藏目录 503 | if strings.HasPrefix(finfo.Name(), ".") { 504 | continue 505 | } 506 | if finfo.IsDir() { 507 | files, err := ioutil.ReadDir(path) 508 | if err != nil { 509 | return nil, err 510 | } 511 | templist := make([]string, 0) 512 | for _, n := range files { 513 | templist = append(templist, fmt.Sprintf("%s/%s", path, n.Name())) 514 | } 515 | list, err := GetFileList(templist) 516 | if err != nil { 517 | return nil, err 518 | } 519 | fileList = append(fileList, list...) 520 | } else { 521 | fileList = append(fileList, path) 522 | } 523 | } 524 | 525 | return 526 | } 527 | 528 | // piece info 529 | type PieceInfo struct { 530 | PayloadCid string `csv:"payload_cid"` 531 | Filename string `csv:"filename"` 532 | PieceCid string `csv:"piece_cid"` 533 | PieceSize uint64 `csv:"piece_size"` 534 | } 535 | 536 | // manifest 537 | type Manifest struct { 538 | PayloadCid string `csv:"payload_cid"` 539 | Filename string `csv:"filename"` 540 | } 541 | --------------------------------------------------------------------------------