├── .github └── workflows │ ├── ci.yml │ ├── docs-publish.yml │ └── release.yml ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── cmd ├── clone.go ├── config.go ├── diff.go ├── docs.go ├── get.go ├── init.go ├── list.go ├── log.go ├── pull.go ├── push.go ├── put.go ├── root.go ├── status.go ├── tag.go ├── util_test.go ├── utils.go └── version.go ├── docs ├── .gitignore ├── Makefile ├── README.md ├── assets │ ├── ArtiVC_workspace.svg │ ├── art-overview.png │ ├── cheatsheet.png │ └── sprites │ │ └── regular.svg ├── config │ └── _default │ │ ├── config.yaml │ │ ├── languages.yaml │ │ └── params.yaml ├── content │ └── en │ │ ├── _includes │ │ ├── _index.md │ │ └── include-page.md │ │ ├── _index.md │ │ ├── backends │ │ ├── _index.md │ │ ├── azureblob.md │ │ ├── gcs.md │ │ ├── local.md │ │ ├── rclone.md │ │ ├── s3.md │ │ └── ssh.md │ │ ├── commands │ │ ├── _index.md │ │ ├── avc.md │ │ ├── avc_clone.md │ │ ├── avc_completion.md │ │ ├── avc_completion_bash.md │ │ ├── avc_completion_fish.md │ │ ├── avc_completion_powershell.md │ │ ├── avc_completion_zsh.md │ │ ├── avc_config.md │ │ ├── avc_diff.md │ │ ├── avc_docs.md │ │ ├── avc_get.md │ │ ├── avc_init.md │ │ ├── avc_list.md │ │ ├── avc_log.md │ │ ├── avc_pull.md │ │ ├── avc_push.md │ │ ├── avc_put.md │ │ ├── avc_status.md │ │ ├── avc_tag.md │ │ ├── avc_version.md │ │ └── images │ │ │ └── geekdoc-dark.png │ │ ├── design │ │ ├── _index.md │ │ ├── alternatives.md │ │ ├── benchmark.md │ │ ├── faq.md │ │ ├── how-it-works.md │ │ ├── images │ │ │ ├── artiv-overview.png │ │ │ ├── benchmark1.svg │ │ │ ├── benchmark2.svg │ │ │ └── benchmark3.svg │ │ └── mlops.md │ │ ├── posts │ │ ├── _index.md │ │ ├── initial-release.md │ │ └── rename-project.md │ │ ├── usage │ │ ├── _index.md │ │ ├── cheatsheet.md │ │ ├── dryrun.md │ │ ├── expose.md │ │ ├── getting-started.md │ │ ├── ignore-file.md │ │ ├── images │ │ │ └── cheatsheet.png │ │ ├── partial-download.md │ │ └── windows-supports.md │ │ └── use-cases │ │ ├── _index.md │ │ ├── backup.md │ │ ├── dataprep.md │ │ └── experiment.md ├── data │ └── menu │ │ ├── extra.yaml │ │ ├── main.yaml │ │ └── more.yaml ├── layouts │ └── shortcodes │ │ └── sprites.html └── static │ ├── .htaccess │ ├── ArtiVC_workspace.png │ ├── _includes │ ├── example.html.part │ └── example.md.part │ ├── brand.svg │ ├── custom.css │ ├── custom.css.example │ ├── favicon │ ├── android-chrome-192x192.png │ ├── android-chrome-512x512.png │ ├── apple-touch-icon.png │ ├── favicon-16x16.png │ ├── favicon-32x32.png │ ├── favicon.ico │ └── favicon.svg │ ├── media │ ├── bundle-menu.png │ ├── file-tree.png │ └── more-menu.png │ └── socialartiv.png ├── go.mod ├── go.sum ├── internal ├── core │ ├── config.go │ ├── errors.go │ ├── hash.go │ ├── manager.go │ ├── manager_test.go │ ├── types.go │ ├── utils.go │ └── utils_test.go ├── executor │ ├── executor.go │ └── executor_test.go ├── log │ └── log.go └── repository │ ├── azureblob.go │ ├── azureblob_test.go │ ├── errors.go │ ├── gcs.go │ ├── http.go │ ├── local.go │ ├── local_test.go │ ├── meter.go │ ├── rclone.go │ ├── repo_integration_test.go │ ├── repository.go │ ├── repository_test.go │ ├── s3.go │ └── ssh.go └── main.go /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Go test 2 | 3 | on: 4 | push: 5 | branches: "*" 6 | pull_request: 7 | branches: "*" 8 | 9 | jobs: 10 | unit-test: 11 | strategy: 12 | matrix: 13 | os: [ubuntu-latest, macos-latest] 14 | go: [1.17, 1.18, 1.19, '1.20'] 15 | include: 16 | - os: ubuntu-latest 17 | 
go-build: ~/.cache/go-build 18 | name: ${{ matrix.os }} @ Go ${{ matrix.go }} 19 | runs-on: ${{ matrix.os }} 20 | steps: 21 | - name: Set up Go ${{ matrix.go }} 22 | uses: actions/setup-go@v2 23 | with: 24 | go-version: ${{ matrix.go }} 25 | 26 | - name: Checkout Code 27 | uses: actions/checkout@v3 28 | with: 29 | ref: ${{ github.ref }} 30 | 31 | - uses: actions/cache@v2 32 | with: 33 | path: | 34 | ${{ matrix.go-build }} 35 | ~/go/pkg/mod 36 | key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} 37 | restore-keys: | 38 | ${{ runner.os }}-go- 39 | - name: Run Tests 40 | run: | 41 | make test 42 | 43 | it-test-s3: 44 | runs-on: ubuntu-latest 45 | steps: 46 | - uses: actions/checkout@v2 47 | 48 | - name: Set up Go 49 | uses: actions/setup-go@v2 50 | with: 51 | go-version: 1.17 52 | 53 | - name: Run test 54 | run: make integration-test 55 | env: 56 | TEST_REPOSITORY: ${{ secrets.REPOSITORY_S3 }} 57 | AWS_REGION: ${{ secrets.AWS_REGION }} 58 | AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} 59 | AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 60 | -------------------------------------------------------------------------------- /.github/workflows/docs-publish.yml: -------------------------------------------------------------------------------- 1 | name: Netlify Publish 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | publish: 9 | runs-on: ubuntu-20.04 10 | steps: 11 | - name: Setup Hugo 12 | uses: peaceiris/actions-hugo@v2 13 | with: 14 | hugo-version: latest 15 | 16 | - name: Checkout files 17 | uses: actions/checkout@v2 18 | 19 | - name: Build docs files 20 | run: make -C docs/ build 21 | 22 | - name: Deploy to Netlify 23 | uses: nwtgck/actions-netlify@v1.2 24 | with: 25 | publish-dir: ./docs/public 26 | deploy-message: "Deploy from GitHub Actions with tag ${{ github.event.release.tag_name }}" 27 | env: 28 | NETLIFY_AUTH_TOKEN: ${{ secrets.NETLIFY_AUTH_TOKEN }} 29 | NETLIFY_SITE_ID: ${{ secrets.NETLIFY_SITE_ID }} 30 | timeout-minutes: 1 31 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Upload to release assets 2 | 3 | on: 4 | release: 5 | types: [ created ] 6 | 7 | jobs: 8 | releases-matrix: 9 | name: Release Go Binary 10 | runs-on: ubuntu-latest 11 | strategy: 12 | matrix: 13 | goos: [ linux, darwin ] 14 | goarch: [ arm64, amd64 ] 15 | steps: 16 | - name: Show environment 17 | run: export 18 | - uses: actions/checkout@v2 19 | - uses: wangyoucao577/go-release-action@v1.25 20 | with: 21 | github_token: ${{ secrets.GITHUB_TOKEN }} 22 | goos: ${{ matrix.goos }} 23 | goarch: ${{ matrix.goarch }} 24 | build_command: "make" 25 | build_flags: "build" 26 | ldflags: "-I." 
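        # build_command "make" with build_flags "build" amounts to running `make build`, which should produce the bin/avc binary picked up by extra_files below.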
27 | extra_files: bin/avc 28 | pre_command: "git clean -df" 29 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # If you prefer the allow list template instead of the deny list, see community template: 2 | # https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore 3 | # 4 | # Binaries for programs and plugins 5 | *.exe 6 | *.exe~ 7 | *.dll 8 | *.so 9 | *.dylib 10 | 11 | # Test binary, built with `go test -c` 12 | *.test 13 | 14 | # Output of the go coverage tool, specifically when used with LiteIDE 15 | *.out 16 | 17 | # Dependency directories (remove the comment below to include it) 18 | # vendor/ 19 | 20 | # Go workspace file 21 | go.work 22 | 23 | main 24 | .vscode 25 | bin 26 | .DS_Store 27 | 28 | .idea 29 | generated_docs 30 | .avc 31 | .avcignore 32 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | VERSION = 2 | LDFLAGS = 3 | GO ?= go 4 | 5 | GIT_COMMIT = $(shell git rev-parse HEAD) 6 | GIT_SHA = $(shell git rev-parse --short HEAD) 7 | GIT_TAG = $(shell git describe --tags --abbrev=0 --exact-match 2>/dev/null) 8 | GIT_DIRTY = $(shell test -n "`git status --porcelain`" && echo "dirty" || echo "clean") 9 | 10 | ifeq ($(VERSION),) 11 | VERSION := $(shell echo $${GITHUB_REF_NAME}) 12 | endif 13 | 14 | LDFLAGS += -X github.com/infuseai/artivc/cmd.tagVersion=${VERSION} 15 | LDFLAGS += -X github.com/infuseai/artivc/cmd.gitCommit=${GIT_COMMIT} 16 | LDFLAGS += -X github.com/infuseai/artivc/cmd.gitTreeState=${GIT_DIRTY} 17 | LDFLAGS += -s -w 18 | LDFLAGS += $(EXT_LDFLAGS) 19 | 20 | 21 | build: 22 | mkdir -p bin 23 | $(GO) build -o bin/avc -ldflags '$(LDFLAGS)' main.go 24 | 25 | test: 26 | $(GO) test ./... 27 | 28 | integration-test: 29 | $(GO) test -v ./internal/repository 30 | 31 | .PHONY: doc-server 32 | doc-server: 33 | make -C docs/ start 34 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 |

4 | 5 |

6 | 7 | Apache License 8 | 9 | Go tests status 10 |

11 | 
12 | 
13 | # ArtiVC
14 | 
15 | [ArtiVC](https://artivc.io/) (**Arti**facts **V**ersion **C**ontrol) is a handy command-line tool for data versioning on cloud storage. With only one command, it helps you neatly snapshot your data and switch between versions. Even better, it integrates seamlessly with your existing cloud environment. ArtiVC supports three major cloud providers (AWS S3, Google Cloud Storage, Azure Blob Storage) and remote filesystems over SSH.
16 | 
17 | [![asciicast](https://asciinema.org/a/6JEhzpJ5QMiSkiC74s5CyT257.svg)](https://asciinema.org/a/6JEhzpJ5QMiSkiC74s5CyT257?autoplay=1)
18 | 
19 | Try it out with the [Getting Started](https://artivc.io/usage/getting-started/) guide.
20 | 
21 | # Features
22 | 
23 | - **Data Versioning**: Version your data the way you version code. ArtiVC supports commit history, commit messages, and version tags. You can diff two commits and pull data from a specific version.
24 | - **Use your own storage**: If you are used to keeping large files on NFS or S3, you can keep them on the same storage without changes.
25 | - **No additional server is required**: ArtiVC is a CLI tool. No server or gateway needs to be installed or operated.
26 | - **Multiple backend support**: ArtiVC natively supports the local filesystem, remote filesystems (via SSH), AWS S3, Google Cloud Storage, and Azure Blob Storage as backends, and 40+ more backends are supported through the [Rclone](https://artivc.io/backends/rclone/) integration. [Learn more](https://artivc.io/backends/)
27 | - **Painless Configuration**: No one likes to configure. So ArtiVC leverages your existing configuration as much as possible: use `.ssh/config` for SSH access, and use `aws configure`, `gcloud auth application-default login`, or `az login` for the cloud platforms.
28 | - **Efficient storage and transfer**: The file structure of the repository is stored and transferred efficiently by [design](https://artivc.io/design/how-it-works/). It avoids storing duplicated content and minimizes the number of files to upload when pushing a new version. 
[Learn more](https://artivc.io/design/benchmark/) 29 | 30 | # Documentation 31 | 32 | For more detail, please read the [ArtiVC documentation](https://artivc.io/usage/getting-started/) 33 | -------------------------------------------------------------------------------- /cmd/clone.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "io/fs" 7 | "os" 8 | "path/filepath" 9 | "strings" 10 | 11 | "github.com/infuseai/artivc/internal/core" 12 | "github.com/infuseai/artivc/internal/repository" 13 | "github.com/spf13/cobra" 14 | ) 15 | 16 | var cloneCommand = &cobra.Command{ 17 | Use: "clone []", 18 | Short: "Clone a workspace", 19 | DisableFlagsInUseLine: true, 20 | Example: ` # clone a workspace with local repository 21 | avc clone /path/to/mydataset 22 | 23 | # clone a workspace with s3 repository 24 | avc clone s3://mybucket/path/to/mydataset`, 25 | Args: cobra.RangeArgs(1, 2), 26 | Run: func(cmd *cobra.Command, args []string) { 27 | cwd, err := os.Getwd() 28 | exitWithError(err) 29 | result, err := repository.ParseRepo(args[0]) 30 | exitWithError(err) 31 | repo := result.Repo 32 | 33 | if strings.HasPrefix(repo, "http") && !repository.IsAzureStorageUrl(repo) { 34 | exitWithError(errors.New("clone not support under http(s) repo")) 35 | } 36 | 37 | _, err = repository.NewRepository(result) 38 | exitWithError(err) 39 | 40 | destDir, err := repository.ParseRepoName(result) 41 | exitWithError(err) 42 | 43 | if len(args) > 1 { 44 | destDir = args[1] 45 | } 46 | 47 | baseDir := filepath.Join(cwd, destDir) 48 | err = os.Mkdir(baseDir, fs.ModePerm) 49 | if err == nil || (os.IsExist(err) && isDirEmpty(baseDir)) { 50 | // pass 51 | } else if os.IsExist(err) { 52 | exitWithFormat("fatal: destination path '%s' already exists and is not an empty directory.", destDir) 53 | } else { 54 | exitWithFormat("fatal: cannot create destination path '%s'.", destDir) 55 | } 56 | fmt.Printf("Cloning into '%s'...\n", destDir) 57 | 58 | exitWithError(core.InitWorkspace(baseDir, repo)) 59 | 60 | config, err := core.LoadConfig(baseDir) 61 | exitWithError(err) 62 | 63 | mngr, err := core.NewArtifactManager(config) 64 | exitWithError(err) 65 | 66 | err = mngr.Pull(core.PullOptions{}) 67 | if err != nil { 68 | os.RemoveAll(baseDir) // remove created dir 69 | exitWithError(err) 70 | } 71 | }, 72 | } 73 | -------------------------------------------------------------------------------- /cmd/config.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "os" 7 | "strings" 8 | 9 | "github.com/infuseai/artivc/internal/core" 10 | "github.com/infuseai/artivc/internal/repository" 11 | "github.com/spf13/cobra" 12 | ) 13 | 14 | var configCommand = &cobra.Command{ 15 | Use: "config [ []]", 16 | Short: "Configure the workspace", 17 | Long: "Configure the workspace. 
The config file is stored at \".avc/config\".", 18 | DisableFlagsInUseLine: true, 19 | Example: ` # List the config 20 | avc config 21 | 22 | # Get the config 23 | avc config repo.url 24 | 25 | # Set the config 26 | avc config repo.url s3://your-bucket/data`, 27 | Args: cobra.RangeArgs(0, 2), 28 | Run: func(cmd *cobra.Command, args []string) { 29 | config, err := core.LoadConfig("") 30 | exitWithError(err) 31 | 32 | switch len(args) { 33 | case 0: 34 | config.Print() 35 | case 1: 36 | value := config.Get(args[0]) 37 | if value != nil { 38 | fmt.Println(value) 39 | } else { 40 | fmt.Fprintf(os.Stderr, "key not found: %s\n", args[0]) 41 | } 42 | case 2: 43 | key := args[0] 44 | value := args[1] 45 | if key == "repo.url" { 46 | if strings.HasPrefix(value, "http") && !repository.IsAzureStorageUrl(value) { 47 | exitWithError(errors.New("http(s) repository is not supported")) 48 | } 49 | 50 | result, err := repository.ParseRepo(value) 51 | exitWithError(err) 52 | 53 | _, err = repository.NewRepository(result) 54 | exitWithError(err) 55 | } 56 | 57 | config.Set(key, value) 58 | exitWithError(config.Save()) 59 | } 60 | }, 61 | } 62 | -------------------------------------------------------------------------------- /cmd/diff.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/infuseai/artivc/internal/core" 5 | "github.com/spf13/cobra" 6 | ) 7 | 8 | var diffCommand = &cobra.Command{ 9 | Use: "diff", 10 | Short: "Diff workspace/commits/references", 11 | Example: `# Diff two version 12 | avc diff v0.1.0 v0.2.0`, 13 | Args: cobra.ExactArgs(2), 14 | Run: func(cmd *cobra.Command, args []string) { 15 | left := args[0] 16 | right := args[1] 17 | config, err := core.LoadConfig("") 18 | exitWithError(err) 19 | 20 | mngr, err := core.NewArtifactManager(config) 21 | exitWithError(err) 22 | 23 | err = mngr.Fetch() 24 | exitWithError(err) 25 | 26 | result, err := mngr.Diff(core.DiffOptions{ 27 | LeftRef: left, 28 | RightRef: right, 29 | }) 30 | exitWithError(err) 31 | 32 | result.Print(true) 33 | }, 34 | } 35 | -------------------------------------------------------------------------------- /cmd/docs.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "io/fs" 5 | "os" 6 | "path" 7 | "strings" 8 | 9 | "github.com/spf13/cobra" 10 | "github.com/spf13/cobra/doc" 11 | ) 12 | 13 | var docsCommand = &cobra.Command{ 14 | Use: "docs", 15 | Short: "Generate docs", 16 | Long: `Generate docs. 
For example: 17 | 18 | avc docs`, 19 | Run: func(cmd *cobra.Command, args []string) { 20 | const DocDir = "./generated_docs" 21 | err := os.Mkdir(DocDir, fs.ModePerm) 22 | 23 | if err == nil || (err != nil && os.IsExist(err)) { 24 | // pass when directory existing 25 | } else { 26 | exitWithFormat("Failed to create %s, skip to generate documents\n", DocDir) 27 | } 28 | linkHandler := func(name string) string { 29 | base := strings.TrimSuffix(name, path.Ext(name)) 30 | return "/commands/" + strings.ToLower(base) + "/" 31 | } 32 | 33 | exitWithError(doc.GenMarkdownTreeCustom(cmd.Root(), DocDir, func(filestring string) string { return "" }, linkHandler)) 34 | }, 35 | } 36 | -------------------------------------------------------------------------------- /cmd/get.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "errors" 5 | "os" 6 | "path/filepath" 7 | "strings" 8 | 9 | "github.com/infuseai/artivc/internal/core" 10 | "github.com/spf13/cobra" 11 | ) 12 | 13 | // getCmd represents the download command 14 | var getCmd = &cobra.Command{ 15 | Use: "get [-o ] [@|] [--] ...", 16 | DisableFlagsInUseLine: true, 17 | Short: "Download data from a repository", 18 | Example: ` # Download the latest version. The data go to "mydataset" folder. 19 | avc get s3://bucket/mydataset 20 | 21 | # Download the specific version 22 | avc get s3://mybucket/path/to/mydataset@v1.0.0 23 | 24 | # Download to a specific folder 25 | avc get -o /tmp/mydataset s3://bucket/mydataset 26 | 27 | # Download partial files 28 | avc get -o /tmp/mydataset s3://bucket/mydataset -- path/to/file1 path/to/file2 data/`, 29 | Args: cobra.MinimumNArgs(1), 30 | Run: func(cmd *cobra.Command, args []string) { 31 | var err error 32 | 33 | repoUrl, ref, err := parseRepoStr(args[0]) 34 | exitWithError(err) 35 | 36 | baseDir, err := cmd.Flags().GetString("output") 37 | exitWithError(err) 38 | 39 | if baseDir == "" { 40 | comps := strings.Split(repoUrl, "/") 41 | if len(comps) == 0 { 42 | exitWithFormat("invlaid path: %v", repoUrl) 43 | } 44 | baseDir = comps[len(comps)-1] 45 | } 46 | baseDir, err = filepath.Abs(baseDir) 47 | exitWithError(err) 48 | 49 | metadataDir, _ := os.MkdirTemp(os.TempDir(), "*-avc") 50 | defer os.RemoveAll(metadataDir) 51 | 52 | config := core.NewConfig(baseDir, metadataDir, repoUrl) 53 | 54 | mngr, err := core.NewArtifactManager(config) 55 | exitWithError(err) 56 | 57 | options := core.PullOptions{NoFetch: true} 58 | if ref != "" { 59 | options.RefOrCommit = &ref 60 | } 61 | 62 | options.Delete, err = cmd.Flags().GetBool("delete") 63 | exitWithError(err) 64 | 65 | if len(args) > 1 { 66 | if options.Delete { 67 | exitWithError(errors.New("cannot download partial files and specify delete flag at the same time")) 68 | } 69 | fileInclude := core.NewAvcInclude(args[1:]) 70 | options.FileFilter = func(path string) bool { 71 | return fileInclude.MatchesPath(path) 72 | } 73 | } 74 | exitWithError(mngr.Pull(options)) 75 | }, 76 | } 77 | 78 | func init() { 79 | getCmd.Flags().StringP("output", "o", "", "Output directory") 80 | getCmd.Flags().Bool("delete", false, "Delete extra files which are not listed in commit") 81 | } 82 | -------------------------------------------------------------------------------- /cmd/init.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "os" 7 | "strings" 8 | 9 | "github.com/infuseai/artivc/internal/core" 10 | 
"github.com/infuseai/artivc/internal/repository" 11 | "github.com/spf13/cobra" 12 | ) 13 | 14 | var initCommand = &cobra.Command{ 15 | Use: "init ", 16 | Short: "Initiate a workspace", 17 | DisableFlagsInUseLine: true, 18 | Example: ` # Init a workspace with local repository 19 | avc init /path/to/mydataset 20 | 21 | # Init a workspace with s3 repository 22 | avc init s3://mybucket/path/to/mydataset`, 23 | Args: cobra.ExactArgs(1), 24 | Run: func(cmd *cobra.Command, args []string) { 25 | cwd, err := os.Getwd() 26 | exitWithError(err) 27 | 28 | result, err := repository.ParseRepo(args[0]) 29 | exitWithError(err) 30 | repo := result.Repo 31 | 32 | if strings.HasPrefix(repo, "http") && !repository.IsAzureStorageUrl(repo) { 33 | exitWithError(errors.New("init not support under http(s) repo")) 34 | } 35 | 36 | _, err = repository.NewRepository(result) 37 | exitWithError(err) 38 | 39 | fmt.Printf("Initialize the artivc workspace of the repository '%s'\n", repo) 40 | exitWithError(core.InitWorkspace(cwd, repo)) 41 | }, 42 | } 43 | 44 | func init() { 45 | } 46 | -------------------------------------------------------------------------------- /cmd/list.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/infuseai/artivc/internal/core" 5 | "github.com/spf13/cobra" 6 | ) 7 | 8 | var listCommand = &cobra.Command{ 9 | Use: "list", 10 | Short: "List files of a commit", 11 | Aliases: []string{"ls"}, 12 | Example: ` # List files for the latest version 13 | avc list 14 | 15 | # List files for the specific version 16 | avc list v1.0.0`, 17 | Args: cobra.RangeArgs(0, 1), 18 | Run: func(cmd *cobra.Command, args []string) { 19 | var ref string 20 | if len(args) == 0 { 21 | ref = core.RefLatest 22 | } else { 23 | ref = args[0] 24 | } 25 | 26 | config, err := core.LoadConfig("") 27 | exitWithError(err) 28 | 29 | mngr, err := core.NewArtifactManager(config) 30 | exitWithError(err) 31 | 32 | exitWithError(mngr.List(ref)) 33 | }, 34 | } 35 | 36 | func init() { 37 | } 38 | -------------------------------------------------------------------------------- /cmd/log.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/infuseai/artivc/internal/core" 5 | "github.com/spf13/cobra" 6 | ) 7 | 8 | var logCommand = &cobra.Command{ 9 | Use: "log [|]", 10 | DisableFlagsInUseLine: true, 11 | Short: "Log commits", 12 | Example: ` # Log commits from the latest 13 | avc log 14 | 15 | # Log commits from a specific version 16 | avc log v1.0.0`, 17 | Args: cobra.RangeArgs(0, 1), 18 | Run: func(cmd *cobra.Command, args []string) { 19 | config, err := core.LoadConfig("") 20 | exitWithError(err) 21 | 22 | var ref string 23 | if len(args) == 0 { 24 | ref = core.RefLatest 25 | } else { 26 | ref = args[0] 27 | } 28 | 29 | mngr, err := core.NewArtifactManager(config) 30 | exitWithError(err) 31 | 32 | exitWithError(mngr.Log(ref)) 33 | }, 34 | } 35 | 36 | func init() { 37 | } 38 | -------------------------------------------------------------------------------- /cmd/pull.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "errors" 5 | 6 | "github.com/infuseai/artivc/internal/core" 7 | "github.com/spf13/cobra" 8 | ) 9 | 10 | // getCmd represents the download command 11 | var pullCmd = &cobra.Command{ 12 | Use: "pull [|] [flags] -- ...", 13 | Short: "Pull data from the repository", 14 | Example: ` # Pull the latest 
version 15 | avc pull 16 | 17 | # Pull from a specifc version 18 | avc pull v1.0.0 19 | 20 | # Pull partial files 21 | avc pull -- path/to/partia 22 | avc pull v0.1.0 -- path/to/partia ...`, 23 | Run: func(cmd *cobra.Command, args []string) { 24 | config, err := core.LoadConfig("") 25 | exitWithError(err) 26 | 27 | mngr, err := core.NewArtifactManager(config) 28 | exitWithError(err) 29 | 30 | // options 31 | option := core.PullOptions{} 32 | 33 | option.DryRun, err = cmd.Flags().GetBool("dry-run") 34 | exitWithError(err) 35 | 36 | option.Delete, err = cmd.Flags().GetBool("delete") 37 | exitWithError(err) 38 | 39 | argsLenBeforeDash := cmd.Flags().ArgsLenAtDash() 40 | if argsLenBeforeDash == -1 { 41 | if len(args) == 1 { 42 | option.RefOrCommit = &args[0] 43 | } else if len(args) > 1 { 44 | exitWithError(errors.New("please specify \"--\" flag teminator")) 45 | } 46 | } else { 47 | if argsLenBeforeDash == 1 { 48 | option.RefOrCommit = &args[0] 49 | } 50 | 51 | if len(args)-argsLenBeforeDash > 0 { 52 | if option.Delete { 53 | exitWithError(errors.New("cannot pull partial files and specify delete flag at the same time")) 54 | } 55 | 56 | fileInclude := core.NewAvcInclude(args[argsLenBeforeDash:]) 57 | option.FileFilter = func(path string) bool { 58 | return fileInclude.MatchesPath(path) 59 | } 60 | } 61 | } 62 | 63 | exitWithError(mngr.Pull(option)) 64 | }, 65 | } 66 | 67 | func init() { 68 | pullCmd.Flags().Bool("dry-run", false, "Dry run") 69 | pullCmd.Flags().Bool("delete", false, "Delete extra files which are not listed in commit") 70 | } 71 | -------------------------------------------------------------------------------- /cmd/push.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/infuseai/artivc/internal/core" 5 | "github.com/spf13/cobra" 6 | ) 7 | 8 | // getCmd represents the download command 9 | var pushCmd = &cobra.Command{ 10 | Use: "push [-m ]", 11 | DisableFlagsInUseLine: true, 12 | Short: "Push data to the repository", 13 | Long: `Push data to the repository. 
There is no branch implemented yet, all put and push commands are always creating a commit and treat as the latest commit.`, 14 | Example: ` # Push to the latest version 15 | avc push -m 'Initial version' 16 | 17 | # Push to the latest version and tag to specific version 18 | avc push -m 'Initial version' 19 | avc tag v1.0.0`, 20 | Args: cobra.NoArgs, 21 | Run: func(cmd *cobra.Command, args []string) { 22 | config, err := core.LoadConfig("") 23 | exitWithError(err) 24 | 25 | // options 26 | option := core.PushOptions{} 27 | message, err := cmd.Flags().GetString("message") 28 | exitWithError(err) 29 | 30 | if message != "" { 31 | option.Message = &message 32 | } 33 | 34 | option.DryRun, err = cmd.Flags().GetBool("dry-run") 35 | exitWithError(err) 36 | 37 | // push 38 | mngr, err := core.NewArtifactManager(config) 39 | exitWithError(err) 40 | 41 | exitWithError(mngr.Push(option)) 42 | }, 43 | } 44 | 45 | func init() { 46 | pushCmd.Flags().StringP("message", "m", "", "Commit meessage") 47 | pushCmd.Flags().Bool("dry-run", false, "Dry run") 48 | } 49 | -------------------------------------------------------------------------------- /cmd/put.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "os" 5 | "path/filepath" 6 | 7 | "github.com/infuseai/artivc/internal/core" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | var putCmd = &cobra.Command{ 12 | Use: "put [-m ] [@]", 13 | DisableFlagsInUseLine: true, 14 | Short: "Upload data to a repository", 15 | Example: ` # Upload the latest version 16 | avc put ./folder/ /path/to/mydataset 17 | 18 | # Upload the specific version 19 | avc put ./folder/ /path/to/mydataset@v1.0.0`, 20 | Args: cobra.ExactArgs(2), 21 | Run: func(cmd *cobra.Command, args []string) { 22 | baseDir, err := filepath.Abs(args[0]) 23 | exitWithError(err) 24 | 25 | repoUrl, ref, err := parseRepoStr(args[1]) 26 | exitWithError(err) 27 | 28 | // options 29 | option := core.PushOptions{} 30 | message, err := cmd.Flags().GetString("message") 31 | exitWithError(err) 32 | 33 | if message != "" { 34 | option.Message = &message 35 | } 36 | if ref != "" { 37 | option.Tag = &ref 38 | } 39 | 40 | // Create temp metadata 41 | metadataDir, _ := os.MkdirTemp(os.TempDir(), "*-avc") 42 | defer os.RemoveAll(metadataDir) 43 | 44 | config := core.NewConfig(baseDir, metadataDir, repoUrl) 45 | 46 | // push 47 | mngr, err := core.NewArtifactManager(config) 48 | exitWithError(err) 49 | 50 | exitWithError(mngr.Push(option)) 51 | }, 52 | } 53 | 54 | func init() { 55 | putCmd.Flags().StringP("message", "m", "", "Commit meessage") 56 | } 57 | -------------------------------------------------------------------------------- /cmd/root.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "os" 5 | 6 | "github.com/infuseai/artivc/internal/log" 7 | "github.com/spf13/cobra" 8 | ) 9 | 10 | var debug bool 11 | 12 | // rootCmd represents the base command when called without any subcommands 13 | var rootCmd = &cobra.Command{ 14 | Use: "avc", 15 | Short: "ArtiVC is a version control system for large files", 16 | Example: ` # Push data to the repository 17 | cd /path/to/my/data 18 | avc init s3://mybucket/path/to/repo 19 | avc push -m "my first commit" 20 | 21 | # Pull data from the repository 22 | cd /path/to/download 23 | avc init s3://mybucket/path/to/repo 24 | avc pull 25 | 26 | # Download by quick command 27 | avc get -o /path/to/download s3://mybucket/path/to/repo 28 | 29 | # Show 
command help 30 | avc -h 31 | 32 | For more information, please check https://github.com/infuseai/artivc`, 33 | PersistentPreRun: func(cmd *cobra.Command, args []string) { 34 | log.SetDebug(debug) 35 | }, 36 | } 37 | 38 | // Execute adds all child commands to the root command and sets flags appropriately. 39 | // This is called by main.main(). It only needs to happen once to the rootCmd. 40 | func Execute() { 41 | err := rootCmd.Execute() 42 | if err != nil { 43 | os.Exit(1) 44 | } 45 | } 46 | 47 | func init() { 48 | cobra.EnableCommandSorting = false 49 | rootCmd.PersistentFlags().BoolVar(&debug, "debug", false, "enable the debug message") 50 | 51 | rootCmd.SetUsageTemplate(usageTemplate) 52 | 53 | addCommandWithGroup(GROUP_QUICK, 54 | getCmd, 55 | putCmd, 56 | ) 57 | 58 | addCommandWithGroup(GROUP_BASIC, 59 | initCommand, 60 | cloneCommand, 61 | configCommand, 62 | statusCommand, 63 | pullCmd, 64 | pushCmd, 65 | tagCommand, 66 | listCommand, 67 | logCommand, 68 | diffCommand, 69 | ) 70 | 71 | addCommandWithGroup("", 72 | versionCommand, 73 | docsCommand, 74 | ) 75 | } 76 | 77 | func addCommandWithGroup(group string, cmds ...*cobra.Command) { 78 | for _, cmd := range cmds { 79 | cmd.Annotations = map[string]string{ 80 | "group": group, 81 | } 82 | } 83 | 84 | rootCmd.AddCommand(cmds...) 85 | } 86 | 87 | var usageTemplate = `{{- /* usage template */ -}} 88 | {{define "command" -}} 89 | {{if (or .IsAvailableCommand (eq .Name "help"))}} 90 | {{rpad .Name .NamePadding }} {{.Short}} 91 | {{- end -}} 92 | {{- end -}} 93 | {{- /* 94 | Body 95 | */ 96 | -}} 97 | Usage:{{if .Runnable}} 98 | {{.UseLine}}{{end}}{{if .HasAvailableSubCommands}} 99 | {{.CommandPath}} [command]{{end}}{{if gt (len .Aliases) 0}} 100 | 101 | Aliases: 102 | {{.NameAndAliases}}{{end}}{{if .HasExample}} 103 | 104 | Examples: 105 | {{.Example}}{{end}} 106 | {{if .HasAvailableSubCommands}} 107 | {{- if not .HasParent}} 108 | Basic Commands:{{range .Commands}}{{if (eq .Annotations.group "basic")}}{{template "command" .}}{{end}}{{end}} 109 | 110 | Quick Commands (Download or upload without a workspace):{{range .Commands}}{{if (eq .Annotations.group "quick")}}{{template "command" .}}{{end}}{{end}} 111 | 112 | Other Commands:{{range .Commands}}{{if not .Annotations.group}}{{template "command" .}}{{end}}{{end}} 113 | {{- else}} 114 | Available Commands:{{range .Commands}}{{if (or .IsAvailableCommand (eq .Name "help"))}} 115 | {{rpad .Name .NamePadding }} {{.Short}}{{end}}{{end}} 116 | {{- end -}} 117 | {{end}} 118 | {{if .HasAvailableLocalFlags}} 119 | Flags: 120 | {{.LocalFlags.FlagUsages | trimTrailingWhitespaces}}{{end}}{{if .HasAvailableInheritedFlags}} 121 | 122 | Global Flags: 123 | {{.InheritedFlags.FlagUsages | trimTrailingWhitespaces}}{{end}}{{if .HasHelpSubCommands}} 124 | 125 | Additional help topics:{{range .Commands}}{{if .IsAdditionalHelpTopicCommand}} 126 | {{rpad .CommandPath .CommandPathPadding}} {{.Short}}{{end}}{{end}}{{end}}{{if .HasAvailableSubCommands}} 127 | 128 | Use "{{.CommandPath}} [command] --help" for more information about a command.{{end}} 129 | ` 130 | -------------------------------------------------------------------------------- /cmd/status.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/infuseai/artivc/internal/core" 7 | "github.com/spf13/cobra" 8 | ) 9 | 10 | var statusCommand = &cobra.Command{ 11 | Use: "status", 12 | Short: "Show the status of the workspace", 13 | DisableFlagsInUseLine: true, 14 
| Example: ` # check current status 15 | avc status`, 16 | Args: cobra.NoArgs, 17 | Run: func(cmd *cobra.Command, args []string) { 18 | config, err := core.LoadConfig("") 19 | exitWithError(err) 20 | 21 | mngr, err := core.NewArtifactManager(config) 22 | exitWithError(err) 23 | 24 | exitWithError(mngr.Fetch()) 25 | 26 | fmt.Printf("workspace of the repository '%s'\n\n", config.RepoUrl()) 27 | 28 | result, err := mngr.Status() 29 | exitWithError(err) 30 | 31 | result.Print(true) 32 | }, 33 | } 34 | 35 | func init() { 36 | } 37 | -------------------------------------------------------------------------------- /cmd/tag.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/infuseai/artivc/internal/core" 5 | "github.com/spf13/cobra" 6 | ) 7 | 8 | var tagCommand = &cobra.Command{ 9 | Use: "tag [--delete ] []", 10 | DisableFlagsInUseLine: true, 11 | Short: "List or manage tags", 12 | Example: ` # List the tags 13 | avc tag 14 | 15 | # Tag the lastest commit 16 | avc tag v1.0.0 17 | 18 | # Tag the specific commit 19 | avc tag --ref a1b2c3d4 v1.0.0 20 | 21 | # Delete a tags 22 | avc tag --delete v1.0.0`, 23 | Args: cobra.RangeArgs(0, 2), 24 | Run: func(cmd *cobra.Command, args []string) { 25 | config, err := core.LoadConfig("") 26 | exitWithError(err) 27 | 28 | mngr, err := core.NewArtifactManager(config) 29 | exitWithError(err) 30 | 31 | if len(args) == 0 { 32 | exitWithError(mngr.ListTags()) 33 | } else if len(args) == 1 { 34 | tag := args[0] 35 | refOrCommit, err := cmd.Flags().GetString("ref") 36 | exitWithError(err) 37 | delete, err := cmd.Flags().GetBool("delete") 38 | exitWithError(err) 39 | 40 | if !delete { 41 | exitWithError(mngr.AddTag(refOrCommit, tag)) 42 | } else { 43 | exitWithError(mngr.DeleteTag(tag)) 44 | } 45 | } else { 46 | exitWithFormat("requires 0 or 1 argument\n") 47 | } 48 | }, 49 | } 50 | 51 | func init() { 52 | tagCommand.Flags().BoolP("delete", "D", false, "Delete a tag") 53 | tagCommand.Flags().String("ref", core.RefLatest, "The source commit or reference to be tagged") 54 | } 55 | -------------------------------------------------------------------------------- /cmd/util_test.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestTransformRepoUrl(t *testing.T) { 10 | baseDir := "/tmp/artivc" 11 | testCases := []struct { 12 | desc string 13 | in string 14 | out string 15 | }{ 16 | {desc: "local file", in: "/this/is/my/path", out: "/this/is/my/path"}, 17 | {desc: "relative path", in: "../path", out: "/tmp/path"}, 18 | {desc: "relative path2", in: "../../../path", out: "/path"}, 19 | {desc: "normal url (file)", in: "file://mybucket/this/is/my/path", out: "file://mybucket/this/is/my/path"}, 20 | {desc: "normal url (s3)", in: "s3://mybucket/this/is/my/path", out: "s3://mybucket/this/is/my/path"}, 21 | } 22 | 23 | for _, tC := range testCases { 24 | t.Run(tC.desc, func(t *testing.T) { 25 | result, err := transformRepoUrl(baseDir, tC.in) 26 | if err != nil { 27 | assert.Empty(t, tC.out) 28 | } 29 | assert.Equal(t, tC.out, result) 30 | }) 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /cmd/utils.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "io" 7 | neturl "net/url" 8 | "os" 9 | "path/filepath" 10 | 
"strings" 11 | 12 | "github.com/spf13/cobra" 13 | ) 14 | 15 | const ( 16 | GROUP_BASIC = "basic" 17 | GROUP_QUICK = "quick" 18 | ) 19 | 20 | func exitWithError(err error) { 21 | cobra.CheckErr(err) 22 | } 23 | 24 | func exitWithFormat(format string, a ...interface{}) { 25 | cobra.CheckErr(fmt.Sprintf(format, a...)) 26 | } 27 | 28 | func parseRepoStr(repoAndRef string) (repoUrl string, ref string, err error) { 29 | comps := strings.Split(repoAndRef, "@") 30 | if len(comps) == 1 { 31 | repoUrl = repoAndRef 32 | } else if len(comps) == 2 { 33 | repoUrl = comps[0] 34 | ref = comps[1] 35 | } else { 36 | err = errors.New("Invalid repository: " + repoAndRef) 37 | } 38 | return 39 | } 40 | 41 | func transformRepoUrl(base string, repo string) (string, error) { 42 | url, err := neturl.Parse(repo) 43 | if err != nil { 44 | return "", err 45 | } 46 | 47 | if url.Scheme != "" { 48 | return repo, nil 49 | } 50 | 51 | if strings.HasPrefix(repo, "/") { 52 | return repo, nil 53 | } 54 | 55 | return filepath.Abs(filepath.Join(base, url.Path)) 56 | } 57 | 58 | func isDirEmpty(dir string) bool { 59 | f, err := os.Open(dir) 60 | if err != nil { 61 | return false 62 | } 63 | defer f.Close() 64 | 65 | _, err = f.Readdirnames(1) 66 | return err == io.EOF 67 | } 68 | -------------------------------------------------------------------------------- /cmd/version.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "runtime" 7 | 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | var ( 12 | version = "v0.1-dev" 13 | 14 | // overwrite version when tagVersion exists 15 | tagVersion = "" 16 | 17 | // gitCommit is the git sha1 18 | gitCommit = "" 19 | 20 | // gitTreeState is the state of the git tree {dirty or clean} 21 | gitTreeState = "" 22 | ) 23 | 24 | type BuildInfo struct { 25 | Version string 26 | GitCommit string 27 | GitTreeState string 28 | GoVersion string 29 | } 30 | 31 | func GetVersion() string { 32 | info := BuildInfo{ 33 | Version: version, 34 | GitCommit: gitCommit, 35 | GitTreeState: gitTreeState, 36 | GoVersion: runtime.Version(), 37 | } 38 | 39 | if tagVersion != "" { 40 | info.Version = tagVersion 41 | } 42 | 43 | data, _ := json.Marshal(info) 44 | return fmt.Sprintf("version.BuildInfo%s", string(data)) 45 | } 46 | 47 | var versionCommand = &cobra.Command{ 48 | Use: "version", 49 | Short: "Print the version information", 50 | Long: `Print the version information. For example: 51 | 52 | avc version 53 | version.BuildInfo{"Version":"v0.1-dev","GitCommit":"59b5c650fbed4d91c1e54b7cb3c3f6f0c50e5fa4","GitTreeState":"dirty","GoVersion":"go1.17.5"} 54 | `, 55 | Run: func(cmd *cobra.Command, args []string) { 56 | fmt.Println(GetVersion()) 57 | }, 58 | } 59 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | themes 2 | resources 3 | public 4 | .hugo_build.lock -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | THEME_VERSION := v0.27.4 2 | THEME := hugo-geekdoc 3 | BASEDIR := . 
4 | THEMEDIR := $(BASEDIR)/themes 5 | 6 | build: doc-assets doc-build 7 | 8 | start: doc-assets 9 | hugo server -D 10 | 11 | doc-assets: 12 | mkdir -p $(THEMEDIR)/$(THEME)/ ; \ 13 | curl -sSL "https://github.com/thegeeklab/$(THEME)/releases/download/${THEME_VERSION}/$(THEME).tar.gz" | tar -xz -C $(THEMEDIR)/$(THEME)/ --strip-components=1 14 | 15 | 16 | doc-commands: 17 | rm -rf content/en/commands/avc.md 18 | rm -rf content/en/commands/avc_*.md 19 | go run ../main.go docs 20 | cp -R ./generated_docs/ content/en/commands/ 21 | rm -rf ./generated_docs/ 22 | 23 | doc-build: 24 | cd $(BASEDIR); hugo 25 | 26 | clean: 27 | rm -rf $(THEMEDIR) && \ 28 | rm -rf $(BASEDIR)/public 29 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | The root of document site. The doc site is generated by [hugo](https://gohugo.io/) 2 | 3 | ## Prepare Environment 4 | Install [hugo](https://gohugo.io/getting-started/quick-start/) 5 | 6 | ## Run dev server 7 | 8 | ``` 9 | make start 10 | ``` 11 | 12 | ## Build doc site 13 | 14 | ``` 15 | make build 16 | ``` -------------------------------------------------------------------------------- /docs/assets/ArtiVC_workspace.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/assets/art-overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InfuseAI/ArtiVC/76a47f0105538a7511742f154c0da22ae0372eec/docs/assets/art-overview.png -------------------------------------------------------------------------------- /docs/assets/cheatsheet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InfuseAI/ArtiVC/76a47f0105538a7511742f154c0da22ae0372eec/docs/assets/cheatsheet.png -------------------------------------------------------------------------------- /docs/config/_default/config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | baseURL: https://artivc.io/ 3 | title: ArtiVC 4 | theme: hugo-geekdoc 5 | 6 | pygmentsUseClasses: true 7 | pygmentsCodeFences: true 8 | timeout: 180000 9 | pluralizeListTitles: false 10 | 11 | defaultContentLanguage: en 12 | 13 | disablePathToLower: true 14 | enableGitInfo: true 15 | 16 | enableRobotsTXT: true 17 | 18 | markup: 19 | goldmark: 20 | renderer: 21 | unsafe: true 22 | tableOfContents: 23 | startLevel: 1 24 | endLevel: 9 25 | 26 | taxonomies: 27 | tag: tags 28 | 29 | outputs: 30 | home: 31 | - HTML 32 | page: 33 | - HTML 34 | section: 35 | - HTML 36 | taxonomy: 37 | - HTML 38 | term: 39 | - HTML 40 | -------------------------------------------------------------------------------- /docs/config/_default/languages.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | en: 3 | languageName: "English" 4 | contentDir: "content/en" 5 | weight: 10 6 | -------------------------------------------------------------------------------- /docs/config/_default/params.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | description: > 3 | ArtiVC (Artifact Version Control) is a version control system for large files. 
4 | images: 5 | - "ArtiVC_workspace.png" 6 | 7 | geekdocToC: 3 8 | geekdocTagsToMenu: true 9 | 10 | geekdocRepo: https://github.com/InfuseAI/ArtiVC 11 | geekdocEditPath: edit/main/docs 12 | 13 | geekdocSearch: true 14 | geekdocSearchShowParent: true 15 | 16 | geekdocLegalNotice: https://thegeeklab.de/legal-notice/#contact-information 17 | geekdocPrivacyPolicy: https://thegeeklab.de/legal-notice/#privacy-policy 18 | 19 | geekdocImageLazyLoading: true 20 | geekdocDarkModeDim: true 21 | -------------------------------------------------------------------------------- /docs/content/en/_includes/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | GeekdocHidden: true 3 | --- 4 | -------------------------------------------------------------------------------- /docs/content/en/_includes/include-page.md: -------------------------------------------------------------------------------- 1 | _**Example page include**_ 2 | 3 | {{< hint info >}} 4 | **Example Shortcode**\ 5 | Shortcode used in an include page. 6 | {{< /hint >}} 7 | 8 | | Head 1 | Head 2 | Head 3 | 9 | | ------ | ------ | ------ | 10 | | 1 | 2 | 3 | 11 | -------------------------------------------------------------------------------- /docs/content/en/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 3 | geekdocNav: false 4 | geekdocBreadcrumb: false 5 | geekdocAlign: center 6 | geekdocAnchor: false 7 | --- 8 | 9 | {{< columns >}} 10 | 11 |

12 | ArtiVC (Artifact Version Control) is a handy command-line tool for data versioning on cloud storage. With only one command, it helps you neatly snapshot your data and switch between versions. Even better, it integrates seamlessly with your existing cloud environment. ArtiVC supports three major cloud providers (AWS S3, Google Cloud Storage, Azure Blob Storage) and remote filesystems over SSH. 13 |

14 | 
15 | 
16 | 
17 | Getting Started
18 | 
19 | 
20 | 
21 | <--->
22 | [![asciicast](https://asciinema.org/a/6JEhzpJ5QMiSkiC74s5CyT257.svg)](https://asciinema.org/a/6JEhzpJ5QMiSkiC74s5CyT257?autoplay=1)
23 | {{< /columns >}}
24 | 
25 | 
26 | # Feature Overview
27 | 
28 | {{< columns >}}
29 | ## Data Versioning
30 | 
31 | Version your data the way you version code. ArtiVC supports commit history, commit messages, and version tags. You can diff two commits and pull data from a specific version.
32 | 
33 | <--->
34 | 
35 | ## Use your own storage
36 | 
37 | If you are used to keeping large files on NFS or S3, you can keep them on the same storage without changes.
38 | 
39 | <--->
40 | 
41 | ## No additional server is required
42 | 
43 | ArtiVC is a CLI tool. No server or gateway needs to be installed or operated.
44 | 
45 | {{< /columns >}}
46 | 
47 | {{< columns >}}
48 | 
49 | ## Multiple backend support
50 | 
51 | ArtiVC natively supports the local filesystem, remote filesystems (via SSH), AWS S3, Google Cloud Storage, and Azure Blob Storage as backends, and 40+ more backends are supported through the [Rclone](backends/rclone/) integration. [Learn more](backends/)
52 | 
53 | <--->
54 | 
55 | ## Painless Configuration
56 | 
57 | No one likes to configure. So ArtiVC leverages your existing configuration as much as possible: use `.ssh/config` for SSH access, and use `aws configure`, `gcloud auth application-default login`, or `az login` for the cloud platforms.
58 | 
59 | <--->
60 | 
61 | ## Efficient storage and transfer
62 | 
63 | The file structure of the repository is stored and transferred efficiently by [design](design/how-it-works/). It avoids storing duplicated content and minimizes the number of files to upload when pushing a new version. [Learn more](design/benchmark/)
64 | 
65 | 
66 | {{< /columns >}}
-------------------------------------------------------------------------------- /docs/content/en/backends/_index.md: --------------------------------------------------------------------------------
1 | ---
2 | title: Backends
3 | weight: 2
4 | ---
5 | 
6 | 
7 | | Backend | Repository URL | Doc |
8 | | --- | --- | --- |
9 | | Local Filesystem | `/path/to/data` | [{{< icon "gdoc_link" >}}](local) |
10 | | Remote Filesystem (SSH) | `<host>:path/to/data` | [{{< icon "gdoc_link" >}}](ssh) |
11 | | AWS S3 [{{< icon "gdoc_language" >}}](https://aws.amazon.com/s3/) | `s3://<bucket>/path/to/data` | [{{< icon "gdoc_link" >}}](s3) |
12 | | Google Cloud Storage [{{< icon "gdoc_language" >}}](https://cloud.google.com/storage) | `gs://<bucket>/path/to/data` | [{{< icon "gdoc_link" >}}](gcs) |
13 | | Azure Blob Storage [{{< icon "gdoc_language" >}}](https://azure.microsoft.com/services/storage/blobs/) | `https://<storageaccount>.blob.core.windows.net/<container>/path/to/data` | [{{< icon "gdoc_link" >}}](azureblob) |
14 | | Rclone [{{< icon "gdoc_language" >}}](https://rclone.org/) | `rclone://<remote>/path/to/data` | [{{< icon "gdoc_link" >}}](rclone) |
15 | 
16 | 
-------------------------------------------------------------------------------- /docs/content/en/backends/azureblob.md: --------------------------------------------------------------------------------
1 | ---
2 | title: Azure Blob Storage
3 | weight: 13
4 | ---
5 | 
6 | {{< toc >}}
7 | 
8 | Use [Azure Blob Storage](https://azure.microsoft.com/services/storage/blobs/) as the repository backend.
9 | 
10 | ## Configuration
11 | 
12 | Before using the backend, you have to set up the credentials. There are two methods to configure them:
13 | 
14 | - **Use Azure CLI to login:** Suitable for a development environment.
15 | - **Use environment variables:** Suitable for production or CI environments 16 | 17 | 18 | {{< hint warning >}} 19 | **Assign the Permission**\ 20 | The logged-in account requires the **Storage Blob Data Contributor** role on the storage account. Assign it in the **Azure Portal** 21 | 22 | *Storage Accounts* > *my account* > *Access Control (IAM)* > *Role assignments* 23 | 24 | For more information, please see https://docs.microsoft.com/azure/storage/blobs/assign-azure-role-data-access 25 | {{< /hint >}} 26 | 27 | The Azure Blob Storage backend authenticates through the default procedure defined by the [Azure SDK for Go](https://docs.microsoft.com/azure/developer/go/azure-sdk-authentication) 28 | 29 | ### Use Azure CLI to login 30 | 31 | This backend supports using the [Azure CLI](https://docs.microsoft.com/cli/azure/install-azure-cli) to configure the login account. It will open the browser and start the login process. 32 | 33 | ``` 34 | az login 35 | ``` 36 | 37 | It also supports the other login options provided by `az login`, such as 38 | 39 | ``` 40 | az login --service-principal -u <app-id> -p <password-or-cert> -t <tenant> 41 | ``` 42 | 43 | ### Use Environment Variables 44 | 45 | - Service principal with a secret 46 | 47 | | Name | Description | 48 | | --- | --- | 49 | | AZURE_TENANT_ID | ID of the application's Azure AD tenant | 50 | | AZURE_CLIENT_ID | Application ID of an Azure service principal | 51 | | AZURE_CLIENT_SECRET | Password of the Azure service principal | 52 | 53 | - Service principal with certificate 54 | 55 | | Name | Description | 56 | | --- | --- | 57 | | AZURE_TENANT_ID | ID of the application's Azure AD tenant | 58 | | AZURE_CLIENT_ID | ID of an Azure AD application | 59 | | AZURE_CLIENT_CERTIFICATE_PATH | Path to a certificate file including the private key (without password protection) | 60 | 61 | - Username and password 62 | 63 | | Name | Description | 64 | | --- | --- | 65 | | AZURE_CLIENT_ID | ID of an Azure AD application | 66 | | AZURE_USERNAME | A username (usually an email address) | 67 | | AZURE_PASSWORD | That user's password | 68 | 69 | - Managed identity 70 | 71 | [Managed identities](https://docs.microsoft.com/azure/active-directory/managed-identities-azure-resources/overview) eliminate the need for developers to manage credentials. By connecting to resources that support Azure AD authentication, applications can use Azure AD tokens instead of credentials. 72 | 73 | | Name | Description | 74 | | --- | --- | 75 | | AZURE_CLIENT_ID | Client ID of a user-assigned managed identity | 76 | 77 | - Storage account key 78 | 79 | | Name | Description | 80 | | --- | --- | 81 | | AZURE_STORAGE_ACCOUNT_KEY | The access key of the storage account | 82 | 83 | ## Usage 84 | 85 | Init a workspace 86 | ```shell 87 | avc init https://mystorageaccount.blob.core.windows.net/mycontainer/path/to/mydataset 88 | ``` 89 | 90 | Clone a repository 91 | ```shell 92 | avc clone https://mystorageaccount.blob.core.windows.net/mycontainer/path/to/mydataset 93 | cd mydataset/ 94 | ``` 95 |
-------------------------------------------------------------------------------- /docs/content/en/backends/gcs.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Google Cloud Storage 3 | weight: 12 4 | --- 5 | 6 | {{< toc >}} 7 | 8 | Use [Google Cloud Storage (GCS)](https://cloud.google.com/storage) as the repository backend. 9 | 10 | Note that Google Cloud Storage is not [Google Drive](https://www.google.com.tw/drive/). They are different Google products.
11 | 12 | ## Configuration 13 | 14 | Before using the backend, you have to configure the service account credential. There are three methods to configure it. 15 | 16 | 1. Use application default credentials. This is the recommended way for your development environment. 17 | 18 | ``` 19 | gcloud auth application-default login 20 | ``` 21 | 22 | It will open the browser and start the login process. 23 | 24 | 1. Use service account credentials. This is the recommended way for a CI, job, or production environment. 25 | 26 | ``` 27 | export GOOGLE_APPLICATION_CREDENTIALS=/path/to/service-account-credentials.json 28 | ``` 29 | 30 | To get this JSON file, please see the [Passing credentials manually](https://cloud.google.com/docs/authentication/production#manually) document 31 | 32 | 1. Use the service account of the GCP resources (e.g. GCE, GKE). This is the recommended way if ArtiVC runs inside a GCP environment. Please see the [default service accounts](https://cloud.google.com/iam/docs/service-accounts#default) document 33 | 34 | 35 | The GCS backend finds credentials through the default procedure defined by [Google Cloud](https://cloud.google.com/docs/authentication/production) 36 | 37 | 38 | 39 | ## Usage 40 | 41 | Init a workspace 42 | ```shell 43 | avc init gs://mybucket/path/to/mydataset 44 | ``` 45 | 46 | Clone a repository 47 | ```shell 48 | avc clone gs://mybucket/path/to/mydataset 49 | cd mydataset/ 50 | ``` 51 | 52 | 53 | ## Environment Variables 54 | 55 | | Name | Description | Default value | 56 | | --- | --- | --- | 57 | | `GOOGLE_APPLICATION_CREDENTIALS` | The location of the service account keys in JSON | |
-------------------------------------------------------------------------------- /docs/content/en/backends/local.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Local Filesystem 3 | weight: 1 4 | --- 5 | 6 | Use the local filesystem as the repository backend. The folder can reside on a local disk or a mounted NFS. 7 | 8 | ## Usage 9 | Init a workspace 10 | 11 | ```shell 12 | avc init /path/to/mydataset 13 | 14 | # Or use a relative path 15 | # avc init ../mydataset 16 | ``` 17 | 18 | Clone a repository 19 | 20 | ```shell 21 | avc clone /path/to/mydataset 22 | cd mydataset/ 23 | ```
-------------------------------------------------------------------------------- /docs/content/en/backends/rclone.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Rclone 3 | weight: 50 4 | --- 5 | 6 | Use [Rclone](https://rclone.org/) as the repository backend. 7 | 8 | Rclone is a command-line program to manage files on cloud storage. As an analogy, **Rclone** is the **rsync** for cloud storage. Rclone supports [40+ providers](https://rclone.org/#providers). The Rclone backend uses the **rclone** command to communicate with the remote storage. 9 | 10 | 11 | ## Configuration 12 | 13 | 1. [Install Rclone](https://rclone.org/install/) 14 | 1. Configure Rclone 15 | ``` 16 | rclone config 17 | ``` 18 | 1. Check that the rclone command can access your remote backend and path.
19 | ``` 20 | rclone lsd <remote>:<path> 21 | ``` 22 | 23 | ## Usage 24 | 25 | Init a workspace 26 | 27 | ```shell 28 | # avc init rclone://<remote>/<path> 29 | avc init rclone://myremote/path/to/mydataset 30 | ``` 31 | 32 | Clone a repository 33 | 34 | ```shell 35 | avc clone rclone://myremote/path/to/mydataset 36 | cd mydataset/ 37 | ```
-------------------------------------------------------------------------------- /docs/content/en/backends/s3.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: AWS S3 3 | weight: 11 4 | --- 5 | 6 | {{< toc >}} 7 | 8 | Use AWS S3 as the repository backend. 9 | 10 | ## Features 11 | 12 | 1. Supports `~/.aws/credentials`, just like the AWS CLI 13 | 1. Supports [multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/userguide/mpuoverview.html) 14 | 15 | ## Configuration 16 | 17 | 1. Install the [AWS CLI](https://aws.amazon.com/cli/) 18 | 2. Configure the AWS CLI. Please see the [AWS documentation](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html) 19 | ``` 20 | aws configure 21 | ``` 22 | 3. Check the current config 23 | ``` 24 | aws configure list 25 | ``` 26 | 27 | The S3 backend loads configuration through the default procedure of the [AWS SDK for Go](https://aws.github.io/aws-sdk-go-v2/docs/configuring-sdk/#specifying-credentials) 28 | 29 | ## Usage 30 | 31 | Init a workspace 32 | ```shell 33 | avc init s3://mybucket/path/to/mydataset 34 | ``` 35 | 36 | Clone a repository 37 | ```shell 38 | avc clone s3://mybucket/path/to/mydataset 39 | cd mydataset/ 40 | ``` 41 | 42 | ## Environment Variables 43 | 44 | | Name | Description | Default value | 45 | | --- | --- | --- | 46 | | `AWS_ACCESS_KEY_ID` | The access key | | 47 | | `AWS_SECRET_ACCESS_KEY` | The secret access key | | 48 | | `AWS_PROFILE` | The profile to use in the credential file | `default` | 49 | | `AWS_REGION` | The region to use | the region from the profile | 50 |
-------------------------------------------------------------------------------- /docs/content/en/backends/ssh.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Remote Filesystem (SSH) 3 | weight: 2 4 | --- 5 | 6 | {{< toc >}} 7 | 8 | Use a remote filesystem over SSH as the repository backend. 9 | 10 | ## Features 11 | 12 | - Use the original `~/.ssh/config` without pain 13 | - Supports password authentication 14 | - Supports public key authentication 15 | - Supports SSH agent 16 | - Supports SSH proxy by `ProxyCommand` or `ProxyJump` 17 | - Supports host key checking through `~/.ssh/known_hosts` 18 | - Concurrent uploading and downloading 19 | 20 | ## Configuration 21 | To use the SSH backend, you have to configure `~/.ssh/config`. 22 | 23 | 1. Configure the `~/.ssh/config` 24 | ``` 25 | Host myserver 26 | HostName myserver.hosts 27 | User myname 28 | IdentityFile ~/.ssh/id_ed25519 29 | ``` 30 | 31 | 1. Check if you can access the SSH server successfully 32 | 33 | ``` 34 | ssh myserver 35 | ``` 36 | 37 | For more information, please see the [ssh config manual](https://man.openbsd.org/ssh_config) 38 | 39 | ## Usage 40 | 41 | Init a workspace 42 | 43 | ```shell 44 | avc init myserver:path/to/mydataset 45 | ``` 46 | 47 | Clone a repository 48 | 49 | ```shell 50 | avc clone myserver:path/to/mydataset 51 | cd mydataset/ 52 | ``` 53 | 54 | ## SSH Proxy 55 | 56 | There are two ways to connect to the destination server through a bastion (proxy) server. 57 | 58 | ### ProxyCommand 59 | 60 | 1. 
Configure `~/.ssh/config` 61 | ``` 62 | host bastion 63 | hostname mybastion.hosts 64 | user myname 65 | identityfile ~/id_ed25519 66 | 67 | host myserver 68 | hostname myserver.hosts 69 | user myname 70 | identityfile ~/id_ed25519 71 | ProxyCommand ssh bastion -W %h:%p 72 | ``` 73 | 1. Init the workspace 74 | ``` 75 | avc init myserver:path/to/mydataset 76 | ``` 77 | 78 | ### ProxyJump 79 | 80 | 81 | 1. Configure `~/.ssh/config` 82 | ``` 83 | host bastion 84 | hostname mybastion.hosts 85 | user myname 86 | identityfile ~/id_ed25519 87 | 88 | host myserver 89 | hostname myserver.hosts 90 | user myname 91 | identityfile ~/id_ed25519 92 | ProxyJump bastion 93 | ``` 94 | 95 | 1. Init the workspace 96 | ``` 97 | avc init myserver:path/to/mydataset 98 | ``` 99 | 100 | ## Environment Variables 101 | 102 | 103 | | Name | Description | Default value | 104 | | --- | --- | --- | 105 | | `SSH_USER` | The login user | The current user | 106 | | `SSH_PASSWORD` | The password to be used for password authentication | | 107 | | `SSH_PORT` | The port of the SSH server | 22 | 108 | | `SSH_IDENTITY_FILE` | The identity file to be used for public key authentication | | 109 | | `SSH_KEY_PASSPHRASE` | The passphrase of the identity key | | 110 | | `SSH_STRICT_HOST_KEY_CHECKING` | Set the value to `no` to disable host key checking | | 111 | 112 | ## Supported Directives for SSH config 113 | 114 | - [Port](https://man.openbsd.org/ssh_config#Port) 115 | - [User](https://man.openbsd.org/ssh_config#User) 116 | - [IdentityFile](https://man.openbsd.org/ssh_config#IdentityFile) 117 | - [StrictHostKeyChecking](https://man.openbsd.org/ssh_config#StrictHostKeyChecking) 118 | - [ProxyCommand](https://man.openbsd.org/ssh_config#ProxyCommand) 119 | - [ProxyJump](https://man.openbsd.org/ssh_config#ProxyJump) 120 |
-------------------------------------------------------------------------------- /docs/content/en/commands/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Commands 3 | weight: 99 4 | geekdocCollapseSection: true 5 | --- 6 |
-------------------------------------------------------------------------------- /docs/content/en/commands/avc.md: -------------------------------------------------------------------------------- 1 | ## avc 2 | 3 | ArtiVC is a version control system for large files 4 | 5 | ### Examples 6 | 7 | ``` 8 | # Push data to the repository 9 | cd /path/to/my/data 10 | avc init s3://mybucket/path/to/repo 11 | avc push -m "my first commit" 12 | 13 | # Pull data from the repository 14 | cd /path/to/download 15 | avc init s3://mybucket/path/to/repo 16 | avc pull 17 | 18 | # Download by the quick command 19 | avc get -o /path/to/download s3://mybucket/path/to/repo 20 | 21 | # Show command help 22 | avc -h 23 | 24 | For more information, please check https://github.com/infuseai/artivc 25 | ``` 26 | 27 | ### Options 28 | 29 | ``` 30 | -h, --help   help for avc 31 | ``` 32 | 33 | ### SEE ALSO 34 | 35 | * [avc clone](/commands/avc_clone/) - Clone a workspace 36 | * [avc completion](/commands/avc_completion/) - Generate the autocompletion script for the specified shell 37 | * [avc config](/commands/avc_config/) - Configure the workspace 38 | * [avc diff](/commands/avc_diff/) - Diff workspace/commits/references 39 | * [avc docs](/commands/avc_docs/) - Generate docs 40 | * [avc get](/commands/avc_get/) - Download data from a repository 41 | * [avc init](/commands/avc_init/) - Initiate a workspace 42 | * [avc list](/commands/avc_list/) - List files of a commit 43 | 
* [avc log](/commands/avc_log/) - Log commits 44 | * [avc pull](/commands/avc_pull/) - Pull data from the repository 45 | * [avc push](/commands/avc_push/) - Push data to the repository 46 | * [avc put](/commands/avc_put/) - Upload data to a repository 47 | * [avc status](/commands/avc_status/) - Show the status of the workspace 48 | * [avc tag](/commands/avc_tag/) - List or manage tags 49 | * [avc version](/commands/avc_version/) - Print the version information 50 | 51 | ###### Auto generated by spf13/cobra on 24-Mar-2022 52 |
-------------------------------------------------------------------------------- /docs/content/en/commands/avc_clone.md: -------------------------------------------------------------------------------- 1 | ## avc clone 2 | 3 | Clone a workspace 4 | 5 | ``` 6 | avc clone <repository> [<dir>] 7 | ``` 8 | 9 | ### Examples 10 | 11 | ``` 12 | # clone a workspace with a local repository 13 | avc clone /path/to/mydataset 14 | 15 | # clone a workspace with an s3 repository 16 | avc clone s3://mybucket/path/to/mydataset 17 | ``` 18 | 19 | ### Options 20 | 21 | ``` 22 | -h, --help   help for clone 23 | ``` 24 | 25 | ### SEE ALSO 26 | 27 | * [avc](/commands/avc/) - ArtiVC is a version control system for large files 28 | 29 | ###### Auto generated by spf13/cobra on 24-Mar-2022 30 |
-------------------------------------------------------------------------------- /docs/content/en/commands/avc_completion.md: -------------------------------------------------------------------------------- 1 | ## avc completion 2 | 3 | Generate the autocompletion script for the specified shell 4 | 5 | ### Synopsis 6 | 7 | Generate the autocompletion script for avc for the specified shell. 8 | See each sub-command's help for details on how to use the generated script. 9 | 10 | 11 | ### Options 12 | 13 | ``` 14 | -h, --help   help for completion 15 | ``` 16 | 17 | ### SEE ALSO 18 | 19 | * [avc](/commands/avc/) - ArtiVC is a version control system for large files 20 | * [avc completion bash](/commands/avc_completion_bash/) - Generate the autocompletion script for bash 21 | * [avc completion fish](/commands/avc_completion_fish/) - Generate the autocompletion script for fish 22 | * [avc completion powershell](/commands/avc_completion_powershell/) - Generate the autocompletion script for powershell 23 | * [avc completion zsh](/commands/avc_completion_zsh/) - Generate the autocompletion script for zsh 24 | 25 | ###### Auto generated by spf13/cobra on 24-Mar-2022 26 |
-------------------------------------------------------------------------------- /docs/content/en/commands/avc_completion_bash.md: -------------------------------------------------------------------------------- 1 | ## avc completion bash 2 | 3 | Generate the autocompletion script for bash 4 | 5 | ### Synopsis 6 | 7 | Generate the autocompletion script for the bash shell. 8 | 9 | This script depends on the 'bash-completion' package. 10 | If it is not installed already, you can install it via your OS's package manager. 11 | 12 | To load completions in your current shell session: 13 | 14 | source <(avc completion bash) 15 | 16 | To load completions for every new session, execute once: 17 | 18 | #### Linux: 19 | 20 | avc completion bash > /etc/bash_completion.d/avc 21 | 22 | #### macOS: 23 | 24 | avc completion bash > /usr/local/etc/bash_completion.d/avc 25 | 26 | You will need to start a new shell for this setup to take effect.
27 | 28 | 29 | ``` 30 | avc completion bash 31 | ``` 32 | 33 | ### Options 34 | 35 | ``` 36 | -h, --help help for bash 37 | --no-descriptions disable completion descriptions 38 | ``` 39 | 40 | ### SEE ALSO 41 | 42 | * [avc completion](/commands/avc_completion/) - Generate the autocompletion script for the specified shell 43 | 44 | ###### Auto generated by spf13/cobra on 24-Mar-2022 45 | -------------------------------------------------------------------------------- /docs/content/en/commands/avc_completion_fish.md: -------------------------------------------------------------------------------- 1 | ## avc completion fish 2 | 3 | Generate the autocompletion script for fish 4 | 5 | ### Synopsis 6 | 7 | Generate the autocompletion script for the fish shell. 8 | 9 | To load completions in your current shell session: 10 | 11 | avc completion fish | source 12 | 13 | To load completions for every new session, execute once: 14 | 15 | avc completion fish > ~/.config/fish/completions/avc.fish 16 | 17 | You will need to start a new shell for this setup to take effect. 18 | 19 | 20 | ``` 21 | avc completion fish [flags] 22 | ``` 23 | 24 | ### Options 25 | 26 | ``` 27 | -h, --help help for fish 28 | --no-descriptions disable completion descriptions 29 | ``` 30 | 31 | ### SEE ALSO 32 | 33 | * [avc completion](/commands/avc_completion/) - Generate the autocompletion script for the specified shell 34 | 35 | ###### Auto generated by spf13/cobra on 24-Mar-2022 36 | -------------------------------------------------------------------------------- /docs/content/en/commands/avc_completion_powershell.md: -------------------------------------------------------------------------------- 1 | ## avc completion powershell 2 | 3 | Generate the autocompletion script for powershell 4 | 5 | ### Synopsis 6 | 7 | Generate the autocompletion script for powershell. 8 | 9 | To load completions in your current shell session: 10 | 11 | avc completion powershell | Out-String | Invoke-Expression 12 | 13 | To load completions for every new session, add the output of the above command 14 | to your powershell profile. 15 | 16 | 17 | ``` 18 | avc completion powershell [flags] 19 | ``` 20 | 21 | ### Options 22 | 23 | ``` 24 | -h, --help help for powershell 25 | --no-descriptions disable completion descriptions 26 | ``` 27 | 28 | ### SEE ALSO 29 | 30 | * [avc completion](/commands/avc_completion/) - Generate the autocompletion script for the specified shell 31 | 32 | ###### Auto generated by spf13/cobra on 24-Mar-2022 33 | -------------------------------------------------------------------------------- /docs/content/en/commands/avc_completion_zsh.md: -------------------------------------------------------------------------------- 1 | ## avc completion zsh 2 | 3 | Generate the autocompletion script for zsh 4 | 5 | ### Synopsis 6 | 7 | Generate the autocompletion script for the zsh shell. 8 | 9 | If shell completion is not already enabled in your environment you will need 10 | to enable it. You can execute the following once: 11 | 12 | echo "autoload -U compinit; compinit" >> ~/.zshrc 13 | 14 | To load completions for every new session, execute once: 15 | 16 | #### Linux: 17 | 18 | avc completion zsh > "${fpath[1]}/_avc" 19 | 20 | #### macOS: 21 | 22 | avc completion zsh > /usr/local/share/zsh/site-functions/_avc 23 | 24 | You will need to start a new shell for this setup to take effect. 
25 | 26 | 27 | ``` 28 | avc completion zsh [flags] 29 | ``` 30 | 31 | ### Options 32 | 33 | ``` 34 | -h, --help              help for zsh 35 | --no-descriptions   disable completion descriptions 36 | ``` 37 | 38 | ### SEE ALSO 39 | 40 | * [avc completion](/commands/avc_completion/) - Generate the autocompletion script for the specified shell 41 | 42 | ###### Auto generated by spf13/cobra on 24-Mar-2022 43 |
-------------------------------------------------------------------------------- /docs/content/en/commands/avc_config.md: -------------------------------------------------------------------------------- 1 | ## avc config 2 | 3 | Configure the workspace 4 | 5 | ### Synopsis 6 | 7 | Configure the workspace. The config file is stored at ".avc/config". 8 | 9 | ``` 10 | avc config [<key> [<value>]] 11 | ``` 12 | 13 | ### Examples 14 | 15 | ``` 16 | # List the config 17 | avc config 18 | 19 | # Get the config 20 | avc config repo.url 21 | 22 | # Set the config 23 | avc config repo.url s3://your-bucket/data 24 | ``` 25 | 26 | ### Options 27 | 28 | ``` 29 | -h, --help   help for config 30 | ``` 31 | 32 | ### SEE ALSO 33 | 34 | * [avc](/commands/avc/) - ArtiVC is a version control system for large files 35 | 36 | ###### Auto generated by spf13/cobra on 24-Mar-2022 37 |
-------------------------------------------------------------------------------- /docs/content/en/commands/avc_diff.md: -------------------------------------------------------------------------------- 1 | ## avc diff 2 | 3 | Diff workspace/commits/references 4 | 5 | ``` 6 | avc diff [flags] 7 | ``` 8 | 9 | ### Examples 10 | 11 | ``` 12 | # Diff two versions 13 | avc diff v0.1.0 v0.2.0 14 | ``` 15 | 16 | ### Options 17 | 18 | ``` 19 | -h, --help   help for diff 20 | ``` 21 | 22 | ### SEE ALSO 23 | 24 | * [avc](/commands/avc/) - ArtiVC is a version control system for large files 25 | 26 | ###### Auto generated by spf13/cobra on 24-Mar-2022 27 |
-------------------------------------------------------------------------------- /docs/content/en/commands/avc_docs.md: -------------------------------------------------------------------------------- 1 | ## avc docs 2 | 3 | Generate docs 4 | 5 | ### Synopsis 6 | 7 | Generate docs. For example: 8 | 9 | avc docs 10 | 11 | ``` 12 | avc docs [flags] 13 | ``` 14 | 15 | ### Options 16 | 17 | ``` 18 | -h, --help   help for docs 19 | ``` 20 | 21 | ### SEE ALSO 22 | 23 | * [avc](/commands/avc/) - ArtiVC is a version control system for large files 24 | 25 | ###### Auto generated by spf13/cobra on 24-Mar-2022 26 |
-------------------------------------------------------------------------------- /docs/content/en/commands/avc_get.md: -------------------------------------------------------------------------------- 1 | ## avc get 2 | 3 | Download data from a repository 4 | 5 | ``` 6 | avc get [-o <output>] <repository>[@<commit>|<tag>] [--] <path>... 7 | ``` 8 | 9 | ### Examples 10 | 11 | ``` 12 | # Download the latest version. The data goes to the "mydataset" folder.
13 | avc get s3://bucket/mydataset 14 | 15 | # Download a specific version 16 | avc get s3://mybucket/path/to/mydataset@v1.0.0 17 | 18 | # Download to a specific folder 19 | avc get -o /tmp/mydataset s3://bucket/mydataset 20 | 21 | # Download partial files 22 | avc get -o /tmp/mydataset s3://bucket/mydataset -- path/to/file1 path/to/file2 data/ 23 | ``` 24 | 25 | ### Options 26 | 27 | ``` 28 | --delete          Delete extra files which are not listed in the commit 29 | -h, --help            help for get 30 | -o, --output string   Output directory 31 | ``` 32 | 33 | ### SEE ALSO 34 | 35 | * [avc](/commands/avc/) - ArtiVC is a version control system for large files 36 | 37 | ###### Auto generated by spf13/cobra on 24-Mar-2022 38 |
-------------------------------------------------------------------------------- /docs/content/en/commands/avc_init.md: -------------------------------------------------------------------------------- 1 | ## avc init 2 | 3 | Initiate a workspace 4 | 5 | ``` 6 | avc init <repository> 7 | ``` 8 | 9 | ### Examples 10 | 11 | ``` 12 | # Init a workspace with a local repository 13 | avc init /path/to/mydataset 14 | 15 | # Init a workspace with an s3 repository 16 | avc init s3://mybucket/path/to/mydataset 17 | ``` 18 | 19 | ### Options 20 | 21 | ``` 22 | -h, --help   help for init 23 | ``` 24 | 25 | ### SEE ALSO 26 | 27 | * [avc](/commands/avc/) - ArtiVC is a version control system for large files 28 | 29 | ###### Auto generated by spf13/cobra on 24-Mar-2022 30 |
-------------------------------------------------------------------------------- /docs/content/en/commands/avc_list.md: -------------------------------------------------------------------------------- 1 | ## avc list 2 | 3 | List files of a commit 4 | 5 | ``` 6 | avc list [flags] 7 | ``` 8 | 9 | ### Examples 10 | 11 | ``` 12 | # List files for the latest version 13 | avc list 14 | 15 | # List files for a specific version 16 | avc list v1.0.0 17 | ``` 18 | 19 | ### Options 20 | 21 | ``` 22 | -h, --help   help for list 23 | ``` 24 | 25 | ### SEE ALSO 26 | 27 | * [avc](/commands/avc/) - ArtiVC is a version control system for large files 28 | 29 | ###### Auto generated by spf13/cobra on 24-Mar-2022 30 |
-------------------------------------------------------------------------------- /docs/content/en/commands/avc_log.md: -------------------------------------------------------------------------------- 1 | ## avc log 2 | 3 | Log commits 4 | 5 | ``` 6 | avc log [<commit>|<tag>] 7 | ``` 8 | 9 | ### Examples 10 | 11 | ``` 12 | # Log commits from the latest 13 | avc log 14 | 15 | # Log commits from a specific version 16 | avc log v1.0.0 17 | ``` 18 | 19 | ### Options 20 | 21 | ``` 22 | -h, --help   help for log 23 | ``` 24 | 25 | ### SEE ALSO 26 | 27 | * [avc](/commands/avc/) - ArtiVC is a version control system for large files 28 | 29 | ###### Auto generated by spf13/cobra on 24-Mar-2022 30 |
-------------------------------------------------------------------------------- /docs/content/en/commands/avc_pull.md: -------------------------------------------------------------------------------- 1 | ## avc pull 2 | 3 | Pull data from the repository 4 | 5 | ``` 6 | avc pull [<commit>|<tag>] [flags] -- <path>... 7 | ``` 8 | 9 | ### Examples 10 | 11 | ``` 12 | # Pull the latest version 13 | avc pull 14 | 15 | # Pull from a specific version 16 | avc pull v1.0.0 17 | 18 | # Pull partial files 19 | avc pull -- path/to/partial 20 | avc pull v0.1.0 -- path/to/partial ...
21 | ``` 22 | 23 | ### Options 24 | 25 | ``` 26 | --delete    Delete extra files which are not listed in the commit 27 | --dry-run   Dry run 28 | -h, --help      help for pull 29 | ``` 30 | 31 | ### SEE ALSO 32 | 33 | * [avc](/commands/avc/) - ArtiVC is a version control system for large files 34 | 35 | ###### Auto generated by spf13/cobra on 24-Mar-2022 36 |
-------------------------------------------------------------------------------- /docs/content/en/commands/avc_push.md: -------------------------------------------------------------------------------- 1 | ## avc push 2 | 3 | Push data to the repository 4 | 5 | ### Synopsis 6 | 7 | Push data to the repository. There is no branch implemented yet; all put and push commands always create a commit, which is treated as the latest commit. 8 | 9 | ``` 10 | avc push [-m <message>] 11 | ``` 12 | 13 | ### Examples 14 | 15 | ``` 16 | # Push to the latest version 17 | avc push -m 'Initial version' 18 | 19 | # Push to the latest version and tag it as a specific version 20 | avc push -m 'Initial version' 21 | avc tag v1.0.0 22 | ``` 23 | 24 | ### Options 25 | 26 | ``` 27 | --dry-run          Dry run 28 | -h, --help             help for push 29 | -m, --message string   Commit message 30 | ``` 31 | 32 | ### SEE ALSO 33 | 34 | * [avc](/commands/avc/) - ArtiVC is a version control system for large files 35 | 36 | ###### Auto generated by spf13/cobra on 24-Mar-2022 37 |
-------------------------------------------------------------------------------- /docs/content/en/commands/avc_put.md: -------------------------------------------------------------------------------- 1 | ## avc put 2 | 3 | Upload data to a repository 4 | 5 | ``` 6 | avc put [-m <message>] <path> <repository>[@<tag>] 7 | ``` 8 | 9 | ### Examples 10 | 11 | ``` 12 | # Upload the latest version 13 | avc put ./folder/ /path/to/mydataset 14 | 15 | # Upload a specific version 16 | avc put ./folder/ /path/to/mydataset@v1.0.0 17 | ``` 18 | 19 | ### Options 20 | 21 | ``` 22 | -h, --help             help for put 23 | -m, --message string   Commit message 24 | ``` 25 | 26 | ### SEE ALSO 27 | 28 | * [avc](/commands/avc/) - ArtiVC is a version control system for large files 29 | 30 | ###### Auto generated by spf13/cobra on 24-Mar-2022 31 |
-------------------------------------------------------------------------------- /docs/content/en/commands/avc_status.md: -------------------------------------------------------------------------------- 1 | ## avc status 2 | 3 | Show the status of the workspace 4 | 5 | ``` 6 | avc status 7 | ``` 8 | 9 | ### Examples 10 | 11 | ``` 12 | # check current status 13 | avc status 14 | ``` 15 | 16 | ### Options 17 | 18 | ``` 19 | -h, --help   help for status 20 | ``` 21 | 22 | ### SEE ALSO 23 | 24 | * [avc](/commands/avc/) - ArtiVC is a version control system for large files 25 | 26 | ###### Auto generated by spf13/cobra on 24-Mar-2022 27 |
-------------------------------------------------------------------------------- /docs/content/en/commands/avc_tag.md: -------------------------------------------------------------------------------- 1 | ## avc tag 2 | 3 | List or manage tags 4 | 5 | ``` 6 | avc tag [--delete <tag>] [<tag>] 7 | ``` 8 | 9 | ### Examples 10 | 11 | ``` 12 | # List the tags 13 | avc tag 14 | 15 | # Tag the latest commit 16 | avc tag v1.0.0 17 | 18 | # Tag a specific commit 19 | avc tag --ref a1b2c3d4 v1.0.0 20 | 21 | # Delete a tag 22 | avc tag --delete v1.0.0 23 | ``` 24 | 25 | ### Options 26 | 27 | ``` 28 | -D, --delete       Delete a tag 29 | -h, --help         help for tag 30 | --ref string   The source commit or reference to be tagged (default "latest") 31 | ``` 32 | 33 | 
### SEE ALSO 34 | 35 | * [avc](/commands/avc/) - ArtiVC is a version control system for large files 36 | 37 | ###### Auto generated by spf13/cobra on 24-Mar-2022 38 |
-------------------------------------------------------------------------------- /docs/content/en/commands/avc_version.md: -------------------------------------------------------------------------------- 1 | ## avc version 2 | 3 | Print the version information 4 | 5 | ### Synopsis 6 | 7 | Print the version information. For example: 8 | 9 | avc version 10 | version.BuildInfo{"Version":"v0.1-dev","GitCommit":"59b5c650fbed4d91c1e54b7cb3c3f6f0c50e5fa4","GitTreeState":"dirty","GoVersion":"go1.17.5"} 11 | 12 | 13 | ``` 14 | avc version [flags] 15 | ``` 16 | 17 | ### Options 18 | 19 | ``` 20 | -h, --help   help for version 21 | ``` 22 | 23 | ### SEE ALSO 24 | 25 | * [avc](/commands/avc/) - ArtiVC is a version control system for large files 26 | 27 | ###### Auto generated by spf13/cobra on 24-Mar-2022 28 |
-------------------------------------------------------------------------------- /docs/content/en/commands/images/geekdoc-dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InfuseAI/ArtiVC/76a47f0105538a7511742f154c0da22ae0372eec/docs/content/en/commands/images/geekdoc-dark.png
-------------------------------------------------------------------------------- /docs/content/en/design/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Design 3 | weight: 3 4 | --- 5 |
-------------------------------------------------------------------------------- /docs/content/en/design/alternatives.md: -------------------------------------------------------------------------------- 1 | --- 2 | weight: 4 3 | --- 4 | 5 | - **Object storage or NFS:** Create folders or prefixes to add version semantics. It is a straightforward way to store different versions of data, but it lacks commit messages, metadata, and history tracking. And we cannot know which one is the latest version. 6 | - **S3 versioning**: [S3 Versioning](https://docs.aws.amazon.com/AmazonS3/latest/userguide/Versioning.html) can achieve object-level versioning. We can get the latest version and roll back to a previous one, but only at the level of individual objects rather than the whole dataset. 7 | - **Git LFS**: [Git LFS](https://git-lfs.github.com/) is an open-source Git extension for versioning large files developed by GitHub. 8 | - [Github](https://docs.github.com/en/repositories/working-with-files/managing-large-files/about-git-large-file-storage), [Gitlab](https://docs.gitlab.com/ee/topics/git/lfs/index.html), and [Huggingface](https://huggingface.co/docs/transformers/model_sharing#repository-features) provide the Git LFS feature with a limited quota. 9 | - If you want to put data on your own storage, you need to install a [standalone Git LFS server](https://github.com/git-lfs/lfs-test-server) 10 | - **DVC**: [DVC](https://dvc.org/) is built to make ML models shareable and reproducible. It is designed to handle large files, data sets, machine learning models, and metrics as well as code. 11 | - Use the `git` command to version small files or metadata, and use `dvc` to manage large files. 12 | - You need to know both git and dvc, and the workflow switches back and forth between the two commands. 
See the [dvc tutorial](https://dvc.org/doc/use-cases/versioning-data-and-model-files/tutorial) 13 | - **LakeFS**: [LakeFS](https://lakefs.io/) provides a multi-server solution to make S3 work as git-like repositories 14 | - the [architecture](https://docs.lakefs.io/understand/architecture.html) is much heavier than ArtiVC's due to an extra database for metadata storage and extra S3 configuration (e.g., an S3 gateway)
-------------------------------------------------------------------------------- /docs/content/en/design/benchmark.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Benchmark 3 | weight: 2 4 | --- 5 | 6 | {{< toc >}} 7 | 8 | This document shows the performance benchmark of ArtiVC and its alternatives. We chose these three tools to compare 9 | 10 | - [AWS CLI](https://aws.amazon.com/cli/) (2.4.18): The AWS CLI is the official AWS tool to upload, download, and manage S3. We use it as the baseline to compare with the other tools. 11 | - [Rclone](https://rclone.org/) (v1.57.0): Rclone is a command-line program to manage files on cloud storage. It is just like rsync, but for cloud storage. 12 | - [DVC](https://dvc.org/) (v2.9.5): DVC is an open-source tool for data science and machine learning projects. It provides Git-like commands to version large files. 13 | 14 | We upload data to the same bucket and path. The following are the example commands for each tool. 15 | 16 | 17 | {{< tabs "benchmark-commands" >}} 18 | 19 | {{< tab "AWS CLI" >}} 20 | ```bash 21 | # upload 22 | aws s3 cp --recursive . s3://art-vcs/benchmark/ 23 | 24 | # download 25 | aws s3 cp --recursive s3://art-vcs/benchmark/ . 26 | ``` 27 | {{< /tab >}} 28 | 29 | {{< tab "Rclone" >}} 30 | ```bash 31 | # upload 32 | rclone copy . s3:art-vcs 33 | 34 | # download 35 | rclone copy s3:art-vcs . 36 | ``` 37 | {{< /tab >}} 38 | 39 | {{< tab "DVC" >}} 40 | ```bash 41 | # init 42 | git init 43 | dvc init 44 | dvc remote add -d s3 s3://art-vcs/benchmark/ 45 | 46 | # upload 47 | dvc add data 48 | dvc push 49 | 50 | # download 51 | dvc pull 52 | ``` 53 | {{< /tab >}} 54 | 55 | {{< tab "ArtiVC" >}} 56 | ```bash 57 | # init 58 | avc init s3://art-vcs/benchmark/ 59 | 60 | # upload 61 | avc push 62 | 63 | # download 64 | avc pull 65 | ``` 66 | {{< /tab >}} 67 | 68 | {{< /tabs >}} 69 | 70 | Testing Environment 71 | 72 | {{< tabs "benchmark-env" >}} 73 | 74 | {{< tab "Hardware" >}} 75 | 76 | ```shell 77 | Hardware Overview: 78 | Model Name: MacBook Pro 79 | Model Identifier: MacBookPro18,3 80 | Chip: Apple M1 Pro 81 | Total Number of Cores: 10 (8 performance and 2 efficiency) 82 | Memory: 32 GB 83 | ``` 84 | {{< /tab >}} 85 | 86 | {{< tab "Network" >}} 87 | 88 | ```shell 89 | $ speedtest 90 | Speedtest by Ookla 91 | 92 | Server: Chunghwa Mobile - Hsinchu (id = 18450) 93 | ISP: Chunghwa Telecom 94 | Latency: 5.25 ms (0.59 ms jitter) 95 | Download: 224.54 Mbps (data used: 198.9 MB) 96 | Upload: 79.58 Mbps (data used: 136.3 MB) 97 | Packet Loss: Not available. 98 | Result URL: https://www.speedtest.net/result/c/4dd6b2ec-61d1-4b7b-a179-811951412997 99 | ``` 100 | {{< /tab >}} 101 | 102 | {{< tab "S3" >}} 103 | The region is `ap-northeast-1` 104 | {{< /tab >}} 105 | 106 | {{< /tabs >}} 107 | 108 | # Test Plans 109 | 110 | We test these three scenarios 111 | 112 | 1. **[Transfer large size files](#transfer-large-size-files)** 113 | 2. **[Transfer large amount of files](#transfer-large-amount-of-files)** 114 | 3. **[Determining which files to upload and download](#determining-which-files-to-upload)** 115 | 116 | ## Transfer large size files 117 | 118 | In this test case, we create 10 x 100 MB files under the `data/` folder.
S3 supports [multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/userguide/mpuoverview.html). If a client supports multipart upload and parallel upload and download, the results should be similar. We also transfer twice; the second trial tests whether the tool can detect that the content already exists at the same location. 119 | 120 | Prepare Data 121 | ```bash 122 | # create 10 x 100mb files in data/ 123 | seq 10 | xargs -I {} dd if=/dev/urandom of=data/100m_{} bs=100m count=1 124 | ``` 125 | 126 | Commands 127 | {{< tabs "large-files" >}} 128 | {{< tab "AWS CLI" >}} 129 | 130 | ```bash 131 | # upload 132 | time aws s3 cp --recursive . s3://art-vcs/benchmark/ 133 | 134 | # download 135 | time aws s3 cp --recursive s3://art-vcs/benchmark/ . 136 | ``` 137 | {{< /tab >}} 138 | 139 | {{< tab "Rclone" >}} 140 | 141 | ```bash 142 | # upload 143 | time rclone copy --progress . s3:art-vcs/benchmark/ 144 | 145 | # download 146 | time rclone copy --progress s3:art-vcs/benchmark/ . 147 | ``` 148 | {{< /tab >}} 149 | 150 | {{< tab "DVC" >}} 151 | 152 | 153 | ```bash 154 | # init 155 | git init 156 | dvc init 157 | dvc remote add -d s3 s3://art-vcs/benchmark/ 158 | 159 | # track data and compute hash 160 | dvc add data 161 | 162 | # upload 163 | time dvc push 164 | 165 | # download 166 | rm -rf .dvc/cache 167 | time dvc pull 168 | ``` 169 | {{< /tab >}} 170 | 171 | {{< tab "ArtiVC" >}} 172 | 173 | 174 | ```bash 175 | # init 176 | avc init s3://art-vcs/benchmark/large-files/artivc/ 177 | 178 | # upload 179 | time avc push 180 | 181 | # download 182 | time avc pull 183 | ``` 184 | {{< /tab >}} 185 | {{< /tabs >}} 186 | 187 | Result 188 | 189 | ![](../images/benchmark1.svg) 190 | 191 | | | Upload (1st / 2nd) | Download (1st / 2nd) | 192 | | --- | --- | --- | 193 | | AWS CLI | 1m43s / 1m43s | 1m53s / 1m52s | 194 | | Rclone | 1m59s / 2.6s | 2m22s / 2.6s | 195 | | DVC | 1m44s / 1s | 1m27s / <1s | 196 | | ArtiVC | 1m43s / <1s | 1m30s / <1s | 197 | 198 | 199 | For the first trial of the download, all four tools have similar results. But for the second trial, all tools except the AWS CLI check the content hashes between local and remote. ArtiVC has the best performance because the file list and hashes are stored in ONE commit object. That is, only one API request is required for this test case. 200 | 201 | ## Transfer large amount of files 202 | 203 | In this test case, we create 1,000 x 1 KB files under the `data/` folder. It is quite common for a dataset to contain many small files. Because the files are small, a tool that can reduce the number of API requests gets better performance. 204 | 205 | Prepare data 206 | 207 | ```bash 208 | # create 1000 files with 1k size 209 | seq 1000 | xargs -I {} dd if=/dev/urandom of=data/1k_{} bs=1k count=1 210 | ``` 211 | 212 | 213 | Commands 214 | {{< tabs "many-files" >}} 215 | 216 | {{< tab "AWS CLI" >}} 217 | 218 | ```bash 219 | # upload 220 | time aws s3 cp --recursive . s3://art-vcs/benchmark/ 221 | 222 | # download 223 | time aws s3 cp --recursive s3://art-vcs/benchmark/ . 224 | ``` 225 | {{< /tab >}} 226 | 227 | {{< tab "Rclone" >}} 228 | 229 | ```bash 230 | # upload 231 | time rclone copy --progress . s3:art-vcs/benchmark/ 232 | 233 | # download 234 | time rclone copy --progress s3:art-vcs/benchmark/ .
235 | ``` 236 | {{< /tab >}} 237 | 238 | {{< tab "DVC" >}} 239 | 240 | ```bash 241 | # init 242 | git init 243 | dvc init 244 | dvc remote add -d s3 s3://art-vcs/benchmark/ 245 | 246 | # track data and compute hash 247 | dvc add data 248 | 249 | # upload 250 | time dvc push 251 | 252 | # download 253 | rm -rf .dvc/cache 254 | time dvc pull 255 | ``` 256 | {{< /tab >}} 257 | 258 | {{< tab "ArtiVC" >}} 259 | 260 | ```bash 261 | # init 262 | avc init s3://art-vcs/benchmark/ 263 | 264 | # upload 265 | time avc push 266 | 267 | # download 268 | time avc pull 269 | ``` 270 | {{< /tab >}} 271 | {{< /tabs >}} 272 | 273 | Result 274 | 275 | ![](../images/benchmark2.svg) 276 | 277 | | | Upload (1st / 2nd) | Download (1st / 2nd) | 278 | | --- | --- | --- | 279 | | aws cli | 16s / 16s | 10s / 10s | 280 | | rclone | 51s / 12s | 20s / 12s | 281 | | dvc | 20s / 1s | 18s / <1s | 282 | | artivc | 12s / <1s | 8s / <1s | 283 | 284 | 285 | For the first trial, Rclone gets the worst performance because it only has four concurrent transfers by default. We can use the `--transfers 10` option to increase the transfer speed. ArtiVC has the best performance for the first trial because there is an optimization for the first push when there is no commit in the remote repository: it uploads all the files without a content check. 286 | 287 | For the second trial, ArtiVC and DVC have an efficient way to know that no transfer is required. Rclone checks all the content hashes one by one. 288 | 289 | ## Determining which files to upload 290 | 291 | Transferring files is costly. If we know in advance which files already exist locally or remotely, we can prevent unnecessary transfers. This test measures how much time difference checking takes. It is inspired by the article [Cloud Data Sync Methods and Benchmark: DVC vs Rclone](https://dvc.org/blog/dvc-vs-rclone). 292 | 293 | To compare the local and remote files, there are two methods 294 | 295 | 1. Check the content hash against the remote for each local file. This method performs better when there are few local files and a lot of remote files. 296 | 2. List the remote files and compare the remote hashes with the local files. The performance of this method is linearly related to the amount of data on the remote. 297 | 298 | There are three scenarios to test 299 | 300 | 1. 50000 local files and no remote files 301 | 1. 500 local files and 50000 remote files 302 | 1. 1 local file and 50000 remote files 303 | 304 | 305 | Prepare data 306 | 307 | ```bash 308 | # create 50,000 files with 1k size 309 | seq 50000 | xargs -I {} dd if=/dev/urandom of=data/1k_{} bs=1k count=1 310 | ``` 311 | 312 | Commands 313 | {{< tabs "diff-check" >}} 314 | {{< tab "Rclone" >}} 315 | 316 | 317 | ```bash 318 | time rclone copy --dry-run s3:art-vcs/benchmark/ . 319 | ``` 320 | {{< /tab >}} 321 | {{< tab "Rclone (--no-traverse)" >}} 322 | 323 | 324 | ```bash 325 | time rclone copy --dry-run --no-traverse s3:art-vcs/benchmark/ . 326 | ``` 327 | {{< /tab >}} 328 | {{< tab "DVC" >}} 329 | 330 | ```bash 331 | time dvc status -c 332 | ``` 333 | {{< /tab >}} 334 | {{< tab "ArtiVC" >}} 335 | 336 | ```bash 337 | time avc push --dry-run 338 | ``` 339 | {{< /tab >}} 340 | {{< /tabs >}} 341 | 342 | Result 343 | 344 | ![](../images/benchmark3.svg) 345 | | | local 50000 / remote 0 | local 500 / remote 50000 | local 1 / remote 50000 | 346 | | --- | --- | --- | --- | 347 | | rclone | 3s | 16s | 11s | 348 | | rclone (--no-traverse) | 7m48s | 6.6s | 2s | 349 | | dvc | 3s | 6.6s | 2.2s | 350 | | artivc | 1.1s | <1s | <1s | 351 | 352 | 353 | Just like the [blog post](https://dvc.org/blog/dvc-vs-rclone), DVC uses an adaptive method to query data from the remote, but ArtiVC still outperforms DVC. Why?
The reason is that DVC uses a `data.dvc` file holding the content hash of a version of the folder, and the file list and md5 hashes are stored in a `.dir` file. Even though the file list is stored in the `.dir` file, it still cannot guarantee that all the files are available in the remote. DVC still needs to use one of the two methods to synchronize the status between local and remote. 354 | 355 | ArtiVC works in another way. ArtiVC is a centralized version control system, and the commit object is stored in the repository. A push must guarantee that all files have been successfully uploaded to the repository before the commit object is uploaded. So if we can get the commit object from the repo, we can say that all the files listed in the commit object are available in the repo. There is no additional need to check their existence one by one. 356 | 357 | 358 | # Conclusions 359 | 360 | From the benchmark, we know that ArtiVC has similar performance to the AWS CLI while downloading and uploading data. Using the commit object, we can easily manipulate the changeset with only one API call, no matter how many objects there are locally or remotely.
-------------------------------------------------------------------------------- /docs/content/en/design/faq.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: FAQ 3 | weight: 2 4 | --- 5 | 6 | ### Q: Why don't we use object storage directly? 7 | Yes, we can use object storage to version the artifacts using different prefixes. However, it does not support atomic operations for multi-file uploads. That is, we cannot know if a file went missing during an upload, or if any files were added to the folder afterward. In ArtiVC, a commit is an atomic operation that advances one version. 8 | 9 | ### Q: Why don't we use git? 10 | Git is not suitable for [versioning big files](https://stackoverflow.com/questions/17888604/git-with-large-files). Whenever we clone a git repository, it downloads ALL files from every branch and commit to the local machine. 11 | 12 | There are good solutions like [Git LFS](https://git-lfs.github.com/) or [DVC](https://dvc.org/) out there to version large files in a git repository. However, if we already require a dependency on storage like S3 or NFS, why don't we do the versioning directly on that storage? 13 | 14 | ### Q: Can we do the versioning with multiple datasets or models? 15 | Yes. Just create different repositories with different prefixes in the same S3 bucket, or different folders on NFS 16 | 17 | ### Q: What storage does ArtiVC support? 18 | Currently, we support local filesystems, NFS (via local), AWS S3, GCS (Google Cloud Storage), and Azure Blob Storage. 19 | 20 | ### Q: Do we support Windows? 21 | Not yet, we will implement and test it soon. 22 |
-------------------------------------------------------------------------------- /docs/content/en/design/how-it-works.md: -------------------------------------------------------------------------------- 1 | --- 2 | weight: 1 3 | --- 4 | There are three major types of data stored in the artifact repository 5 | - **Blob**: the actual object to be uploaded to the object storage. The path is determined by the **SHA1** hash of the content. We use the same path strategy as [git](https://git-scm.com/book/zh-tw/v2/Git-Internals-Git-Objects). 6 | - **Commit**: whenever the client pushes a commit to the repository, it creates a commit object in the store. It contains the timestamp, the message, and the list of blobs.
A commit is also stored at a path derived from its content hash. This makes it impossible to change the content unnoticed, because the hash would become invalid. The content of a commit is gzip-compressed JSON. 7 | - **Reference**: references are named tags or time strings that link to a commit. They make versioning possible. A special kind of reference, `latest`, is used by default whenever the client pushes a commit to an artifact repository. 8 | 9 | ![](../images/artivc-overview.png) 10 | 11 | 12 | 13 | The sample file layout in the artifact repository 14 | ``` 15 | 16 | ├── commits 17 | │   ├── 428f898a6792f37cf6805776098387783fdcaa87 18 | │   └── b8d996e998197f8be65f0a0e6ceaa2c7b26bfd11 19 | ├── objects 20 | │   ├── 03 21 | │   │   └── cfd743661f07975fa2f1220c5194cbaff48451 22 | │   ├── 23 23 | │   │   └── 715d393e1b723b249642a586597426885f64d1 24 | │   ├── 2b 25 | │   │   └── 77b0d6a5f3bd047fb2c6e44bbb22822f773f94 26 | │   ├── 32 27 | │   │   └── e2eb1b2f4b757fcc3e112d57ca702c65dad526 28 | │   ├── 6d 29 | │   │   └── 7cb1f44cb598a0db93f912791a1c20e3bd7c6b 30 | │   ├── 84 31 | │   │   └── 44869206da2a25b8ee79a8959ed4b34144f2a8 32 | │   ├── ac 33 | │   │   └── 9bcb803e59cd45610d87f2b683319420e7f76b 34 | │   └── d7 35 | │   └── 153b344c84ae187e2a894688310f1956dc45b7 36 | └── refs 37 | ├── latest 38 | └── tags 39 | ├── v1.0.0 40 | └── v1.0.1 41 | ``` 42 | 43 | The sample content of a commit 44 | ``` 45 | { 46 | "createdAt": "2022-02-06T00:34:45.406454+08:00", 47 | "message": null, 48 | "blobs": [ 49 | { 50 | "path": "README.md", 51 | "hash": "32e2eb1b2f4b757fcc3e112d57ca702c65dad526", 52 | "mode": 420, 53 | "size": 6148 54 | }, 55 | { 56 | "path": "front.png", 57 | "hash": "ac9bcb803e59cd45610d87f2b683319420e7f76b", 58 | "mode": 420, 59 | "size": 6130505 60 | }, 61 | { 62 | "path": "back.png", 63 | "hash": "d7153b344c84ae187e2a894688310f1956dc45b7", 64 | "mode": 420, 65 | "size": 696686 66 | }, 67 | { 68 | "path": "tmp", 69 | "hash": "03cfd743661f07975fa2f1220c5194cbaff48451", 70 | "mode": 420, 71 | "size": 4 72 | }, 73 | { 74 | "path": "screen.png", 75 | "hash": "6d7cb1f44cb598a0db93f912791a1c20e3bd7c6b", 76 | "mode": 420, 77 | "size": 305686 78 | }, 79 | { 80 | "path": "view/1.png", 81 | "hash": "8444869206da2a25b8ee79a8959ed4b34144f2a8", 82 | "mode": 420, 83 | "size": 578961 84 | }, 85 | { 86 | "path": "view/2.png", 87 | "hash": "2b77b0d6a5f3bd047fb2c6e44bbb22822f773f94", 88 | "mode": 420, 89 | "size": 214683 90 | } 91 | ] 92 | } 93 | ```
-------------------------------------------------------------------------------- /docs/content/en/design/images/artiv-overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InfuseAI/ArtiVC/76a47f0105538a7511742f154c0da22ae0372eec/docs/content/en/design/images/artiv-overview.png
-------------------------------------------------------------------------------- /docs/content/en/design/mlops.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: MLOps 3 | weight: 5 4 | --- 5 | 6 | A machine learning application is a data-driven approach to solving problems. MLOps is a methodology that creates a feedback loop across data, model training, evaluation, model publishing, deployment, and monitoring. There are three core components in an ML application. 7 | 8 | - Code 9 | - Datasets 10 | - Models 11 | 12 | The code can be training code or application code. Mostly, it is versioned by git, and we are familiar with the way to version it.
And we also use git as the single source of truth to drive the whole DevOps lifecycle. 13 | 14 | However, for datasets and models, there is still no de facto solution to version them. Usually, these data are stored in cloud object storage, on-premise object storage like MinIO, or NFS. There is still a gap between data storage and version metadata storage. This is why we built *ArtiVC*. 15 | 16 | In addition, we are thinking about how to drive automation when an artifact store event is triggered. In git, we can trigger a job whenever a git event occurs. In the artifact store, we lack the fundamentals to trigger such events. *ArtiVC* references the design of *git* and provides the *commit* and *reference* primitives, which make it possible to define that a commit or a version has been created. This makes it possible to listen to object storage or filesystem events and trigger an automation job accordingly.
-------------------------------------------------------------------------------- /docs/content/en/posts/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Blogs 3 | type: posts 4 | weight: 10 5 | geekdocHidden: true 6 | --- 7 |
-------------------------------------------------------------------------------- /docs/content/en/posts/initial-release.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: The first post 3 | type: posts 4 | date: 2022-03-15 5 | --- 6 | 7 | Welcome to the ArtiVC documentation site. The site is built with the excellent [hugo](https://gohugo.io/) project and the beautiful theme by [geekdocs](https://geekdocs.de/) 8 |
-------------------------------------------------------------------------------- /docs/content/en/posts/rename-project.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Renamed to ArtiVC 3 | type: posts 4 | date: 2022-3-31 5 | --- 6 | 7 | Today, we are pleased to announce that our project name has changed from `ArtiV` to `ArtiVC`. The new website is also published at https://artivc.io. 8 |
-------------------------------------------------------------------------------- /docs/content/en/usage/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Usage 3 | weight: 1 4 | --- 5 |
-------------------------------------------------------------------------------- /docs/content/en/usage/cheatsheet.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Cheatsheet 3 | weight: 9999 4 | --- 5 | 6 | Please see the [command reference](../../commands/avc) for the details 7 | 8 | ![Cheatsheet](../images/cheatsheet.png) 9 |
-------------------------------------------------------------------------------- /docs/content/en/usage/dryrun.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Dry Run 3 | weight: 11 4 | --- 5 | 6 | Pushing and pulling data is time-consuming and should be double-checked before transferring. Dry run is the feature that lists the changeset before anything is sent. 7 | 8 | 9 | ## Push 10 | 11 | 1. Dry run before pushing 12 | ```shell 13 | avc push --dry-run 14 | ``` 15 | 16 | 1. Do the actual push 17 | ``` 18 | avc push 19 | ``` 20 | 21 | ## Pull 22 | 23 | 1. Dry run before pulling 24 | ```shell 25 | avc pull --dry-run 26 | # or check in delete mode 27 | # avc pull --delete --dry-run 28 | ``` 29 | 30 | 1. 
Do the actual pull 31 | 32 | ```shell 33 | avc pull 34 | # avc pull --delete 35 | ``` 36 | 37 |
-------------------------------------------------------------------------------- /docs/content/en/usage/expose.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Expose the data 3 | weight: 20 4 | --- 5 | 6 | An ArtiVC repository can be exposed as an HTTP endpoint. In S3, we can just make the bucket public and give the data consumer the HTTP endpoint of the repository. In this way, we can download data through a CDN or other reverse proxies. 7 | 8 | 1. [Make your S3 bucket public](https://aws.amazon.com/premiumsupport/knowledge-center/read-access-objects-s3-bucket/?nc1=h_ls) 9 | 1. Copy the public URL of your repository. For example 10 | ```shell 11 | https://mybucket.s3.ap-northeast-1.amazonaws.com/datasets/flowers-classification 12 | ``` 13 | 1. Download the data 14 | ```shell 15 | avc get -o /tmp/output https://mybucket.s3.ap-northeast-1.amazonaws.com/datasets/flowers-classification 16 | ``` 17 |
-------------------------------------------------------------------------------- /docs/content/en/usage/getting-started.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Getting Started 3 | weight: 1 4 | --- 5 | 6 | 7 | 8 | {{< toc >}} 9 | 10 | 11 | # Install ArtiVC 12 | 13 | ## Install from Release Page 14 | 15 | Download the latest ArtiVC from the [release](https://github.com/InfuseAI/ArtiVC/releases) page 16 | 17 | ## Install by homebrew 18 | 19 | ```shell 20 | brew tap infuseai/artivc 21 | brew install artivc 22 | ``` 23 | 24 | # Configuration 25 | Here we describe how to configure credentials to access the remote backend. The principle of ArtiVC is "use your tools' config". This allows you to move among the toolchains painlessly. 26 | 27 | {{< tabs "config" >}} 28 | {{< tab "Local" >}} 29 | No configuration required 30 | {{< /tab >}} 31 | 32 | {{< tab "SSH" >}} 33 | 1. Configure the `~/.ssh/config` 34 | ```bash 35 | Host myserver 36 | HostName myserver.hosts 37 | User myname 38 | IdentityFile ~/.ssh/id_ed25519 39 | ``` 40 | 1. Check if you can access the SSH server successfully 41 | ``` 42 | ssh myserver 43 | ``` 44 | 45 | For more information, please see the [Remote Filesystem (SSH) backend](../../backends/ssh) 46 | {{< /tab >}} 47 | 48 | {{< tab "AWS S3" >}} 49 | 1. Install the [AWS CLI](https://aws.amazon.com/cli/) 50 | 2. Configure the AWS CLI 51 | ``` 52 | aws configure 53 | ``` 54 | 3. Check the current config 55 | ``` 56 | aws configure list 57 | ``` 58 | 59 | For more information, please see the [AWS S3 backend](../../backends/s3) 60 | 61 | {{< /tab >}} 62 | 63 | {{< tab "GCS" >}} 64 | 1. Install the [gcloud CLI](https://cloud.google.com/sdk/gcloud) 65 | 2. Log in with the application default credentials 66 | ``` 67 | gcloud auth application-default login 68 | ``` 69 | 3. Check that the current credential is available 70 | ``` 71 | gcloud auth application-default print-access-token 72 | ``` 73 | 74 | For more information, please see the [Google Cloud Storage backend](../../backends/gcs) 75 | {{< /tab >}} 76 | 77 | {{< tab "Azure Blob Storage" >}} 78 | 1. Install the [Azure CLI](https://docs.microsoft.com/cli/azure/install-azure-cli) 79 | 2. Log in with the Azure CLI 80 | ``` 81 | az login 82 | ``` 83 | 3. Check the login status 84 | ``` 85 | az account show 86 | ``` 87 | 88 | For more information, please see the [Azure Blob Storage backend](../../backends/azureblob) 89 | {{< /tab >}} 90 | {{< /tabs >}} 91 | 92 | # Quick Start 93 | ## Push data 94 | 1. Prepare your data.
-------------------------------------------------------------------------------- /docs/content/en/usage/getting-started.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Getting Started 3 | weight: 1 4 | --- 5 | 6 | 7 | 8 | {{< toc >}} 9 | 10 | 11 | # Install ArtiVC 12 | 13 | ## Install from the Release Page 14 | 15 | Download the latest ArtiVC from the [release](https://github.com/InfuseAI/ArtiVC/releases) page 16 | 17 | ## Install with Homebrew 18 | 19 | ```shell 20 | brew tap infuseai/artivc 21 | brew install artivc 22 | ``` 23 | 24 | # Configuration 25 | Here we describe how to configure credentials to access the remote backend. The principle of ArtiVC is "use your tools' config": ArtiVC reuses the credentials your existing tools are already configured with, so you can move among the toolchains painlessly. 26 | 27 | {{}} 28 | {{}} 29 | No configuration required 30 | {{}} 31 | 32 | {{}} 33 | 1. Configure the `~/.ssh/config` 34 | ```bash 35 | Host myserver 36 | HostName myserver.hosts 37 | User myname 38 | IdentityFile ~/.ssh/id_ed25519 39 | ``` 40 | 1. Check if you can access the SSH server successfully 41 | ``` 42 | ssh myserver 43 | ``` 44 | 45 | For more information, please see the [Remote Filesystem (SSH) backend](../../backends/ssh) 46 | {{}} 47 | 48 | {{}} 49 | 1. Install the [AWS CLI](https://aws.amazon.com/cli/) 50 | 2. Configure the AWS CLI 51 | ``` 52 | aws configure 53 | ``` 54 | 3. Check the current config 55 | ``` 56 | aws configure list 57 | ``` 58 | 59 | For more information, please see the [AWS S3 backend](../../backends/s3) 60 | 61 | {{}} 62 | 63 | {{}} 64 | 1. Install the [gcloud CLI](https://cloud.google.com/sdk/gcloud) 65 | 2. Log in to set up the application default credentials 66 | ``` 67 | gcloud auth application-default login 68 | ``` 69 | 3. Check that the current credential is available 70 | ``` 71 | gcloud auth application-default print-access-token 72 | ``` 73 | 74 | For more information, please see the [Google Cloud Storage backend](../../backends/gcs) 75 | {{}} 76 | 77 | {{}} 78 | 1. Install the [Azure CLI](https://docs.microsoft.com/cli/azure/install-azure-cli) 79 | 2. Log in with the Azure CLI 80 | ``` 81 | az login 82 | ``` 83 | 3. Check the login status 84 | ``` 85 | az account show 86 | ``` 87 | 88 | For more information, please see the [Azure Blob Storage backend](../../backends/azureblob) 89 | {{}} 90 | {{}} 91 | 92 | # Quick Start 93 | ## Push data 94 | 1. Prepare your data. We put the data in the folder `/tmp/artivc/workspace` 95 | ```shell 96 | mkdir -p /tmp/artivc/workspace 97 | cd /tmp/artivc/workspace 98 | ``` 99 | 100 | Put the first version of the data 101 | ```shell 102 | echo a > a 103 | echo b > b 104 | echo c > c 105 | ``` 106 | 107 | 1. Select a folder as the repository. Here we use `/tmp/artivc/repo` 108 | ```shell 109 | mkdir -p /tmp/artivc/repo 110 | ``` 111 | 112 | 1. Init the workspace 113 | 114 | {{}} 115 | {{}} 116 | ```shell 117 | # in /tmp/artivc/workspace 118 | avc init /tmp/artivc/repo 119 | ``` 120 | {{}} 121 | 122 | {{}} 123 | ```shell 124 | # in /tmp/artivc/workspace 125 | avc init <host>:path/to/repo 126 | ``` 127 | {{}} 128 | 129 | {{}} 130 | ```shell 131 | # in /tmp/artivc/workspace 132 | avc init s3://<bucket>/path/to/repo 133 | ``` 134 | {{}} 135 | 136 | {{}} 137 | ```shell 138 | # in /tmp/artivc/workspace 139 | avc init gs://<bucket>/path/to/repo 140 | ``` 141 | {{}} 142 | 143 | {{}} 144 | ```shell 145 | # in /tmp/artivc/workspace 146 | avc init https://<storageaccount>.blob.core.windows.net/<container>/path/to/repo 147 | ``` 148 | {{}} 149 | 150 | {{}} 151 | 152 | 1. Push the data 153 | ```shell 154 | avc push 155 | ``` 156 | 157 | 1. See the commit log 158 | ```shell 159 | avc log 160 | ``` 161 | 162 | 1. Add more data to your repository 163 | 164 | ```shell 165 | echo "hello" > hello 166 | avc status 167 | ``` 168 | 169 | Push to the remote 170 | ```shell 171 | avc push 172 | ``` 173 | 174 | 1. Tag a version 175 | 176 | ```shell 177 | avc tag v0.1.0 178 | ``` 179 | 180 | See the log 181 | ```shell 182 | avc log 183 | ``` 184 | 185 | ## Clone data from an existing repository 186 | 1. Go to the folder in which to clone the repository 187 | 188 | {{}} 189 | {{}} 190 | ```shell 191 | cd /tmp/artivc/ 192 | avc clone /tmp/artivc/repo another-workspace 193 | ``` 194 | {{}} 195 | 196 | {{}} 197 | ```shell 198 | cd /tmp/artivc/ 199 | avc clone <host>:path/to/repo 200 | ``` 201 | {{}} 202 | 203 | {{}} 204 | ```shell 205 | cd /tmp/artivc/ 206 | avc clone s3://<bucket>/path/to/repo 207 | ``` 208 | {{}} 209 | 210 | {{}} 211 | ```shell 212 | cd /tmp/artivc/ 213 | avc clone gs://<bucket>/path/to/repo 214 | ``` 215 | {{}} 216 | 217 | {{}} 218 | ```shell 219 | cd /tmp/artivc/ 220 | avc clone https://<storageaccount>.blob.core.windows.net/<container>/path/to/repo 221 | ``` 222 | {{}} 223 | 224 | {{}} 225 | 226 | Then the workspace is created, and the data is downloaded. 227 | 228 | 1. See the commit log 229 | ```shell 230 | cd another-workspace/ 231 | avc log 232 | ``` 233 | 234 | ## Download data 235 | 236 | 1. Download the latest version 237 | 238 | {{}} 239 | {{}} 240 | ```shell 241 | avc get -o /tmp/artivc/dl-latest /tmp/artivc/repo 242 | ``` 243 | {{}} 244 | 245 | {{}} 246 | ```shell 247 | avc get -o /tmp/artivc/dl-latest <host>:path/to/repo 248 | ``` 249 | {{}} 250 | 251 | {{}} 252 | ```shell 253 | avc get -o /tmp/artivc/dl-latest s3://<bucket>/path/to/repo 254 | ``` 255 | {{}} 256 | 257 | {{}} 258 | ```shell 259 | avc get -o /tmp/artivc/dl-latest gs://<bucket>/path/to/repo 260 | ``` 261 | {{}} 262 | 263 | {{}} 264 | ```shell 265 | avc get -o /tmp/artivc/dl-latest https://<storageaccount>.blob.core.windows.net/<container>/path/to/repo 266 | ``` 267 | {{}} 268 | 269 | {{}} 270 | 271 | Check the content 272 | ```shell 273 | ls /tmp/artivc/dl-latest 274 | ``` 275 | 276 | 1. 
Or download a specific version 277 | 278 | {{}} 279 | {{}} 280 | ```shell 281 | avc get -o /tmp/artivc/dl-v0.1.0 /tmp/artivc/repo@v0.1.0 282 | ``` 283 | {{}} 284 | 285 | {{}} 286 | ```shell 287 | avc get -o /tmp/artivc/dl-v0.1.0 <host>:path/to/repo@v0.1.0 288 | ``` 289 | {{}} 290 | 291 | {{}} 292 | ```shell 293 | avc get -o /tmp/artivc/dl-v0.1.0 s3://<bucket>/path/to/repo@v0.1.0 294 | ``` 295 | {{}} 296 | 297 | {{}} 298 | ```shell 299 | avc get -o /tmp/artivc/dl-v0.1.0 gs://<bucket>/path/to/repo@v0.1.0 300 | ``` 301 | {{}} 302 | 303 | {{}} 304 | ```shell 305 | avc get -o /tmp/artivc/dl-v0.1.0 https://<storageaccount>.blob.core.windows.net/<container>/path/to/repo@v0.1.0 306 | ``` 307 | {{}} 308 | 309 | {{}} 310 | 311 | Check the content 312 | ```shell 313 | ls /tmp/artivc/dl-v0.1.0 314 | ``` -------------------------------------------------------------------------------- /docs/content/en/usage/ignore-file.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Ignore File 3 | weight: 12 4 | --- 5 | 6 | Just like git, you can put a `.avcignore` file at the root of the workspace to define the exclusion list. The rules are the same as for `.gitignore`. For more details, please check the [pattern format](https://git-scm.com/docs/gitignore#_pattern_format) in the git documentation. 7 | 8 | Here is an example: 9 | 10 | ```shell 11 | # Ignore files 12 | test 13 | path/to/my/file 14 | .DS_Store 15 | 16 | # Ignore folders. Use a forward slash at the end 17 | build/ 18 | path/to/my/folder/ 19 | /build/ 20 | 21 | # Ignore all files with the extension '.py' 22 | *.py 23 | ``` 24 | 25 | 26 | -------------------------------------------------------------------------------- /docs/content/en/usage/images/cheatsheet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InfuseAI/ArtiVC/76a47f0105538a7511742f154c0da22ae0372eec/docs/content/en/usage/images/cheatsheet.png -------------------------------------------------------------------------------- /docs/content/en/usage/partial-download.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Partial Download 3 | weight: 13 4 | --- 5 | 6 | By default, ArtiVC downloads all files of a version. It also supports downloading only a subset of the files in a commit. 7 | 8 | You can download partially: 9 | 10 | - by specifying multiple files. 11 | - by specifying a wildcard pattern, like a `.gitignore` pattern. 12 | 13 | For more details, please check the [pattern format](https://git-scm.com/docs/gitignore#_pattern_format) in the git documentation. 14 | 15 | ## Usage 16 | ```shell 17 | # get 18 | avc get -o output repo -- path/to/file1 path/to/file2 data/ 19 | 20 | # pull 21 | avc pull -- path/to/partial 22 | avc pull v0.1.0 -- path/to/partial ... 23 | ``` 24 | 25 | Note: if you would like to do a partial download, please put the path list after the "--" flag terminator. 26 |
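27 | For instance, assuming a repository whose commits contain an `images/` folder and JSON annotation files (a hypothetical layout), a pattern-based partial download could look like the following. The quotes keep the shell from expanding the wildcard so the pattern reaches ArtiVC intact: 28 | 29 | ```shell 30 | # download only the images folder and any JSON files from tag v0.1.0 31 | avc get -o output s3://mybucket/datasets/flowers-classification@v0.1.0 -- images/ '*.json' 32 | ``` 33 |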
34 | 35 | ## SEE ALSO 36 | 37 | * [avc get](/commands/avc_get/) - Download data from a repository 38 | * [avc pull](/commands/avc_pull/) - Pull data from the repository 39 | -------------------------------------------------------------------------------- /docs/content/en/usage/windows-supports.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Windows Support 3 | weight: 100 4 | --- 5 | 6 | ArtiVC is supported on Windows platforms by running on WSL. 7 | 8 | 9 | > WSL Prerequisites: 10 | > 11 | > You must be running Windows 10 version 2004 and higher (Build 19041 and higher) or Windows 11. 12 | 13 | 14 | ## What's WSL 15 | WSL is the Windows Subsystem for Linux. It lets users work in a Linux environment directly on Windows. 16 | 17 | Check the [official WSL site](https://docs.microsoft.com/en-us/windows/wsl/) for more information. 18 | 19 | 20 | ## How To 21 | Download the ArtiVC Linux amd64/arm64 binary from our [github release page](https://github.com/InfuseAI/ArtiVC/releases), depending on your platform architecture. 22 | 23 | After extracting the ArtiVC binary from the archive, all you need to do is execute it on the Linux command line as usual. 24 | 25 | - Extract the binary from the archive 26 | ```shell 27 | $ tar zxvf ArtiVC-v0.4.0-linux-arm64.tar.gz 28 | ``` 29 | 30 | - Execute it 31 | ```shell 32 | $ ./avc version 33 | version.BuildInfo{"Version":"v0.4.0","GitCommit":"febfe3bc579d77130570ba7d12fcf404326b0f7d","GitTreeState":"clean","GoVersion":"go1.17.8"} 34 | ``` 35 | -------------------------------------------------------------------------------- /docs/content/en/use-cases/_index.md: -------------------------------------------------------------------------------- 1 | --- 2 | weight: 3 3 | --- -------------------------------------------------------------------------------- /docs/content/en/use-cases/backup.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Data Backup/Snapshot 3 | weight: 1 4 | --- 5 | 6 | Data backup is one of the most common requirements across many scenarios. ArtiVC is a very simple tool to back up, or even snapshot, your data in cloud storage. 7 | 8 | ## Snapshot the data 9 | 10 | 1. Init the repository 11 | 12 | ```shell 13 | avc init s3://mybucket/mydocuments 14 | ``` 15 | 1. Snapshot 16 | 17 | ``` 18 | avc push 19 | ``` 20 | 1. Optionally, tag the current snapshot as a version 21 | ``` 22 | avc tag '2022-Q1' 23 | ``` 24 | 25 | ## Rollback 26 | 27 | 1. See the snapshot timeline 28 | 29 | ``` 30 | avc log 31 | ``` 32 | 33 | 1. Roll back. Use `--delete` to delete local files that are not listed in the snapshot version. 34 | 35 | ``` 36 | avc pull --delete 49175d02 37 | ``` 38 | 39 | ## Get a file from a version 40 | 41 | 1. Get a file from a given version 42 | 43 | ``` 44 | avc pull 49175d02 -- path/to/my/file 45 | ``` 46 | -------------------------------------------------------------------------------- /docs/content/en/use-cases/dataprep.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Dataset Preparation 3 | weight: 2 4 | --- 5 | 6 | Organizing a dataset can be a hassle, especially as data is constantly evolving. ArtiVC is a very suitable tool for organizing datasets. It provides the following benefits. 7 | 8 | - No need to transfer files whose content already exists in the repository. Even if you rename a file or copy it to a different folder, ArtiVC knows it is the same content (see the sketch after the preparation steps below). It is common to move or keep the same images and videos as a dataset evolves. 9 | - Version tagging. If there is a stable version of the dataset, we can tag a commit as a human-readable version. 10 | 11 | ## Prepare a dataset 12 | 13 | Here are the common steps to prepare a dataset 14 | 15 | 1. Create a dataset folder and use subfolders as image labels 16 | 1. Initiate the workspace. 17 | ```shell 18 | avc init s3://mybucket/datasets/flowers-classification 19 | ``` 20 | 1. Push your first release 21 | ```shell 22 | avc push -m 'first version' 23 | ``` 24 | 1. Clean the dataset, and move the wrongly classified data 25 | 1. Push the dataset again 26 | ```shell 27 | # See what data will be pushed 28 | avc status 29 | # Push 30 | avc push -m 'my second version' 31 | ``` 32 | 1. If new versions have been pushed by others, sync the dataset with the remote 33 | ```shell 34 | # Check the difference 35 | avc pull --dry-run 36 | # Sync with remote 37 | avc pull 38 | # or use the delete mode 39 | # avc pull --delete --dry-run 40 | # avc pull --delete 41 | ``` 42 | 1. Tag the version 43 | ```shell 44 | avc push 45 | avc tag v0.1.0 46 | ``` 47 | and see the change 48 | ```shell 49 | avc log 50 | ``` 51 |
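52 | 53 | ## Renames do not re-upload 54 | 55 | As noted in the benefits above, ArtiVC addresses files by their content, so reorganizing files does not transfer them again. A quick sketch (the file paths here are hypothetical): 56 | 57 | ```shell 58 | # Move a wrongly classified image to another label folder 59 | mv rose/flower1.jpg tulip/flower1.jpg 60 | # The changeset records the move; the content itself is not pushed again 61 | avc status 62 | avc push -m 'fix labels' 63 | ``` 64 |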
65 | ## Clone the dataset 66 | 67 | Use the dataset on another machine 68 | 69 | ```shell 70 | avc clone s3://mybucket/datasets/flowers-classification 71 | cd flowers-classification 72 | ``` -------------------------------------------------------------------------------- /docs/content/en/use-cases/experiment.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: ML Experiments 3 | weight: 3 4 | --- 5 | Here we use three repositories 6 | - A dataset for training 7 | - A base model for transfer learning or fine-tuning 8 | - The experiment output, versioned by the timestamp of an experiment. 9 | 10 | 11 | In this use case, we use the `get` and `put` commands to simplify the workflow for programmatic use cases. 12 | 13 | 1. Clone the training code 14 | ```shell 15 | git clone https://github.com/my-org/my-ml-project.git 16 | cd my-ml-project 17 | ``` 18 | 1. Download the dataset and the base model 19 | ```shell 20 | avc get -o dataset/ s3://mybucket/datasets/flowers-classification@v0.1.0 21 | avc get -o base/ s3://mybucket/models/my-base-model@v0.3.0 22 | ``` 23 | 1. Train and output your training result (trained model, experiment log, hyperparams, etc.) to the `artifacts/` folder 24 | ```shell 25 | python ./train.py 26 | ``` 27 | 1. Upload the artifacts 28 | ```shell 29 | avc put artifacts/ s3://mybucket/experiments/project1@20220303-100504 30 | ``` 31 | -------------------------------------------------------------------------------- /docs/data/menu/extra.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | header: 3 | - name: GitHub 4 | ref: https://github.com/InfuseAI/ArtiVC 5 | icon: gdoc_github 6 | external: true 7 | - name: Blogs 8 | ref: posts 9 | icon: gdoc_notification -------------------------------------------------------------------------------- /docs/data/menu/main.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | main: 3 | -------------------------------------------------------------------------------- /docs/data/menu/more.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | more: 3 | - name: News 4 | ref: "/posts" 5 | icon: "gdoc_notification" 6 | - name: Releases 7 | ref: "https://github.com/infuseai/artivc/releases" 8 | external: true 9 | icon: "gdoc_download" 10 | - name: View Source 11 | ref: "https://github.com/infuseai/artivc" 12 | external: true 13 | icon: "gdoc_github" 14 | -------------------------------------------------------------------------------- /docs/layouts/shortcodes/sprites.html: -------------------------------------------------------------------------------- 1 |
2 | {{ range $key, $value := .Site.Data.sprites.geekdoc }} 3 |
4 |
5 | 6 |
7 |
8 | #{{ (replace $key "_" "_") | safeHTML }} 9 |
10 |
11 | {{ end }} 12 |
13 | -------------------------------------------------------------------------------- /docs/static/.htaccess: -------------------------------------------------------------------------------- 1 | ErrorDocument 404 /404.html 2 | 3 | ExpiresActive On 4 | ExpiresDefault "access plus 600 seconds" 5 | ExpiresByType text/css "access plus 1 week" 6 | ExpiresByType text/javascript "access plus 1 month" 7 | ExpiresByType text/html "access plus 1 seconds" 8 | ExpiresByType application/javascript "access plus 1 month" 9 | ExpiresByType application/x-javascript "access plus 1 month" 10 | ExpiresByType image/gif "access plus 1 week" 11 | ExpiresByType image/jpeg "access plus 1 week" 12 | ExpiresByType image/png "access plus 1 week" 13 | ExpiresByType image/x-icon "access plus 1 month" 14 | ExpiresByType image/svg+xml "access plus 1 week" 15 | ExpiresByType application/x-font-woff "access plus 1 week" 16 | ExpiresByType application/font-woff2 "access plus 1 week" 17 | -------------------------------------------------------------------------------- /docs/static/ArtiVC_workspace.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InfuseAI/ArtiVC/76a47f0105538a7511742f154c0da22ae0372eec/docs/static/ArtiVC_workspace.png -------------------------------------------------------------------------------- /docs/static/_includes/example.html.part: -------------------------------------------------------------------------------- 1 |

2 | Example HTML include 3 |

4 | 5 |

This is heading 4

6 |
This is heading 5
7 |
This is heading 6
8 | -------------------------------------------------------------------------------- /docs/static/_includes/example.md.part: -------------------------------------------------------------------------------- 1 | _**Example Mardown include**_ 2 | 3 | File including a simple Markdown table. 4 | 5 | | Head 1 | Head 2 | Head 3 | 6 | | ------ | ------ | ------ | 7 | | 1 | 2 | 3 | 8 | -------------------------------------------------------------------------------- /docs/static/brand.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/static/custom.css: -------------------------------------------------------------------------------- 1 | :root { 2 | --code-max-height: 60rem; 3 | } 4 | 5 | /* Light mode theming */ 6 | :root, 7 | :root[color-mode="light"] { 8 | --header-background: #25a476; 9 | --header-font-color: #ffffff; 10 | 11 | --body-background: #ffffff; 12 | --body-font-color: #343a40; 13 | 14 | --mark-color: #ffab00; 15 | 16 | --button-background: #62cb97; 17 | --button-border-color: #4ec58a; 18 | 19 | --link-color: #518169; 20 | --link-color-visited: #c54e8a; 21 | 22 | --code-background: #f5f6f8; 23 | --code-accent-color: #e3e7eb; 24 | --code-accent-color-lite: #eff1f3; 25 | 26 | --code-copy-font-color: #6b7784; 27 | --code-copy-border-color: #adb4bc; 28 | --code-copy-success-color: #00c853; 29 | 30 | --accent-color: #e9ecef; 31 | --accent-color-lite: #f8f9fa; 32 | 33 | --control-icons: #b2bac1; 34 | 35 | --footer-background: #2f333e; 36 | --footer-font-color: #ffffff; 37 | --footer-link-color: #ffcc5c; 38 | --footer-link-color-visited: #ffcc5c; 39 | } 40 | @media (prefers-color-scheme: light) { 41 | :root { 42 | --header-background: #25a476; 43 | --header-font-color: #ffffff; 44 | 45 | --body-background: #ffffff; 46 | --body-font-color: #343a40; 47 | 48 | --mark-color: #ffab00; 49 | 50 | --button-background: #62cb97; 51 | --button-border-color: #4ec58a; 52 | 53 | --link-color: #518169; 54 | --link-color-visited: #c54e8a; 55 | 56 | --code-background: #f5f6f8; 57 | --code-accent-color: #e3e7eb; 58 | --code-accent-color-lite: #eff1f3; 59 | 60 | --code-copy-font-color: #6b7784; 61 | --code-copy-border-color: #adb4bc; 62 | --code-copy-success-color: #00c853; 63 | 64 | --accent-color: #e9ecef; 65 | --accent-color-lite: #f8f9fa; 66 | 67 | --control-icons: #b2bac1; 68 | 69 | --footer-background: #2f333e; 70 | --footer-font-color: #ffffff; 71 | --footer-link-color: #ffcc5c; 72 | --footer-link-color-visited: #ffcc5c; 73 | } 74 | } 75 | 76 | /* Dark mode theming */ 77 | :root[color-mode="dark"] { 78 | --header-background: #25a476; 79 | --header-font-color: #ffffff; 80 | 81 | --body-background: #343a40; 82 | --body-font-color: #ced3d8; 83 | 84 | --mark-color: #ffab00; 85 | 86 | --button-background: #62cb97; 87 | --button-border-color: #4ec58a; 88 | 89 | --link-color: #7ac29e; 90 | --link-color-visited: #c27a9e; 91 | 92 | --code-background: #2f353a; 93 | --code-accent-color: #262b2f; 94 | --code-accent-color-lite: #2b3035; 95 | 96 | --code-copy-font-color: #adb4bc; 97 | --code-copy-border-color: #808c98; 98 | --code-copy-success-color: #00c853; 99 | 100 | --accent-color: #2b3035; 101 | --accent-color-lite: #2f353a; 102 | 103 | --control-icons: #b2bac1; 104 | 105 | --footer-background: #2f333e; 106 | --footer-font-color: #ffffff; 107 | --footer-link-color: #ffcc5c; 108 | --footer-link-color-visited: #ffcc5c; 109 | } 110 | @media (prefers-color-scheme: dark) { 111 | :root { 112 | 
--header-background: #25a476; 113 | --header-font-color: #ffffff; 114 | 115 | --body-background: #343a40; 116 | --body-font-color: #ced3d8; 117 | 118 | --mark-color: #ffab00; 119 | 120 | --button-background: #62cb97; 121 | --button-border-color: #4ec58a; 122 | 123 | --link-color: #7ac29e; 124 | --link-color-visited: #c27a9e; 125 | 126 | --code-background: #2f353a; 127 | --code-accent-color: #262b2f; 128 | --code-accent-color-lite: #2b3035; 129 | 130 | --code-copy-font-color: #adb4bc; 131 | --code-copy-border-color: #808c98; 132 | --code-copy-success-color: #00c853; 133 | 134 | --accent-color: #2b3035; 135 | --accent-color-lite: #2f353a; 136 | 137 | --control-icons: #b2bac1; 138 | 139 | --footer-background: #2f333e; 140 | --footer-font-color: #ffffff; 141 | --footer-link-color: #ffcc5c; 142 | --footer-link-color-visited: #ffcc5c; 143 | } 144 | } 145 | 146 | .icon-grid { 147 | width: 8rem; 148 | height: 8rem; 149 | margin: 0.2em; 150 | text-align: center; 151 | padding: 0.3em; 152 | } 153 | 154 | .icon-grid__line { 155 | height: 4rem; 156 | } 157 | 158 | .icon-grid__line .icon { 159 | font-size: 3em; 160 | } 161 | 162 | .icon-grid__line--text { 163 | font-size: 0.8em; 164 | } 165 | 166 | img { 167 | box-shadow: 0 4px 8px 0 rgb(0 0 0 / 20%), 0 6px 20px 0 rgb(0 0 0 / 19%); 168 | } 169 | 170 | img.gdoc-brand__img { 171 | width: auto; 172 | box-shadow: none; 173 | } 174 | 175 | span.gdoc-brand__title { 176 | visibility: hidden; 177 | } 178 | 179 | span.gdoc-button--entry { 180 | background: var(--header-background); 181 | color: var(--header-font-color) 182 | } 183 | 184 | span.gdoc-button--entry:hover { 185 | background: var(--button-background); 186 | } 187 | -------------------------------------------------------------------------------- /docs/static/custom.css.example: -------------------------------------------------------------------------------- 1 | /* Global customization */ 2 | 3 | :root { 4 | --code-max-height: 60rem; 5 | } 6 | 7 | /* Light mode theming */ 8 | :root, 9 | :root[color-mode="light"] { 10 | --header-background: #4ec58a; 11 | --header-font-color: #ffffff; 12 | 13 | --body-background: #ffffff; 14 | --body-font-color: #343a40; 15 | 16 | --mark-color: #ffab00; 17 | 18 | --button-background: #62cb97; 19 | --button-border-color: #4ec58a; 20 | 21 | --link-color: #518169; 22 | --link-color-visited: #c54e8a; 23 | 24 | --code-background: #f5f6f8; 25 | --code-accent-color: #e3e7eb; 26 | --code-accent-color-lite: #eff1f3; 27 | 28 | --code-copy-font-color: #6b7784; 29 | --code-copy-border-color: #adb4bc; 30 | --code-copy-success-color: #00c853; 31 | 32 | --accent-color: #e9ecef; 33 | --accent-color-lite: #f8f9fa; 34 | 35 | --control-icons: #b2bac1; 36 | 37 | --footer-background: #2f333e; 38 | --footer-font-color: #ffffff; 39 | --footer-link-color: #ffcc5c; 40 | --footer-link-color-visited: #ffcc5c; 41 | } 42 | @media (prefers-color-scheme: light) { 43 | :root { 44 | --header-background: #4ec58a; 45 | --header-font-color: #ffffff; 46 | 47 | --body-background: #ffffff; 48 | --body-font-color: #343a40; 49 | 50 | --mark-color: #ffab00; 51 | 52 | --button-background: #62cb97; 53 | --button-border-color: #4ec58a; 54 | 55 | --link-color: #518169; 56 | --link-color-visited: #c54e8a; 57 | 58 | --code-background: #f5f6f8; 59 | --code-accent-color: #e3e7eb; 60 | --code-accent-color-lite: #eff1f3; 61 | 62 | --code-copy-font-color: #6b7784; 63 | --code-copy-border-color: #adb4bc; 64 | --code-copy-success-color: #00c853; 65 | 66 | --accent-color: #e9ecef; 67 | --accent-color-lite: #f8f9fa; 68 | 69 | 
--control-icons: #b2bac1; 70 | 71 | --footer-background: #2f333e; 72 | --footer-font-color: #ffffff; 73 | --footer-link-color: #ffcc5c; 74 | --footer-link-color-visited: #ffcc5c; 75 | } 76 | } 77 | 78 | /* Dark mode theming */ 79 | :root[color-mode="dark"] { 80 | --header-background: #4ec58a; 81 | --header-font-color: #ffffff; 82 | 83 | --body-background: #343a40; 84 | --body-font-color: #ced3d8; 85 | 86 | --mark-color: #ffab00; 87 | 88 | --button-background: #62cb97; 89 | --button-border-color: #4ec58a; 90 | 91 | --link-color: #7ac29e; 92 | --link-color-visited: #c27a9e; 93 | 94 | --code-background: #2f353a; 95 | --code-accent-color: #262b2f; 96 | --code-accent-color-lite: #2b3035; 97 | 98 | --code-copy-font-color: #adb4bc; 99 | --code-copy-border-color: #808c98; 100 | --code-copy-success-color: #00c853; 101 | 102 | --accent-color: #2b3035; 103 | --accent-color-lite: #2f353a; 104 | 105 | --control-icons: #b2bac1; 106 | 107 | --footer-background: #2f333e; 108 | --footer-font-color: #ffffff; 109 | --footer-link-color: #ffcc5c; 110 | --footer-link-color-visited: #ffcc5c; 111 | } 112 | @media (prefers-color-scheme: dark) { 113 | :root { 114 | --header-background: #4ec58a; 115 | --header-font-color: #ffffff; 116 | 117 | --body-background: #343a40; 118 | --body-font-color: #ced3d8; 119 | 120 | --mark-color: #ffab00; 121 | 122 | --button-background: #62cb97; 123 | --button-border-color: #4ec58a; 124 | 125 | --link-color: #7ac29e; 126 | --link-color-visited: #c27a9e; 127 | 128 | --code-background: #2f353a; 129 | --code-accent-color: #262b2f; 130 | --code-accent-color-lite: #2b3035; 131 | 132 | --code-copy-font-color: #adb4bc; 133 | --code-copy-border-color: #808c98; 134 | --code-copy-success-color: #00c853; 135 | 136 | --accent-color: #2b3035; 137 | --accent-color-lite: #2f353a; 138 | 139 | --control-icons: #b2bac1; 140 | 141 | --footer-background: #2f333e; 142 | --footer-font-color: #ffffff; 143 | --footer-link-color: #ffcc5c; 144 | --footer-link-color-visited: #ffcc5c; 145 | } 146 | } 147 | -------------------------------------------------------------------------------- /docs/static/favicon/android-chrome-192x192.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InfuseAI/ArtiVC/76a47f0105538a7511742f154c0da22ae0372eec/docs/static/favicon/android-chrome-192x192.png -------------------------------------------------------------------------------- /docs/static/favicon/android-chrome-512x512.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InfuseAI/ArtiVC/76a47f0105538a7511742f154c0da22ae0372eec/docs/static/favicon/android-chrome-512x512.png -------------------------------------------------------------------------------- /docs/static/favicon/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InfuseAI/ArtiVC/76a47f0105538a7511742f154c0da22ae0372eec/docs/static/favicon/apple-touch-icon.png -------------------------------------------------------------------------------- /docs/static/favicon/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InfuseAI/ArtiVC/76a47f0105538a7511742f154c0da22ae0372eec/docs/static/favicon/favicon-16x16.png -------------------------------------------------------------------------------- /docs/static/favicon/favicon-32x32.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/InfuseAI/ArtiVC/76a47f0105538a7511742f154c0da22ae0372eec/docs/static/favicon/favicon-32x32.png -------------------------------------------------------------------------------- /docs/static/favicon/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InfuseAI/ArtiVC/76a47f0105538a7511742f154c0da22ae0372eec/docs/static/favicon/favicon.ico -------------------------------------------------------------------------------- /docs/static/favicon/favicon.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/static/media/bundle-menu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InfuseAI/ArtiVC/76a47f0105538a7511742f154c0da22ae0372eec/docs/static/media/bundle-menu.png -------------------------------------------------------------------------------- /docs/static/media/file-tree.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InfuseAI/ArtiVC/76a47f0105538a7511742f154c0da22ae0372eec/docs/static/media/file-tree.png -------------------------------------------------------------------------------- /docs/static/media/more-menu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InfuseAI/ArtiVC/76a47f0105538a7511742f154c0da22ae0372eec/docs/static/media/more-menu.png -------------------------------------------------------------------------------- /docs/static/socialartiv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InfuseAI/ArtiVC/76a47f0105538a7511742f154c0da22ae0372eec/docs/static/socialartiv.png -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/infuseai/artivc 2 | 3 | go 1.17 4 | 5 | require ( 6 | cloud.google.com/go/storage v1.21.0 7 | github.com/BurntSushi/toml v1.0.0 8 | github.com/aws/aws-sdk-go-v2/config v1.13.1 9 | github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.9.0 10 | github.com/aws/aws-sdk-go-v2/service/s3 v1.24.1 11 | github.com/fatih/color v1.13.0 12 | github.com/kevinburke/ssh_config v1.2.0 13 | github.com/pkg/sftp v1.13.4 14 | github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 15 | github.com/spf13/cobra v1.3.0 16 | github.com/stretchr/testify v1.7.0 17 | golang.org/x/crypto v0.0.0-20220331220935-ae2d96664a29 18 | google.golang.org/api v0.69.0 19 | ) 20 | 21 | require ( 22 | cloud.google.com/go v0.100.2 // indirect 23 | cloud.google.com/go/compute v1.2.0 // indirect 24 | cloud.google.com/go/iam v0.1.1 // indirect 25 | github.com/Azure/azure-sdk-for-go/sdk/azcore v0.21.1 // indirect 26 | github.com/Azure/azure-sdk-for-go/sdk/azidentity v0.13.0 // indirect 27 | github.com/Azure/azure-sdk-for-go/sdk/internal v0.9.2 // indirect 28 | github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v0.3.0 // indirect 29 | github.com/AzureAD/microsoft-authentication-library-for-go v0.4.0 // indirect 30 | github.com/aws/aws-sdk-go-v2 v1.13.0 // indirect 31 | github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.2.0 // indirect 32 | 
github.com/aws/aws-sdk-go-v2/credentials v1.8.0 // indirect 33 | github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.10.0 // indirect 34 | github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.4 // indirect 35 | github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.2.0 // indirect 36 | github.com/aws/aws-sdk-go-v2/internal/ini v1.3.5 // indirect 37 | github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.7.0 // indirect 38 | github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.7.0 // indirect 39 | github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.11.0 // indirect 40 | github.com/aws/aws-sdk-go-v2/service/sso v1.9.0 // indirect 41 | github.com/aws/aws-sdk-go-v2/service/sts v1.14.0 // indirect 42 | github.com/aws/smithy-go v1.10.0 // indirect 43 | github.com/cpuguy83/go-md2man/v2 v2.0.1 // indirect 44 | github.com/davecgh/go-spew v1.1.1 // indirect 45 | github.com/golang-jwt/jwt v3.2.1+incompatible // indirect 46 | github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect 47 | github.com/golang/protobuf v1.5.2 // indirect 48 | github.com/google/go-cmp v0.5.7 // indirect 49 | github.com/google/uuid v1.2.0 // indirect 50 | github.com/googleapis/gax-go/v2 v2.1.1 // indirect 51 | github.com/inconshreveable/mousetrap v1.0.0 // indirect 52 | github.com/jmespath/go-jmespath v0.4.0 // indirect 53 | github.com/kr/fs v0.1.0 // indirect 54 | github.com/kylelemons/godebug v1.1.0 // indirect 55 | github.com/mattn/go-colorable v0.1.12 // indirect 56 | github.com/mattn/go-isatty v0.0.14 // indirect 57 | github.com/pkg/browser v0.0.0-20210115035449-ce105d075bb4 // indirect 58 | github.com/pmezard/go-difflib v1.0.0 // indirect 59 | github.com/russross/blackfriday/v2 v2.1.0 // indirect 60 | github.com/spf13/pflag v1.0.5 // indirect 61 | go.opencensus.io v0.23.0 // indirect 62 | golang.org/x/net v0.0.0-20220407224826-aac1ed45d8e3 // indirect 63 | golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8 // indirect 64 | golang.org/x/sys v0.0.0-20220209214540-3681064d5158 // indirect 65 | golang.org/x/text v0.3.7 // indirect 66 | golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect 67 | google.golang.org/appengine v1.6.7 // indirect 68 | google.golang.org/genproto v0.0.0-20220216160803-4663080d8bc8 // indirect 69 | google.golang.org/grpc v1.44.0 // indirect 70 | google.golang.org/protobuf v1.27.1 // indirect 71 | gopkg.in/yaml.v2 v2.4.0 // indirect 72 | gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect 73 | ) 74 | -------------------------------------------------------------------------------- /internal/core/config.go: -------------------------------------------------------------------------------- 1 | package core 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "io/ioutil" 7 | "os" 8 | "path" 9 | "path/filepath" 10 | "strings" 11 | 12 | "github.com/BurntSushi/toml" 13 | gitignore "github.com/sabhiram/go-gitignore" 14 | ) 15 | 16 | func InitWorkspace(baseDir, repo string) error { 17 | config := map[string]interface{}{ 18 | "repo": map[string]interface{}{ 19 | "url": repo, 20 | }, 21 | } 22 | 23 | configPath := path.Join(baseDir, ".avc/config") 24 | err := mkdirsForFile(configPath) 25 | if err != nil { 26 | return err 27 | } 28 | 29 | f, err := os.Create(configPath) 30 | if err != nil { 31 | return err 32 | } 33 | 34 | if err := toml.NewEncoder(f).Encode(config); err != nil { 35 | return err 36 | } 37 | 38 | if err := f.Close(); err != nil { 39 | return err 40 | } 41 | return nil 42 | } 43 | 44 | type ArtConfig struct { 45 | config 
map[string]interface{} 46 | MetadataDir string 47 | BaseDir string 48 | } 49 | 50 | func NewConfig(baseDir, metadataDir, repoUrl string) ArtConfig { 51 | config := ArtConfig{ 52 | BaseDir: baseDir, 53 | MetadataDir: metadataDir, 54 | } 55 | config.config = make(map[string]interface{}) 56 | config.SetRepoUrl(repoUrl) 57 | return config 58 | } 59 | 60 | func LoadConfig(dir string) (ArtConfig, error) { 61 | load := func(dir string) (map[string]interface{}, error) { 62 | config := make(map[string]interface{}) 63 | configPath := path.Join(dir, ".avc/config") 64 | 65 | data, err := ioutil.ReadFile(configPath) 66 | if err != nil { 67 | return nil, err 68 | } 69 | 70 | err = toml.Unmarshal(data, &config) 71 | if err != nil { 72 | return nil, err 73 | } 74 | 75 | return config, nil 76 | } 77 | 78 | if dir == "" { 79 | var err2 error 80 | dir, err2 = os.Getwd() 81 | if err2 != nil { 82 | return ArtConfig{}, err2 83 | } 84 | } 85 | 86 | for { 87 | config, err := load(dir) 88 | var e *toml.ParseError 89 | if errors.As(err, &e) { 90 | fmt.Fprintf(os.Stderr, "cannot load the workspace config\n") 91 | return ArtConfig{}, err 92 | } 93 | 94 | if err == nil { 95 | return ArtConfig{config: config, BaseDir: dir, MetadataDir: path.Join(dir, ".avc")}, nil 96 | } 97 | 98 | newDir := filepath.Dir(dir) 99 | if dir == newDir { 100 | break 101 | } 102 | dir = newDir 103 | } 104 | 105 | err2 := ErrWorkspaceNotFound 106 | 107 | return ArtConfig{}, err2 108 | } 109 | 110 | func (config *ArtConfig) Set(path string, value interface{}) { 111 | var obj map[string]interface{} = config.config 112 | 113 | parts := strings.Split(path, ".") 114 | for i, p := range parts { 115 | if i == len(parts)-1 { 116 | obj[p] = value 117 | } else { 118 | if v, ok := obj[p].(map[string]interface{}); ok { 119 | obj = v 120 | } else { 121 | child := make(map[string]interface{}) 122 | obj[p] = child 123 | obj = child 124 | } 125 | } 126 | } 127 | } 128 | 129 | func (config *ArtConfig) Get(path string) interface{} { 130 | var obj interface{} = config.config 131 | var val interface{} = nil 132 | 133 | parts := strings.Split(path, ".") 134 | for _, p := range parts { 135 | if v, ok := obj.(map[string]interface{}); ok { 136 | obj = v[p] 137 | val = obj 138 | } else { 139 | return nil 140 | } 141 | } 142 | 143 | return val 144 | } 145 | 146 | func (config *ArtConfig) GetString(path string) string { 147 | var value string 148 | 149 | if config.Get(path) != nil { 150 | value = config.Get(path).(string) 151 | } 152 | 153 | return value 154 | } 155 | 156 | func (config *ArtConfig) RepoUrl() string { 157 | return config.GetString("repo.url") 158 | } 159 | 160 | func (config *ArtConfig) SetRepoUrl(repoUrl string) { 161 | config.Set("repo.url", repoUrl) 162 | } 163 | 164 | func (config *ArtConfig) Print() { 165 | var printChild func(string, interface{}) 166 | 167 | printChild = func(path string, obj interface{}) { 168 | if v, ok := obj.(map[string]interface{}); ok { 169 | for key, value := range v { 170 | if path == "" { 171 | printChild(key, value) 172 | } else { 173 | printChild(path+"."+key, value) 174 | } 175 | } 176 | } else { 177 | fmt.Printf("%s=%v\n", path, obj) 178 | } 179 | } 180 | 181 | printChild("", config.config) 182 | } 183 | 184 | func (config *ArtConfig) Save() error { 185 | configPath := path.Join(config.MetadataDir, "config") 186 | f, err := os.OpenFile(configPath, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0o644) 187 | if err != nil { 188 | return err 189 | } 190 | 191 | if err := toml.NewEncoder(f).Encode(config.config); err != nil { 192 | 
return err 193 | } 194 | 195 | err = f.Close() 196 | if err != nil { 197 | return err 198 | } 199 | 200 | return nil 201 | } 202 | 203 | type AvcIgnore = gitignore.GitIgnore 204 | 205 | func NewAvcIgnore(dir string) (*AvcIgnore, error) { 206 | avcIgnorePath := path.Join(dir, ".avcignore") 207 | 208 | avcIgnore, err := gitignore.CompileIgnoreFile(avcIgnorePath) 209 | 210 | if err != nil { 211 | return nil, err 212 | } 213 | 214 | return avcIgnore, nil 215 | } 216 | 217 | type AvcInclude = gitignore.GitIgnore 218 | 219 | func NewAvcInclude(filePath []string) *AvcInclude { 220 | filter := gitignore.CompileIgnoreLines(filePath...) 221 | 222 | return filter 223 | } 224 | -------------------------------------------------------------------------------- /internal/core/errors.go: -------------------------------------------------------------------------------- 1 | package core 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | ) 7 | 8 | var ( 9 | ErrWorkspaceNotFound = errors.New("not a workspace") 10 | ErrEmptyRepository = errors.New("no commit is found in the repository. please push data to repository first") 11 | ) 12 | 13 | type ReferenceNotFoundError struct { 14 | Ref string 15 | Err error 16 | } 17 | 18 | func (err ReferenceNotFoundError) Error() string { 19 | return fmt.Sprintf("reference not found: %s", err.Ref) 20 | } 21 | -------------------------------------------------------------------------------- /internal/core/hash.go: -------------------------------------------------------------------------------- 1 | package core 2 | 3 | import ( 4 | "crypto/sha1" 5 | "fmt" 6 | "io" 7 | "os" 8 | ) 9 | 10 | func Sha1Sum(content []byte) string { 11 | hasher := sha1.New() 12 | hasher.Write(content) 13 | return fmt.Sprintf("%x", hasher.Sum(nil)) 14 | } 15 | 16 | func Sha1SumFromFile(path string) (string, error) { 17 | hasher := sha1.New() 18 | f, err := os.Open(path) 19 | if err != nil { 20 | return "", err 21 | } 22 | defer f.Close() 23 | if _, err := io.Copy(hasher, f); err != nil { 24 | return "", err 25 | } 26 | sum := hasher.Sum([]byte{}) 27 | return fmt.Sprintf("%x", sum), nil 28 | } 29 | -------------------------------------------------------------------------------- /internal/core/manager_test.go: -------------------------------------------------------------------------------- 1 | package core 2 | 3 | import ( 4 | "os" 5 | "path/filepath" 6 | "testing" 7 | 8 | "github.com/stretchr/testify/assert" 9 | ) 10 | 11 | func TestPutGet(t *testing.T) { 12 | wp1 := t.TempDir() 13 | meta1 := t.TempDir() 14 | wp2 := t.TempDir() 15 | meta2 := t.TempDir() 16 | repo := t.TempDir() 17 | 18 | path := "test" 19 | content := "test-data" 20 | 21 | assert.NoError(t, writeFile([]byte(content), filepath.Join(wp1, path))) 22 | 23 | config := NewConfig(wp1, meta1, repo) 24 | mngr1, err := NewArtifactManager(config) 25 | assert.Empty(t, err) 26 | err = mngr1.Push(PushOptions{}) 27 | assert.Empty(t, err) 28 | 29 | config = NewConfig(wp2, meta2, repo) 30 | mngr2, err := NewArtifactManager(config) 31 | assert.Empty(t, err) 32 | err = mngr2.Pull(PullOptions{}) 33 | assert.Empty(t, err) 34 | 35 | data, err := readFile(filepath.Join(wp2, path)) 36 | assert.Empty(t, err) 37 | assert.Equal(t, string(data), content) 38 | 39 | _, err = os.Stat(filepath.Join(wp2, ".avc")) 40 | assert.True(t, os.IsNotExist(err)) 41 | } 42 | 43 | func TestPushPull(t *testing.T) { 44 | wp1 := t.TempDir() 45 | wp2 := t.TempDir() 46 | repo := t.TempDir() 47 | 48 | path := "test" 49 | content := "test-data" 50 | 51 | assert.NoError(t, 
writeFile([]byte(content), filepath.Join(wp1, path))) 52 | 53 | assert.NoError(t, InitWorkspace(wp1, repo)) 54 | config, _ := LoadConfig(wp1) 55 | mngr1, _ := NewArtifactManager(config) 56 | assert.NoError(t, mngr1.Push(PushOptions{})) 57 | 58 | assert.NoError(t, InitWorkspace(wp2, repo)) 59 | config, _ = LoadConfig(wp2) 60 | mngr2, _ := NewArtifactManager(config) 61 | assert.NoError(t, mngr2.Pull(PullOptions{})) 62 | 63 | data, _ := readFile(filepath.Join(wp2, path)) 64 | assert.Equal(t, string(data), content) 65 | 66 | _, err := os.Stat(filepath.Join(wp2, ".avc/config")) 67 | assert.False(t, os.IsNotExist(err)) 68 | } 69 | 70 | func TestPushWithIgnore(t *testing.T) { 71 | wp1 := t.TempDir() 72 | wp2 := t.TempDir() 73 | repo := t.TempDir() 74 | 75 | assert.NoError(t, writeFile([]byte("a"), filepath.Join(wp1, "a"))) 76 | assert.NoError(t, writeFile([]byte("b"), filepath.Join(wp1, "b"))) 77 | assert.NoError(t, writeFile([]byte("c"), filepath.Join(wp1, "c"))) 78 | 79 | avcIgnore := ` 80 | a 81 | e 82 | ` 83 | 84 | assert.NoError(t, writeFile([]byte(avcIgnore), filepath.Join(wp1, ".avcignore"))) 85 | 86 | assert.NoError(t, InitWorkspace(wp1, repo)) 87 | config, _ := LoadConfig(wp1) 88 | mngr1, _ := NewArtifactManager(config) 89 | err := mngr1.Push(PushOptions{}) 90 | assert.Empty(t, err) 91 | 92 | assert.NoError(t, InitWorkspace(wp2, repo)) 93 | config, _ = LoadConfig(wp2) 94 | mngr2, _ := NewArtifactManager(config) 95 | err = mngr2.Pull(PullOptions{}) 96 | assert.Empty(t, err) 97 | 98 | data, _ := readFile(filepath.Join(wp2, "a")) 99 | assert.Equal(t, "", string(data)) 100 | data, _ = readFile(filepath.Join(wp2, "b")) 101 | assert.Equal(t, "b", string(data)) 102 | data, _ = readFile(filepath.Join(wp2, "c")) 103 | assert.Equal(t, "c", string(data)) 104 | } 105 | 106 | func TestPullWithIgnore(t *testing.T) { 107 | wp1 := t.TempDir() 108 | wp2 := t.TempDir() 109 | repo := t.TempDir() 110 | 111 | // push 112 | assert.NoError(t, writeFile([]byte("a"), filepath.Join(wp1, "a"))) 113 | assert.NoError(t, writeFile([]byte("b"), filepath.Join(wp1, "b"))) 114 | assert.NoError(t, writeFile([]byte("c"), filepath.Join(wp1, "c"))) 115 | assert.NoError(t, InitWorkspace(wp1, repo)) 116 | config, _ := LoadConfig(wp1) 117 | mngr1, _ := NewArtifactManager(config) 118 | err := mngr1.Push(PushOptions{}) 119 | assert.Empty(t, err) 120 | 121 | // pull 122 | avcIgnore := ` 123 | a 124 | e 125 | ` 126 | assert.NoError(t, writeFile([]byte(avcIgnore), filepath.Join(wp2, ".avcignore"))) 127 | assert.NoError(t, writeFile([]byte("abc"), filepath.Join(wp2, "a"))) 128 | assert.NoError(t, writeFile([]byte("efg"), filepath.Join(wp2, "e"))) 129 | assert.NoError(t, InitWorkspace(wp2, repo)) 130 | config, _ = LoadConfig(wp2) 131 | mngr2, _ := NewArtifactManager(config) 132 | err = mngr2.Pull(PullOptions{}) 133 | assert.Empty(t, err) 134 | 135 | data, _ := readFile(filepath.Join(wp2, "a")) 136 | assert.Equal(t, "abc", string(data)) 137 | data, _ = readFile(filepath.Join(wp2, "c")) 138 | assert.Equal(t, "c", string(data)) 139 | data, _ = readFile(filepath.Join(wp2, "e")) 140 | assert.Equal(t, "efg", string(data)) 141 | } 142 | 143 | func TestSymlink(t *testing.T) { 144 | var err error 145 | wp1 := t.TempDir() 146 | wp2 := t.TempDir() 147 | repo := t.TempDir() 148 | 149 | // Firt version 150 | // a = "a" 151 | // b -> bb 152 | // c -> cc 153 | // d -> dd 154 | assert.NoError(t, writeFile([]byte("a"), filepath.Join(wp1, "a"))) 155 | assert.NoError(t, symlinkFile("bb", filepath.Join(wp1, "b"))) 156 | assert.NoError(t, 
symlinkFile("cc", filepath.Join(wp1, "c"))) 157 | assert.NoError(t, symlinkFile("dd", filepath.Join(wp1, "d"))) 158 | 159 | assert.NoError(t, InitWorkspace(wp1, repo)) 160 | config, _ := LoadConfig(wp1) 161 | mngr1, _ := NewArtifactManager(config) 162 | assert.NoError(t, mngr1.Push(PushOptions{})) 163 | 164 | assert.NoError(t, InitWorkspace(wp2, repo)) 165 | config, _ = LoadConfig(wp2) 166 | mngr2, _ := NewArtifactManager(config) 167 | assert.NoError(t, mngr2.Pull(PullOptions{})) 168 | 169 | data, _ := readFile(filepath.Join(wp2, "a")) 170 | assert.Equal(t, "a", string(data)) 171 | link, _ := readlinkFile(filepath.Join(wp2, "b")) 172 | assert.Equal(t, "bb", link) 173 | 174 | // Second version 175 | // a = "a" => a -> aa 176 | // b -> bb => (deleted) 177 | // c -> cc => c = "c" 178 | // d -> dd => e -> dd 179 | assert.NoError(t, deleteFile(filepath.Join(wp1, "a"))) 180 | assert.NoError(t, symlinkFile("aa", filepath.Join(wp1, "a"))) 181 | assert.NoError(t, deleteFile(filepath.Join(wp1, "b"))) 182 | assert.NoError(t, deleteFile(filepath.Join(wp1, "c"))) 183 | assert.NoError(t, writeFile([]byte("c"), filepath.Join(wp1, "c"))) 184 | assert.NoError(t, deleteFile(filepath.Join(wp1, "d"))) 185 | assert.NoError(t, symlinkFile("dd", filepath.Join(wp1, "e"))) 186 | assert.NoError(t, mngr1.Push(PushOptions{})) 187 | assert.NoError(t, mngr2.Pull(PullOptions{Delete: true})) 188 | 189 | link, _ = readlinkFile(filepath.Join(wp2, "a")) 190 | assert.Equal(t, "aa", link) 191 | _, err = readlinkFile(filepath.Join(wp2, "b")) 192 | assert.Error(t, err) 193 | data, _ = readFile(filepath.Join(wp2, "c")) 194 | assert.Equal(t, "c", string(data)) 195 | _, err = readlinkFile(filepath.Join(wp2, "d")) 196 | assert.Error(t, err) 197 | link, _ = readlinkFile(filepath.Join(wp2, "e")) 198 | assert.Equal(t, "dd", link) 199 | } 200 | 201 | func TestPermMode(t *testing.T) { 202 | wp1 := t.TempDir() 203 | wp2 := t.TempDir() 204 | repo := t.TempDir() 205 | 206 | // Firt version 207 | // a = "a" 644 208 | // b = "b" 600 209 | // c = "c" 755 210 | assert.NoError(t, writeFile([]byte("a"), filepath.Join(wp1, "a"))) 211 | assert.NoError(t, chmod(filepath.Join(wp1, "a"), 0o644)) 212 | assert.NoError(t, writeFile([]byte("b"), filepath.Join(wp1, "b"))) 213 | assert.NoError(t, chmod(filepath.Join(wp1, "b"), 0o600)) 214 | assert.NoError(t, writeFile([]byte("c"), filepath.Join(wp1, "c"))) 215 | assert.NoError(t, chmod(filepath.Join(wp1, "c"), 0o755)) 216 | 217 | assert.NoError(t, InitWorkspace(wp1, repo)) 218 | config, _ := LoadConfig(wp1) 219 | mngr1, _ := NewArtifactManager(config) 220 | assert.NoError(t, mngr1.Push(PushOptions{})) 221 | 222 | assert.NoError(t, InitWorkspace(wp2, repo)) 223 | config, _ = LoadConfig(wp2) 224 | mngr2, _ := NewArtifactManager(config) 225 | assert.NoError(t, mngr2.Pull(PullOptions{})) 226 | 227 | mode, _ := readFileMode(filepath.Join(wp2, "a")) 228 | assert.Equal(t, 0o644, int(mode)) 229 | mode, _ = readFileMode(filepath.Join(wp2, "b")) 230 | assert.Equal(t, 0o600, int(mode)) 231 | mode, _ = readFileMode(filepath.Join(wp2, "c")) 232 | assert.Equal(t, 0o755, int(mode)) 233 | 234 | // Second versio n 235 | // a = "a" 644 => a = "a" 755 236 | // b = "b" 600 => bb = "b" 600 237 | // c = "c" 755 => cc = "c" 700 238 | // (new) d = "d" 755 239 | assert.NoError(t, chmod(filepath.Join(wp1, "a"), 0o755)) 240 | assert.NoError(t, renameFile(filepath.Join(wp1, "b"), filepath.Join(wp1, "bb"))) 241 | assert.NoError(t, renameFile(filepath.Join(wp1, "c"), filepath.Join(wp1, "cc"))) 242 | assert.NoError(t, 
chmod(filepath.Join(wp1, "cc"), 0o700)) 243 | assert.NoError(t, writeFile([]byte("d"), filepath.Join(wp1, "d"))) 244 | assert.NoError(t, chmod(filepath.Join(wp1, "d"), 0o755)) 245 | 246 | assert.NoError(t, mngr1.Push(PushOptions{})) 247 | assert.NoError(t, mngr2.Pull(PullOptions{Delete: true})) 248 | 249 | mode, _ = readFileMode(filepath.Join(wp2, "a")) 250 | assert.Equal(t, 0o755, int(mode)) 251 | mode, _ = readFileMode(filepath.Join(wp2, "bb")) 252 | assert.Equal(t, 0o600, int(mode)) 253 | mode, _ = readFileMode(filepath.Join(wp2, "cc")) 254 | assert.Equal(t, 0o700, int(mode)) 255 | mode, _ = readFileMode(filepath.Join(wp2, "d")) 256 | assert.Equal(t, 0o755, int(mode)) 257 | } 258 | -------------------------------------------------------------------------------- /internal/core/types.go: -------------------------------------------------------------------------------- 1 | package core 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "io/fs" 7 | "os" 8 | "path/filepath" 9 | "time" 10 | ) 11 | 12 | const ( 13 | RefLocal = "__local__" 14 | RefLatest = "latest" 15 | ) 16 | 17 | type BlobMetaData struct { 18 | Path string `json:"path"` 19 | Hash string `json:"hash,omitempty"` 20 | Link string `json:"link,omitempty"` 21 | Mode fs.FileMode `json:"mode"` 22 | Size int64 `json:"size"` 23 | } 24 | 25 | type Commit struct { 26 | CreatedAt time.Time `json:"createdAt"` 27 | Parent string `json:"parent,omitempty"` 28 | Message *string `json:"messaage,omitempty"` 29 | Blobs []BlobMetaData `json:"blobs"` 30 | } 31 | 32 | type PushOptions struct { 33 | DryRun bool 34 | Message *string 35 | Tag *string 36 | } 37 | 38 | type ChangeMode int 39 | 40 | type PullOptions struct { 41 | DryRun bool 42 | NoFetch bool 43 | Delete bool 44 | RefOrCommit *string 45 | FileFilter PathFilter 46 | } 47 | 48 | type PathFilter func(path string) bool 49 | 50 | type DiffOptions struct { 51 | LeftRef string 52 | LeftCommit *Commit 53 | RightRef string 54 | RightCommit *Commit 55 | AddFilter PathFilter 56 | ChangeFilter PathFilter 57 | DeleteFilter PathFilter 58 | IncludeFilter PathFilter 59 | NoDelete bool 60 | } 61 | 62 | type DiffType int 63 | 64 | const ( 65 | DiffTypeAdd DiffType = iota 66 | DiffTypeDelete 67 | DiffTypeChange 68 | DiffTypeRename 69 | ) 70 | 71 | type DiffRecord struct { 72 | Type DiffType 73 | Hash string 74 | Link string 75 | Path string 76 | Size int64 77 | Mode fs.FileMode 78 | OldPath string 79 | OldLink string 80 | OldHash string 81 | OldSize int64 82 | OldMode fs.FileMode 83 | } 84 | 85 | type DiffResult struct { 86 | Records []DiffRecord 87 | } 88 | 89 | type BlobDownloadResult struct { 90 | // File not changed. Skip the download 91 | Skip bool 92 | } 93 | 94 | type BlobUploadResult struct { 95 | // Blob exists in ther repo. 
Skip the upload 96 | Skip bool 97 | } 98 | 99 | func MakeBlobMetadata(baseDir string, path string) (BlobMetaData, error) { 100 | fullPath := filepath.Join(baseDir, path) 101 | info, err := os.Lstat(fullPath) 102 | if err != nil { 103 | return BlobMetaData{}, err 104 | } 105 | 106 | if info.Mode()&fs.ModeSymlink != 0 { 107 | link, err := os.Readlink(fullPath) 108 | if err != nil { 109 | return BlobMetaData{}, err 110 | } 111 | 112 | return BlobMetaData{ 113 | Path: path, 114 | Link: link, 115 | Mode: 0, 116 | }, nil 117 | } else if info.Mode().IsRegular() { 118 | hash, _ := Sha1SumFromFile(fullPath) 119 | return BlobMetaData{ 120 | Path: path, 121 | Hash: hash, 122 | Mode: info.Mode().Perm(), 123 | Size: info.Size(), 124 | }, nil 125 | } else { 126 | fmt.Printf("%s %s\n", info.Mode(), info.Name()) 127 | return BlobMetaData{}, os.ErrInvalid 128 | } 129 | } 130 | 131 | func MakeCommitMetadata(commit *Commit) ([]byte, string) { 132 | jsondata, _ := json.Marshal(commit) 133 | hash := Sha1Sum(jsondata) 134 | return jsondata, hash 135 | } 136 | -------------------------------------------------------------------------------- /internal/core/utils.go: -------------------------------------------------------------------------------- 1 | package core 2 | 3 | import ( 4 | "compress/gzip" 5 | "fmt" 6 | "io" 7 | "io/fs" 8 | "io/ioutil" 9 | "os" 10 | "path/filepath" 11 | ) 12 | 13 | func MakeObjectPath(hash string) string { 14 | return fmt.Sprintf("objects/%s/%s", hash[:2], hash[2:]) 15 | } 16 | 17 | func MakeCommitPath(hash string) string { 18 | return fmt.Sprintf("commits/%s", hash) 19 | } 20 | 21 | func MakeRefPath(ref string) string { 22 | return fmt.Sprintf("refs/%s", ref) 23 | } 24 | 25 | func MakeTagPath(ref string) string { 26 | return fmt.Sprintf("refs/tags/%s", ref) 27 | } 28 | 29 | func mkdirsForFile(file string) error { 30 | return os.MkdirAll(filepath.Dir(file), fs.ModePerm) 31 | } 32 | 33 | func removeEmptyDirs(dir string, removeSelf bool) (bool, error) { 34 | var hasEntries bool 35 | 36 | entires, err := os.ReadDir(dir) 37 | if err != nil { 38 | return false, err 39 | } 40 | for _, entry := range entires { 41 | if entry.IsDir() { 42 | subdir := filepath.Join(dir, entry.Name()) 43 | removed, err := removeEmptyDirs(subdir, true) 44 | if err != nil { 45 | return false, err 46 | } 47 | if !removed { 48 | hasEntries = true 49 | } 50 | } else { 51 | hasEntries = true 52 | } 53 | } 54 | 55 | if !hasEntries && removeSelf { 56 | err := os.Remove(dir) 57 | if err != nil { 58 | return false, err 59 | } 60 | return true, nil 61 | } 62 | 63 | return false, nil 64 | } 65 | 66 | func writeFile(content []byte, dst string) error { 67 | err := os.MkdirAll(filepath.Dir(dst), fs.ModePerm) 68 | if err != nil { 69 | return err 70 | } 71 | 72 | err = ioutil.WriteFile(dst, content, 0o644) 73 | return err 74 | } 75 | 76 | func readFile(src string) ([]byte, error) { 77 | return ioutil.ReadFile(src) 78 | } 79 | 80 | func readFileMode(src string) (fs.FileMode, error) { 81 | info, err := os.Lstat(src) 82 | if err != nil { 83 | return 0, err 84 | } 85 | 86 | return info.Mode(), err 87 | } 88 | 89 | func writeGzipFile(content []byte, dst string) error { 90 | err := os.MkdirAll(filepath.Dir(dst), fs.ModePerm) 91 | if err != nil { 92 | return err 93 | } 94 | 95 | file, err := os.Create(dst) 96 | if err != nil { 97 | return err 98 | } 99 | gfile := gzip.NewWriter(file) 100 | defer gfile.Close() 101 | _, err = gfile.Write(content) 102 | return err 103 | } 104 | 105 | func readGzipFile(src string) ([]byte, error) { 106 | file, 
err := os.Open(src) 107 | if err != nil { 108 | return nil, err 109 | } 110 | gfile, err := gzip.NewReader(file) 111 | if err != nil { 112 | return nil, err 113 | } 114 | defer gfile.Close() 115 | 116 | return io.ReadAll(gfile) 117 | } 118 | 119 | func deleteFile(src string) error { 120 | return os.Remove(src) 121 | } 122 | 123 | func renameFile(src, dst string) error { 124 | err := os.MkdirAll(filepath.Dir(dst), fs.ModePerm) 125 | if err != nil { 126 | return err 127 | } 128 | 129 | return os.Rename(src, dst) 130 | } 131 | 132 | func symlinkFile(target, src string) error { 133 | return os.Symlink(target, src) 134 | } 135 | 136 | func readlinkFile(src string) (string, error) { 137 | return os.Readlink(src) 138 | } 139 | 140 | func chmod(src string, mode fs.FileMode) error { 141 | return os.Chmod(src, mode) 142 | } 143 | -------------------------------------------------------------------------------- /internal/core/utils_test.go: -------------------------------------------------------------------------------- 1 | package core 2 | 3 | import ( 4 | "os" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestRemoveEmptyDirs(t *testing.T) { 11 | tmpDir := t.TempDir() 12 | 13 | assert.NoError(t, os.Mkdir(tmpDir+"/a", os.ModePerm)) 14 | removed, err := removeEmptyDirs(tmpDir+"/a", true) 15 | assert.True(t, removed) 16 | assert.Empty(t, err) 17 | 18 | assert.NoError(t, os.Mkdir(tmpDir+"/b", os.ModePerm)) 19 | assert.NoError(t, writeFile([]byte("hello"), tmpDir+"/b/hello")) 20 | removed, err = removeEmptyDirs(tmpDir+"/b", true) 21 | assert.False(t, removed) 22 | assert.Empty(t, err) 23 | 24 | assert.NoError(t, os.MkdirAll(tmpDir+"/c/c/c/c/a", os.ModePerm)) 25 | assert.NoError(t, os.MkdirAll(tmpDir+"/c/c/c/c/b", os.ModePerm)) 26 | assert.NoError(t, os.MkdirAll(tmpDir+"/c/c/c/c/c", os.ModePerm)) 27 | assert.NoError(t, writeFile([]byte("hello"), tmpDir+"/c/c/a")) 28 | removed, err = removeEmptyDirs(tmpDir+"/c", true) 29 | assert.Empty(t, err) 30 | assert.False(t, removed) 31 | stat, err := os.Stat(tmpDir + "/c/c/a") 32 | assert.Equal(t, "a", stat.Name()) 33 | assert.Empty(t, err) 34 | _, err = os.Stat("/c/c/c") 35 | assert.Error(t, err) 36 | } 37 | -------------------------------------------------------------------------------- /internal/executor/executor.go: -------------------------------------------------------------------------------- 1 | package executor 2 | 3 | import ( 4 | "context" 5 | "runtime" 6 | "sync" 7 | ) 8 | 9 | type TaskFunc func(ctx context.Context) error 10 | 11 | func ExecuteAll(numCPU int, tasks ...TaskFunc) error { 12 | var err error 13 | ctx, cancel := context.WithCancel(context.Background()) 14 | defer cancel() 15 | 16 | if numCPU == 0 { 17 | numCPU = runtime.NumCPU() 18 | } 19 | 20 | wg := sync.WaitGroup{} 21 | wg.Add(numCPU) 22 | 23 | queue := make(chan TaskFunc, len(tasks)) 24 | // Add tasks to queue 25 | for _, task := range tasks { 26 | queue <- task 27 | } 28 | close(queue) 29 | 30 | // Spawn the executer 31 | for i := 0; i < numCPU; i++ { 32 | go func() { 33 | defer wg.Done() 34 | for { 35 | select { 36 | case task, ok := <-queue: 37 | if ctx.Err() != nil || !ok { 38 | return 39 | } 40 | if e := task(ctx); e != nil { 41 | err = e 42 | cancel() 43 | } 44 | case <-ctx.Done(): 45 | return 46 | } 47 | } 48 | }() 49 | } 50 | 51 | // wait for all task done 52 | wg.Wait() 53 | return err 54 | } 55 | -------------------------------------------------------------------------------- /internal/executor/executor_test.go: 
-------------------------------------------------------------------------------- 1 | package executor 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "math/rand" 7 | "runtime" 8 | "sync/atomic" 9 | "testing" 10 | "time" 11 | 12 | "github.com/stretchr/testify/assert" 13 | ) 14 | 15 | func TestHappyPath(t *testing.T) { 16 | var str1, str2 *string 17 | 18 | task1 := func(ctx context.Context) error { 19 | str := "foo" 20 | str1 = &str 21 | return nil 22 | } 23 | 24 | task2 := func(ctx context.Context) error { 25 | str := "bar" 26 | str2 = &str 27 | return nil 28 | } 29 | 30 | err := ExecuteAll(runtime.NumCPU(), task1, task2) 31 | assert.Empty(t, err) 32 | assert.Equal(t, "foo", *str1) 33 | assert.Equal(t, "bar", *str2) 34 | } 35 | 36 | func TestFailedPath(t *testing.T) { 37 | ErrFoo := errors.New("foo") 38 | 39 | taskOk := func(ctx context.Context) error { 40 | return nil 41 | } 42 | 43 | taskErr := func(ctx context.Context) error { 44 | return ErrFoo 45 | } 46 | 47 | err := ExecuteAll(runtime.NumCPU(), taskOk, taskErr) 48 | assert.Equal(t, ErrFoo, err) 49 | 50 | err = ExecuteAll(runtime.NumCPU(), taskErr, taskOk) 51 | assert.Equal(t, ErrFoo, err) 52 | } 53 | 54 | func TestConcurrent(t *testing.T) { 55 | tasks := []TaskFunc{} 56 | var counter int32 57 | 58 | for i := 0; i < 100; i++ { 59 | f := func(ctx context.Context) error { 60 | time.Sleep(time.Duration(rand.Intn(50)) * time.Millisecond) 61 | atomic.AddInt32(&counter, 1) 62 | return nil 63 | } 64 | tasks = append(tasks, f) 65 | 66 | } 67 | 68 | err := ExecuteAll(50, tasks...) 69 | assert.Empty(t, err) 70 | assert.Equal(t, int32(100), counter) 71 | } 72 | 73 | func TestContext(t *testing.T) { 74 | ErrFoo := errors.New("foo") 75 | 76 | taskForever := func(ctx context.Context) error { 77 | <-ctx.Done() 78 | return nil 79 | } 80 | 81 | taskErr := func(ctx context.Context) error { 82 | return ErrFoo 83 | } 84 | 85 | err := ExecuteAll(3, taskForever, taskErr) 86 | assert.Equal(t, ErrFoo, err) 87 | } 88 | -------------------------------------------------------------------------------- /internal/log/log.go: -------------------------------------------------------------------------------- 1 | package log 2 | 3 | import ( 4 | "log" 5 | "os" 6 | ) 7 | 8 | var logger *log.Logger 9 | 10 | func SetDebug(debug bool) { 11 | if debug { 12 | logger = log.New(os.Stderr, "[DBG] ", log.Ldate|log.Lmicroseconds) 13 | } else { 14 | logger = nil 15 | } 16 | } 17 | 18 | func Debug(v ...interface{}) { 19 | if logger == nil { 20 | return 21 | } 22 | 23 | logger.Print(v...) 24 | } 25 | 26 | func Debugf(format string, v ...interface{}) { 27 | if logger == nil { 28 | return 29 | } 30 | 31 | logger.Printf(format, v...) 32 | } 33 | 34 | func Debugln(v ...interface{}) { 35 | if logger == nil { 36 | return 37 | } 38 | 39 | logger.Println(v...) 
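/*
Usage sketch for this level-less debug logger (illustrative, not part of
log.go). Output is gated globally by SetDebug, so call sites can log
unconditionally:

    log.SetDebug(true) // e.g. wired to a --debug style CLI flag
    log.Debugf("transferred %d blobs", 42)
    log.SetDebug(false) // Debug/Debugf/Debugln become no-ops again

Note that the guard is a plain nil check on the package-level logger; that is
fine for a CLI that flips the flag once at startup, but it is not synchronized
against concurrent SetDebug calls.
*/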
40 | } 41 | -------------------------------------------------------------------------------- /internal/repository/azureblob.go: -------------------------------------------------------------------------------- 1 | package repository 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "fmt" 7 | neturl "net/url" 8 | "os" 9 | "path/filepath" 10 | "strings" 11 | 12 | "github.com/Azure/azure-sdk-for-go/sdk/azidentity" 13 | "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob" 14 | "github.com/infuseai/artivc/internal/log" 15 | ) 16 | 17 | type AzureBlobRepository struct { 18 | Client *azblob.ContainerClient 19 | Prefix string 20 | BasePath string 21 | } 22 | 23 | func IsAzureStorageUrl(repoUrl string) bool { 24 | url, err := neturl.Parse(repoUrl) 25 | if err != nil { 26 | return false 27 | } 28 | 29 | return strings.HasSuffix(url.Host, ".blob.core.windows.net") 30 | } 31 | 32 | func ParseAzureBlobUrl(urlString string) (storageAccount, container, prefix string, err error) { 33 | url, err := neturl.Parse(urlString) 34 | if err != nil { 35 | return 36 | } 37 | 38 | storageAccount = url.Host[:len(url.Host)-len(".blob.core.windows.net")] 39 | comps := strings.Split(url.Path, "/") 40 | if len(comps) < 2 { 41 | err = fmt.Errorf("invalid azure blob url: " + urlString) 42 | return 43 | } 44 | 45 | container = comps[1] 46 | prefix = strings.Join(comps[2:], "/") 47 | 48 | return 49 | } 50 | 51 | func makeAzureServiceAccountUrl(accountName string) string { 52 | return fmt.Sprintf("https://%s.blob.core.windows.net/", accountName) 53 | } 54 | 55 | func NewAzureBlobRepository(repo string) (*AzureBlobRepository, error) { 56 | accountName, container, prefix, err := ParseAzureBlobUrl(repo) 57 | if err != nil { 58 | return nil, err 59 | } 60 | 61 | serviceUrl := makeAzureServiceAccountUrl(accountName) 62 | var serviceClient azblob.ServiceClient 63 | 64 | var accountKey string 65 | if value := os.Getenv("AZURE_STORAGE_ACCOUNT_KEY"); value != "" { 66 | accountKey = value 67 | } 68 | 69 | if accountKey != "" { 70 | credential, err := azblob.NewSharedKeyCredential(accountName, accountKey) 71 | if err != nil { 72 | return nil, err 73 | } 74 | 75 | serviceClient, err = azblob.NewServiceClientWithSharedKey(serviceUrl, credential, nil) 76 | if err != nil { 77 | return nil, err 78 | } 79 | } else { 80 | credential, err := azidentity.NewDefaultAzureCredential(nil) 81 | if err != nil { 82 | return nil, err 83 | } 84 | 85 | serviceClient, err = azblob.NewServiceClient(serviceUrl, credential, nil) 86 | if err != nil { 87 | return nil, err 88 | } 89 | } 90 | 91 | containerClient := serviceClient.NewContainerClient(container) 92 | 93 | r := &AzureBlobRepository{ 94 | Client: &containerClient, 95 | BasePath: repo, 96 | Prefix: prefix, 97 | } 98 | 99 | // check if the client has enough permission 100 | dir, err := os.MkdirTemp("", "artivc-azblob-*") 101 | if err != nil { 102 | return nil, err 103 | } 104 | defer os.RemoveAll(dir) // clean up 105 | 106 | err = r.Download("refs/latest", filepath.Join(dir, "latest"), nil) 107 | if err != nil { 108 | var internalError *azblob.InternalError 109 | if !errors.As(err, &internalError) { 110 | return nil, err 111 | } 112 | 113 | var errStorage *azblob.StorageError 114 | if !internalError.As(&errStorage) { 115 | return nil, internalError 116 | } 117 | 118 | if errStorage.ErrorCode == azblob.StorageErrorCodeBlobNotFound { 119 | // blob not found. but authentication/authorization check is ok. 
PASS 120 | } else if errStorage.ErrorCode == azblob.StorageErrorCodeAuthorizationPermissionMismatch { 121 | // authorization permission mismatch 122 | log.Debugln(errStorage.Error()) 123 | fmt.Fprintf(os.Stderr, "Authorization permission mismatch. Please assign 'Storage Blob Data Contributor' role to the logged-in account in the storage account '%s'\n", accountName) 124 | fmt.Fprintln(os.Stderr, "Please see https://docs.microsoft.com/azure/storage/blobs/assign-azure-role-data-access") 125 | fmt.Fprintln(os.Stderr, "") 126 | return nil, fmt.Errorf("authorization permission mismatch") 127 | } else { 128 | // other error 129 | return nil, errStorage 130 | } 131 | } 132 | 133 | return r, nil 134 | } 135 | 136 | func (repo *AzureBlobRepository) Upload(localPath, repoPath string, m *Meter) error { 137 | ctx := context.Background() 138 | 139 | // file 140 | src, err := os.Open(localPath) 141 | if err != nil { 142 | return err 143 | } 144 | defer src.Close() 145 | 146 | // upload 147 | blobPath := filepath.Join(repo.Prefix, repoPath) 148 | blobClient := repo.Client.NewBlockBlobClient(blobPath) 149 | 150 | _, err = blobClient.UploadFileToBlockBlob( 151 | ctx, 152 | src, 153 | azblob.HighLevelUploadToBlockBlobOption{ 154 | Progress: func(bytesTransferred int64) { 155 | if m != nil { 156 | m.SetBytes(bytesTransferred) 157 | } 158 | }, 159 | Parallelism: 10, 160 | }, 161 | ) 162 | 163 | return err 164 | } 165 | 166 | func (repo *AzureBlobRepository) Download(repoPath, localPath string, m *Meter) error { 167 | ctx := context.Background() 168 | 169 | // file 170 | dest, err := os.Create(localPath) 171 | if err != nil { 172 | return err 173 | } 174 | defer dest.Close() 175 | 176 | // download 177 | blobPath := filepath.Join(repo.Prefix, repoPath) 178 | blobClient := repo.Client.NewBlockBlobClient(blobPath) 179 | err = blobClient.DownloadBlobToFile(ctx, 0, 0, dest, azblob.HighLevelDownloadFromBlobOptions{ 180 | Progress: func(bytesTransferred int64) { 181 | if m != nil { 182 | m.SetBytes(bytesTransferred) 183 | } 184 | }, 185 | Parallelism: 10, 186 | }) 187 | if err != nil { 188 | return err 189 | } 190 | 191 | return nil 192 | } 193 | 194 | func (repo *AzureBlobRepository) Delete(repoPath string) error { 195 | ctx := context.Background() 196 | 197 | blobPath := filepath.Join(repo.Prefix, repoPath) 198 | blobClient := repo.Client.NewBlockBlobClient(blobPath) 199 | _, err := blobClient.Delete(ctx, nil) 200 | if err != nil { 201 | return err 202 | } 203 | 204 | return nil 205 | } 206 | 207 | func (repo *AzureBlobRepository) Stat(repoPath string) (FileInfo, error) { 208 | ctx := context.Background() 209 | 210 | blobPath := filepath.Join(repo.Prefix, repoPath) 211 | blobClient := repo.Client.NewBlockBlobClient(blobPath) 212 | _, err := blobClient.GetProperties(ctx, nil) 213 | if err != nil { 214 | return nil, err 215 | } 216 | 217 | return &SimpleFileInfo{ 218 | name: filepath.Base(repoPath), 219 | }, nil 220 | } 221 | 222 | func (repo *AzureBlobRepository) List(repoPath string) ([]FileInfo, error) { 223 | ctx := context.Background() 224 | entries := make([]FileInfo, 0) 225 | prefix := filepath.Join(repo.Prefix, repoPath) + "/" 226 | pager := repo.Client.ListBlobsHierarchy("/", &azblob.ContainerListBlobHierarchySegmentOptions{Prefix: &prefix}) 227 | for pager.NextPage(ctx) { 228 | resp := pager.PageResponse() 229 | 230 | for _, blobInfo := range resp.Segment.BlobItems { 231 | n := *blobInfo.Name 232 | name := n[len(prefix):] 233 | entries = append(entries, &SimpleFileInfo{ 234 | name: name, 235 | isDir: 
false, 236 | }) 237 | } 238 | 239 | for _, blobPrefix := range resp.Segment.BlobPrefixes { 240 | p := *blobPrefix.Name 241 | name := p[len(prefix) : len(p)-1] 242 | entries = append(entries, &SimpleFileInfo{ 243 | name: name, 244 | isDir: true, 245 | }) 246 | } 247 | } 248 | 249 | return entries, nil 250 | } 251 | -------------------------------------------------------------------------------- /internal/repository/azureblob_test.go: -------------------------------------------------------------------------------- 1 | package repository 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func Test_ParseAzureBlobUrl(t *testing.T) { 10 | testCases := []struct { 11 | repo string 12 | storageAccount string 13 | container string 14 | prefix string 15 | }{ 16 | { 17 | repo: "https://artivc.blob.core.windows.net/avc", 18 | storageAccount: "artivc", 19 | container: "avc", 20 | prefix: "", 21 | }, 22 | { 23 | repo: "https://artivc.blob.core.windows.net/avc/", 24 | storageAccount: "artivc", 25 | container: "avc", 26 | prefix: "", 27 | }, 28 | { 29 | repo: "https://artivc.blob.core.windows.net/avc/abc", 30 | storageAccount: "artivc", 31 | container: "avc", 32 | prefix: "abc", 33 | }, 34 | { 35 | repo: "https://artivc.blob.core.windows.net/avc/abc/", 36 | storageAccount: "artivc", 37 | container: "avc", 38 | prefix: "abc/", 39 | }, 40 | } 41 | for _, tC := range testCases { 42 | t.Run(tC.repo, func(t *testing.T) { 43 | storageAccount, container, prefix, err := ParseAzureBlobUrl(tC.repo) 44 | if err != nil { 45 | t.Error(err) 46 | return 47 | } 48 | 49 | assert.Equal(t, tC.storageAccount, storageAccount) 50 | assert.Equal(t, tC.container, container) 51 | assert.Equal(t, tC.prefix, prefix) 52 | 53 | }) 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /internal/repository/errors.go: -------------------------------------------------------------------------------- 1 | package repository 2 | 3 | import "errors" 4 | 5 | var ErrUnsupportedRepository = errors.New("unsupported repository") 6 | 7 | type UnsupportedRepositoryError struct { 8 | Message string 9 | } 10 | 11 | func (err UnsupportedRepositoryError) Error() string { 12 | return err.Message 13 | } 14 | -------------------------------------------------------------------------------- /internal/repository/gcs.go: -------------------------------------------------------------------------------- 1 | package repository 2 | 3 | import ( 4 | "context" 5 | "os" 6 | "path/filepath" 7 | "strings" 8 | 9 | "cloud.google.com/go/storage" 10 | "google.golang.org/api/iterator" 11 | ) 12 | 13 | // Google Cloud Storage 14 | type GCSRepository struct { 15 | Bucket string 16 | BasePath string 17 | Client *storage.Client 18 | } 19 | 20 | func NewGCSRepository(bucket, basePath string) (*GCSRepository, error) { 21 | ctx := context.Background() 22 | basePath = strings.TrimPrefix(basePath, "/") 23 | client, err := storage.NewClient(ctx) 24 | if err != nil { 25 | return nil, err 26 | } 27 | 28 | return &GCSRepository{ 29 | Bucket: bucket, 30 | BasePath: basePath, 31 | Client: client, 32 | }, nil 33 | } 34 | 35 | func (repo *GCSRepository) Upload(localPath, repoPath string, m *Meter) error { 36 | ctx := context.Background() 37 | 38 | // client, bucket, obj 39 | client := repo.Client 40 | bkt := client.Bucket(repo.Bucket) 41 | obj := bkt.Object(filepath.Join(repo.BasePath, repoPath)) 42 | 43 | // src 44 | src, err := os.Open(localPath) 45 | if err != nil { 46 | return err 47 | } 48 | defer src.Close() 49 |
50 | // dest: the GCS object is committed only when Close returns, so its error must be propagated 51 | 52 | dest := obj.NewWriter(ctx) 53 | 54 | // copy 55 | _, err = CopyWithMeter(dest, src, m) 56 | if err != nil { 57 | dest.Close() 58 | return err 59 | } 60 | 61 | return dest.Close() 62 | } 63 | 64 | func (repo *GCSRepository) Download(repoPath, localPath string, m *Meter) error { 65 | ctx := context.Background() 66 | 67 | // client, bucket, obj 68 | client := repo.Client 69 | bkt := client.Bucket(repo.Bucket) 70 | obj := bkt.Object(filepath.Join(repo.BasePath, repoPath)) 71 | 72 | // src 73 | src, err := obj.NewReader(ctx) 74 | if err != nil { 75 | return err 76 | } 77 | defer src.Close() 78 | 79 | // dest 80 | dest, err := os.Create(localPath) 81 | if err != nil { 82 | return err 83 | } 84 | defer dest.Close() 85 | 86 | // copy 87 | _, err = CopyWithMeter(dest, src, m) 88 | if err != nil { 89 | return err 90 | } 91 | 92 | return nil 93 | } 94 | 95 | func (repo *GCSRepository) Delete(repoPath string) error { 96 | ctx := context.Background() 97 | 98 | // client, bucket, obj 99 | client := repo.Client 100 | bkt := client.Bucket(repo.Bucket) 101 | obj := bkt.Object(filepath.Join(repo.BasePath, repoPath)) 102 | 103 | // delete 104 | err := obj.Delete(ctx) 105 | if err != nil { 106 | return err 107 | } 108 | 109 | return nil 110 | } 111 | 112 | func (repo *GCSRepository) Stat(repoPath string) (FileInfo, error) { 113 | ctx := context.Background() 114 | 115 | // client, bucket, obj 116 | client := repo.Client 117 | bkt := client.Bucket(repo.Bucket) 118 | obj := bkt.Object(filepath.Join(repo.BasePath, repoPath)) 119 | 120 | // get object stat 121 | _, err := obj.Attrs(ctx) 122 | if err != nil { 123 | return nil, err 124 | } 125 | 126 | return &GCSFileInfo{ 127 | name: filepath.Base(repoPath), 128 | isDir: false, 129 | }, nil 130 | } 131 | 132 | func (repo *GCSRepository) List(repoPath string) ([]FileInfo, error) { 133 | ctx := context.Background() 134 | records := []FileInfo{} 135 | 136 | // client, bucket, obj 137 | client := repo.Client 138 | bkt := client.Bucket(repo.Bucket) 139 | prefix := filepath.Join(repo.BasePath, repoPath) + "/" 140 | query := &storage.Query{Prefix: prefix, Delimiter: "/"} 141 | 142 | it := bkt.Objects(ctx, query) 143 | for { 144 | attrs, err := it.Next() 145 | if err == iterator.Done { 146 | break 147 | } 148 | if err != nil { 149 | return records, err 150 | } 151 | 152 | fileinfo := GCSFileInfo{} 153 | 154 | if attrs.Name != "" { 155 | fileinfo.name = attrs.Name[len(prefix):] 156 | fileinfo.isDir = false 157 | } else { 158 | fileinfo.name = attrs.Prefix[len(prefix) : len(attrs.Prefix)-1] 159 | fileinfo.isDir = true 160 | } 161 | records = append(records, &fileinfo) 162 | } 163 | 164 | return records, nil 165 | } 166 | 167 | type GCSFileInfo struct { 168 | name string 169 | isDir bool 170 | } 171 | 172 | func (fi *GCSFileInfo) Name() string { 173 | return fi.name 174 | } 175 | 176 | func (fi *GCSFileInfo) IsDir() bool { 177 | return fi.isDir 178 | } 179 | -------------------------------------------------------------------------------- /internal/repository/http.go: -------------------------------------------------------------------------------- 1 | package repository 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "net/http" 7 | "net/url" 8 | "os" 9 | "strings" 10 | "time" 11 | ) 12 | 13 | type HttpRepository struct { 14 | RepoUrl string 15 | } 16 | 17 | func NewHttpRepository(repo string) (*HttpRepository, error) { 18 | if !strings.HasSuffix(repo, "/") { 19 | repo += "/" 20 | } 21 | 22 | return &HttpRepository{ 23 | RepoUrl: repo, 24 | },
nil 25 | } 26 | 27 | func (repo *HttpRepository) Upload(localPath, repoPath string, meter *Meter) error { 28 | return errors.New("Upload is not supported in Http repository") 29 | } 30 | 31 | func (repo *HttpRepository) Download(repoPath, localPath string, m *Meter) error { 32 | filePath, err := getFilePath(repo.RepoUrl, repoPath) 33 | if err != nil { 34 | return err 35 | } 36 | 37 | res, err := http.Get(filePath) 38 | if err != nil { 39 | retry := 0 40 | msg := err.Error() 41 | 42 | for err != nil && strings.HasSuffix(msg, "connection reset by peer") && retry < 10 { 43 | retry++ 44 | time.Sleep(time.Millisecond * 50 * time.Duration(retry)) 45 | res, err = http.Get(filePath) 46 | } 47 | 48 | if err != nil { 49 | return err 50 | } 51 | } 52 | defer res.Body.Close() 53 | 54 | if res.StatusCode != 200 { 55 | return fmt.Errorf("status code: %d", res.StatusCode) 56 | } 57 | 58 | outputFile, err := os.Create(localPath) 59 | if err != nil { 60 | return err 61 | } 62 | defer outputFile.Close() 63 | 64 | _, err = CopyWithMeter(outputFile, res.Body, m) 65 | return err 66 | } 67 | 68 | func (repo *HttpRepository) Delete(repoPath string) error { 69 | return errors.New("Delete is not supported in Http repository") 70 | } 71 | 72 | func (repo *HttpRepository) Stat(repoPath string) (FileInfo, error) { 73 | filePath, err := getFilePath(repo.RepoUrl, repoPath) 74 | if err != nil { 75 | return nil, err 76 | } 77 | 78 | res, err := http.Head(filePath) 79 | if err != nil { 80 | return nil, err 81 | } 82 | defer res.Body.Close() 83 | 84 | if res.StatusCode != 200 { 85 | return nil, fmt.Errorf("status code: %d", res.StatusCode) 86 | } 87 | 88 | info := &HttpFileInfo{ 89 | name: repoPath, 90 | } 91 | 92 | return info, nil 93 | } 94 | 95 | func (repo *HttpRepository) List(repoPath string) ([]FileInfo, error) { 96 | return nil, errors.New("List is not supported in Http repository") 97 | } 98 | 99 | func getFilePath(repoPath, filePath string) (string, error) { 100 | base, err := url.Parse(repoPath) 101 | if err != nil { 102 | return "", err 103 | } 104 | path, err := url.Parse(filePath) 105 | if err != nil { 106 | return "", err 107 | } 108 | return base.ResolveReference(path).String(), nil 109 | } 110 | 111 | type HttpFileInfo struct { 112 | name string 113 | } 114 | 115 | func (info *HttpFileInfo) Name() string { 116 | return info.name 117 | } 118 | 119 | func (info *HttpFileInfo) IsDir() bool { 120 | return false 121 | } 122 | -------------------------------------------------------------------------------- /internal/repository/local.go: -------------------------------------------------------------------------------- 1 | package repository 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "io/fs" 7 | "os" 8 | "path" 9 | "path/filepath" 10 | ) 11 | 12 | // Local Filesystem 13 | type LocalFileSystemRepository struct { 14 | RepoDir string 15 | } 16 | 17 | func NewLocalFileSystemRepository(repoDir string) (*LocalFileSystemRepository, error) { 18 | stat, err := os.Stat(repoDir) 19 | if err != nil { 20 | if os.IsNotExist(err) { 21 | err = os.Mkdir(repoDir, fs.ModePerm) 22 | if err != nil { 23 | return nil, errors.New("cannot make directory: " + repoDir) 24 | } 25 | } else { 26 | return nil, err 27 | } 28 | } else { 29 | if !stat.IsDir() { 30 | return nil, errors.New(repoDir + " is not a directory") 31 | } 32 | } 33 | 34 | return &LocalFileSystemRepository{ 35 | RepoDir: repoDir, 36 | }, nil 37 | } 38 | 39 | func (repo *LocalFileSystemRepository) Upload(localPath, repoPath string, m *Meter) error { 40 | sourceFileStat, err := 
os.Stat(localPath) 41 | if err != nil { 42 | return err 43 | } 44 | 45 | if !sourceFileStat.Mode().IsRegular() { 46 | return fmt.Errorf("%s is not a regular file", localPath) 47 | } 48 | 49 | source, err := os.Open(localPath) 50 | if err != nil { 51 | return err 52 | } 53 | defer source.Close() 54 | 55 | // Copy from source to tmp 56 | tmpDir := path.Join(repo.RepoDir, "tmp") 57 | err = os.MkdirAll(tmpDir, fs.ModePerm) 58 | if err != nil { 59 | return err 60 | } 61 | 62 | tmp, err := os.CreateTemp(tmpDir, "*") 63 | if err != nil { 64 | return err 65 | } 66 | tmpPath := tmp.Name() 67 | defer os.Remove(tmpPath) 68 | _, err = CopyWithMeter(tmp, source, m) 69 | if err != nil { 70 | return err 71 | } 72 | err = tmp.Close() 73 | if err != nil { 74 | return err 75 | } 76 | 77 | // Move from tmp to dest 78 | destPath := path.Join(repo.RepoDir, repoPath) 79 | err = os.MkdirAll(filepath.Dir(destPath), fs.ModePerm) 80 | if err != nil { 81 | return err 82 | } 83 | err = os.Remove(destPath) 84 | if err != nil && !os.IsNotExist(err) { 85 | return err 86 | } 87 | 88 | err = os.Rename(tmpPath, destPath) 89 | if err != nil { 90 | return err 91 | } 92 | 93 | return nil 94 | } 95 | 96 | func (repo *LocalFileSystemRepository) Download(repoPath, localPath string, m *Meter) error { 97 | srcPath := path.Join(repo.RepoDir, repoPath) 98 | src, err := os.Open(srcPath) 99 | if err != nil { 100 | return err 101 | } 102 | defer src.Close() 103 | 104 | dest, err := os.Create(localPath) 105 | if err != nil { 106 | return err 107 | } 108 | defer dest.Close() 109 | written, err := CopyWithMeter(dest, src, m) 110 | if err != nil { 111 | return err 112 | } 113 | 114 | if written == 0 { 115 | err = os.Truncate(localPath, 0) 116 | } 117 | 118 | return err 119 | } 120 | 121 | func (repo *LocalFileSystemRepository) Delete(repoPath string) error { 122 | filePath := path.Join(repo.RepoDir, repoPath) 123 | return os.Remove(filePath) 124 | } 125 | 126 | func (repo *LocalFileSystemRepository) Stat(repoPath string) (FileInfo, error) { 127 | filePath := path.Join(repo.RepoDir, repoPath) 128 | return os.Stat(filePath) 129 | } 130 | 131 | func (repo *LocalFileSystemRepository) List(repoPath string) ([]FileInfo, error) { 132 | dir := path.Join(repo.RepoDir, repoPath) 133 | fs, err := os.ReadDir(dir) 134 | if err != nil { 135 | return []FileInfo{}, nil 136 | } 137 | fs2 := []FileInfo{} 138 | 139 | for _, info := range fs { 140 | info2, ok := info.(FileInfo) 141 | if ok { 142 | fs2 = append(fs2, info2) 143 | } 144 | } 145 | return fs2, nil 146 | } 147 | -------------------------------------------------------------------------------- /internal/repository/local_test.go: -------------------------------------------------------------------------------- 1 | package repository 2 | 3 | import ( 4 | "os" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestLocalUpload(t *testing.T) { 11 | testCases := []struct { 12 | desc string 13 | data string 14 | }{ 15 | { 16 | desc: "empty file", data: "", 17 | }, 18 | { 19 | desc: "non empty file", data: "hello", 20 | }, 21 | } 22 | for _, tC := range testCases { 23 | t.Run(tC.desc, func(t *testing.T) { 24 | repoDir := t.TempDir() 25 | tmpDir := t.TempDir() 26 | 27 | repo, err := NewLocalFileSystemRepository(repoDir) 28 | if err != nil { 29 | t.Error(err) 30 | } 31 | 32 | err = os.WriteFile(tmpDir+"/test", []byte(tC.data), 0644) 33 | if err != nil { 34 | t.Error(err) 35 | } 36 | 37 | err = repo.Upload(tmpDir+"/test", "path/to/the/test", nil) 38 | if err != nil { 39 | t.Error(err) 
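/*
Design note on the LocalFileSystemRepository.Upload implementation above (an
observation, not something this test asserts): the data is first copied to
<RepoDir>/tmp/<random> and then moved into place with os.Rename, so a reader
never observes a half-written object. The explicit os.Remove of the
destination is needed because os.Rename does not replace an existing file on
every platform (notably Windows), and the rename itself is only safe because
tmp/ and the destination both live under RepoDir, i.e. on the same filesystem.
*/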
40 | } 41 | data, err := os.ReadFile(repoDir + "/path/to/the/test") 42 | if err != nil { 43 | t.Error(err) 44 | } 45 | assert.Equal(t, []byte(tC.data), []byte(data)) 46 | }) 47 | } 48 | } 49 | 50 | func TestLocalDownload(t *testing.T) { 51 | testCases := []struct { 52 | desc string 53 | data string 54 | }{ 55 | { 56 | desc: "empty file", data: "", 57 | }, 58 | { 59 | desc: "non empty file", data: "hello", 60 | }, 61 | } 62 | for _, tC := range testCases { 63 | t.Run(tC.desc, func(t *testing.T) { 64 | repoDir := t.TempDir() 65 | tmpDir := t.TempDir() 66 | 67 | repo, err := NewLocalFileSystemRepository(repoDir) 68 | if err != nil { 69 | t.Error(err) 70 | } 71 | 72 | err = os.MkdirAll(repoDir+"/path/to/the", os.ModePerm) 73 | if err != nil { 74 | t.Error(err) 75 | } 76 | 77 | err = os.WriteFile(repoDir+"/path/to/the/test", []byte(tC.data), 0644) 78 | if err != nil { 79 | t.Error(err) 80 | } 81 | 82 | err = repo.Download("path/to/the/test", tmpDir+"/test", nil) 83 | if err != nil { 84 | t.Error(err) 85 | } 86 | data, err := os.ReadFile(tmpDir + "/test") 87 | if err != nil { 88 | t.Error(err) 89 | } 90 | assert.Equal(t, []byte(tC.data), []byte(data)) 91 | }) 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /internal/repository/meter.go: -------------------------------------------------------------------------------- 1 | package repository 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "sync/atomic" 7 | "time" 8 | ) 9 | 10 | type ByteSize float64 11 | 12 | const ( 13 | _ = iota // ignore first value by assigning to blank identifier 14 | KB ByteSize = 1 << (10 * iota) 15 | MB 16 | GB 17 | TB 18 | PB 19 | EB 20 | ZB 21 | YB 22 | ) 23 | 24 | func (b ByteSize) String() string { 25 | switch { 26 | case b >= YB: 27 | return fmt.Sprintf("%.2fYB", b/YB) 28 | case b >= ZB: 29 | return fmt.Sprintf("%.2fZB", b/ZB) 30 | case b >= EB: 31 | return fmt.Sprintf("%.2fEB", b/EB) 32 | case b >= PB: 33 | return fmt.Sprintf("%.2fPB", b/PB) 34 | case b >= TB: 35 | return fmt.Sprintf("%.2fTB", b/TB) 36 | case b >= GB: 37 | return fmt.Sprintf("%.2fGB", b/GB) 38 | case b >= MB: 39 | return fmt.Sprintf("%.2fMB", b/MB) 40 | case b >= KB: 41 | return fmt.Sprintf("%.2fKB", b/KB) 42 | } 43 | return fmt.Sprintf("%.2fB", b) 44 | } 45 | 46 | type Session struct { 47 | startedAt time.Time 48 | meters []*Meter 49 | } 50 | 51 | func NewSession() *Session { 52 | return &Session{ 53 | startedAt: time.Now(), 54 | meters: []*Meter{}, 55 | } 56 | } 57 | 58 | func (s *Session) NewMeter() *Meter { 59 | meter := &Meter{ 60 | total: 0, 61 | } 62 | s.meters = append(s.meters, meter) 63 | return meter 64 | } 65 | 66 | func (s *Session) CalculateSpeed() ByteSize { 67 | totalDiff := time.Since(s.startedAt).Seconds() 68 | var total int64 69 | for _, meter := range s.meters { 70 | total = total + meter.total 71 | } 72 | 73 | speed := float64(total) / totalDiff 74 | return ByteSize(speed) 75 | } 76 | 77 | type Meter struct { 78 | total int64 79 | } 80 | 81 | func (m *Meter) Write(p []byte) (n int, err error) { 82 | written := len(p) 83 | m.AddBytes(written) 84 | return written, nil 85 | } 86 | 87 | func (m *Meter) AddBytes(bytes int) { 88 | atomic.AddInt64(&m.total, int64(bytes)) 89 | } 90 | 91 | func (m *Meter) SetBytes(bytes int64) { 92 | atomic.StoreInt64(&m.total, bytes) 93 | } 94 | 95 | func CopyWithMeter(dest io.Writer, src io.Reader, meter *Meter) (int64, error) { 96 | buf := make([]byte, 1024*1024) 97 | 98 | if meter != nil { 99 | return io.CopyBuffer(dest, io.TeeReader(src, meter), buf) 100 | } 
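/*
Minimal sketch of how Session, Meter, and CopyWithMeter compose (illustrative
only; file names are hypothetical and error handling is elided):

    session := NewSession()
    meter := session.NewMeter()
    src, _ := os.Open("in.bin")
    dst, _ := os.Create("out.bin")
    _, _ = CopyWithMeter(dst, src, meter)
    fmt.Printf("%s/s\n", session.CalculateSpeed()) // e.g. "12.34MB/s"

The io.TeeReader above routes every chunk through Meter.Write, which only
increments an atomic counter, so meters can be updated from concurrent
transfer goroutines while the session aggregates them.
*/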
101 | 102 | return io.CopyBuffer(dest, src, buf) 103 | } 104 | -------------------------------------------------------------------------------- /internal/repository/rclone.go: -------------------------------------------------------------------------------- 1 | package repository 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "os" 7 | "os/exec" 8 | "path/filepath" 9 | ) 10 | 11 | // Local Filesystem 12 | type RcloneRepository struct { 13 | Remote string 14 | BaseDir string 15 | } 16 | 17 | func NewRcloneRepository(remote, basePath string) (*RcloneRepository, error) { 18 | cmd := exec.Command("rclone", "version") 19 | err := cmd.Run() 20 | if err != nil { 21 | return nil, err 22 | } 23 | 24 | return &RcloneRepository{ 25 | Remote: remote, 26 | BaseDir: basePath, 27 | }, nil 28 | } 29 | 30 | func (repo *RcloneRepository) Upload(localPath, repoPath string, m *Meter) error { 31 | cmd := exec.Command("rclone", "copyto", "--no-check-dest", localPath, repo.remotePath(repoPath)) 32 | err := cmd.Run() 33 | if err != nil { 34 | return err 35 | } 36 | 37 | return nil 38 | } 39 | 40 | func (repo *RcloneRepository) Download(repoPath, localPath string, m *Meter) error { 41 | cmd := exec.Command("rclone", "copyto", "--no-check-dest", repo.remotePath(repoPath), localPath) 42 | err := cmd.Run() 43 | if err != nil { 44 | return err 45 | } 46 | 47 | return nil 48 | } 49 | 50 | func (repo *RcloneRepository) Delete(repoPath string) error { 51 | cmd := exec.Command("rclone", "deletefile", repo.remotePath(repoPath)) 52 | err := cmd.Run() 53 | if err != nil { 54 | return err 55 | } 56 | 57 | return nil 58 | } 59 | 60 | func (repo *RcloneRepository) Stat(repoPath string) (FileInfo, error) { 61 | var out bytes.Buffer 62 | cmd := exec.Command("rclone", "size", "--json", repo.remotePath(repoPath)) 63 | cmd.Stdout = &out 64 | if err := cmd.Run(); err != nil { 65 | return nil, err 66 | } 67 | 68 | type RcloneSize struct { 69 | Count int `json:"count"` 70 | } 71 | 72 | var size RcloneSize 73 | if err := json.Unmarshal(out.Bytes(), &size); err != nil { 74 | return nil, err 75 | } 76 | 77 | if size.Count == 0 { 78 | return nil, os.ErrNotExist 79 | } 80 | 81 | return &RcloneFileInfo{ 82 | Name_: filepath.Base(repoPath), 83 | IsDir_: false, 84 | }, nil 85 | } 86 | 87 | func (repo *RcloneRepository) List(repoPath string) ([]FileInfo, error) { 88 | var out bytes.Buffer 89 | cmd := exec.Command("rclone", "lsjson", repo.remotePath(repoPath)) 90 | cmd.Stdout = &out 91 | err := cmd.Run() 92 | if err != nil { 93 | return nil, err 94 | } 95 | 96 | var rcloneEntries []RcloneFileInfo 97 | err = json.Unmarshal(out.Bytes(), &rcloneEntries) 98 | if err != nil { 99 | return nil, err 100 | } 101 | 102 | entries := make([]FileInfo, 0) 103 | for _, entry := range rcloneEntries { 104 | entries = append(entries, &entry) 105 | } 106 | return entries, nil 107 | } 108 | 109 | func (repo *RcloneRepository) remotePath(repoPath string) string { 110 | path := filepath.Join(repo.BaseDir, repoPath) 111 | return repo.Remote + ":" + path 112 | } 113 | 114 | type RcloneFileInfo struct { 115 | Name_ string `json:"Name"` 116 | IsDir_ bool `json:"IsDir"` 117 | } 118 | 119 | func (e *RcloneFileInfo) Name() string { 120 | return e.Name_ 121 | } 122 | 123 | func (e *RcloneFileInfo) IsDir() bool { 124 | return e.IsDir_ 125 | } 126 | -------------------------------------------------------------------------------- /internal/repository/repo_integration_test.go: -------------------------------------------------------------------------------- 1 | // Run 
integration test to any repository 2 | // 3 | // TEST_REPOSITORY=s3://bucket/myrepo go test -v ./internal/repository 4 | package repository 5 | 6 | import ( 7 | cryptorand "crypto/rand" 8 | "crypto/sha1" 9 | "fmt" 10 | "io" 11 | "math/rand" 12 | "os" 13 | "path/filepath" 14 | "testing" 15 | "time" 16 | 17 | "github.com/infuseai/artivc/internal/log" 18 | "github.com/stretchr/testify/assert" 19 | ) 20 | 21 | func getRepo() (Repository, error) { 22 | repoStr := os.Getenv("TEST_REPOSITORY") 23 | if repoStr == "" { 24 | return nil, nil 25 | } 26 | result, err := ParseRepo(repoStr) 27 | if err != nil { 28 | return nil, err 29 | } 30 | return NewRepository(result) 31 | } 32 | 33 | func sha1sum(path string) string { 34 | hasher := sha1.New() 35 | f, err := os.Open(path) 36 | if err != nil { 37 | panic(err) 38 | } 39 | defer f.Close() 40 | if _, err := io.Copy(hasher, f); err != nil { 41 | panic(err) 42 | } 43 | sum := hasher.Sum([]byte{}) 44 | return fmt.Sprintf("%x", sum) 45 | } 46 | 47 | func generateRandomFile(path string, size int64) error { 48 | f, err := os.Create(path) 49 | if err != nil { 50 | return err 51 | } 52 | defer f.Close() 53 | 54 | _, err = io.CopyN(f, cryptorand.Reader, size) 55 | if err != nil { 56 | return err 57 | } 58 | 59 | return nil 60 | } 61 | 62 | func Test_Transfer(t *testing.T) { 63 | repo, err := getRepo() 64 | if repo == nil { 65 | return 66 | } 67 | 68 | if err != nil { 69 | t.Error(err) 70 | } 71 | 72 | testCases := []struct { 73 | desc string 74 | size int64 75 | repoPath string 76 | }{ 77 | {desc: "small file", size: 1024, repoPath: "bin"}, 78 | {desc: "small file with subpath", size: 1024, repoPath: "this/is/my/bin"}, 79 | {desc: "large file", size: 10 * 1024 * 1024, repoPath: "bin"}, 80 | {desc: "empty file", size: 0, repoPath: "bin"}, 81 | } 82 | for _, tC := range testCases { 83 | t.Run(tC.desc, func(t *testing.T) { 84 | tmpDir := t.TempDir() 85 | path := tmpDir + "/in" 86 | assert.NoError(t, generateRandomFile(path, tC.size)) 87 | 88 | if err := repo.Upload(path, tC.repoPath, nil); err != nil { 89 | t.Error(err) 90 | } 91 | 92 | if err := repo.Download(tC.repoPath, tmpDir+"/out", nil); err != nil { 93 | t.Error(err) 94 | } 95 | 96 | assert.Equal(t, sha1sum(tmpDir+"/in"), sha1sum(tmpDir+"/out")) 97 | 98 | if err := repo.Delete(tC.repoPath); err != nil { 99 | t.Error(err) 100 | } 101 | }) 102 | } 103 | } 104 | 105 | func Test_Stat(t *testing.T) { 106 | repo, err := getRepo() 107 | if repo == nil { 108 | return 109 | } 110 | 111 | if err != nil { 112 | t.Error(err) 113 | } 114 | 115 | rand.Seed(time.Now().UnixNano()) 116 | tmpDir := t.TempDir() 117 | path := tmpDir + "/bin" 118 | repoPath := fmt.Sprintf("stat/%d", rand.Int()) 119 | 120 | // stat non-existed file 121 | _, err = repo.Stat(repoPath) 122 | assert.Error(t, err, "Stat() should return error if the file does not exist") 123 | 124 | // upload & stat 125 | assert.NoError(t, generateRandomFile(path, 1024)) 126 | err = repo.Upload(path, repoPath, nil) 127 | if err != nil { 128 | t.Error(err) 129 | } 130 | 131 | info, err := repo.Stat(repoPath) 132 | if err != nil { 133 | t.Error(err) 134 | } 135 | assert.Equal(t, filepath.Base(repoPath), info.Name(), "name of Stat() should be the last component of path") 136 | assert.Equal(t, false, info.IsDir(), "result of Stat() should not be a directory ") 137 | 138 | // delete 139 | err = repo.Delete(repoPath) 140 | if err != nil { 141 | t.Error(err) 142 | } 143 | 144 | _, err = repo.Stat(repoPath) 145 | assert.Error(t, err, "Stat() should return error after the file 
deleted") 146 | } 147 | 148 | func Test_List(t *testing.T) { 149 | repo, err := getRepo() 150 | if repo == nil { 151 | return 152 | } 153 | 154 | if err != nil { 155 | t.Error(err) 156 | } 157 | 158 | rand.Seed(time.Now().UnixNano()) 159 | tmpDir := t.TempDir() 160 | path := tmpDir + "/bin" 161 | assert.NoError(t, generateRandomFile(path, 1024)) 162 | 163 | // Create files 164 | // 165 | // dir 166 | // ├── 0 167 | // ├── 1 168 | // ├── 2 169 | // └── 3 170 | // ├── 0 171 | // ├── 1 172 | // └── 2 173 | for i := 0; i < 3; i++ { 174 | rpath := fmt.Sprintf("dir/%d", i) 175 | err = repo.Upload(path, rpath, nil) 176 | if err != nil { 177 | t.Error(err) 178 | } 179 | 180 | defer func() { 181 | if err := repo.Delete(rpath); err != nil { 182 | log.Debugln("can't delete repo: " + err.Error()) 183 | } 184 | }() 185 | } 186 | for i := 0; i < 3; i++ { 187 | rpath := fmt.Sprintf("dir/3/%d", i) 188 | 189 | err = repo.Upload(path, rpath, nil) 190 | if err != nil { 191 | t.Error(err) 192 | } 193 | 194 | defer func() { 195 | if err := repo.Delete(rpath); err != nil { 196 | log.Debugln("can't delete repo: " + err.Error()) 197 | } 198 | }() 199 | } 200 | 201 | // test 202 | // ls dir 203 | list, err := repo.List("dir") 204 | assert.NoError(t, err) 205 | assert.Equal(t, 4, len(list)) 206 | for _, info := range list { 207 | switch info.Name() { 208 | case "0": 209 | assert.False(t, info.IsDir()) 210 | case "1": 211 | assert.False(t, info.IsDir()) 212 | case "2": 213 | assert.False(t, info.IsDir()) 214 | case "3": 215 | assert.True(t, info.IsDir()) 216 | default: 217 | assert.Fail(t, "wrong list item") 218 | } 219 | } 220 | 221 | // ls dir/3 222 | list, err = repo.List("dir/3") 223 | if err != nil { 224 | t.Error(err) 225 | } 226 | assert.Equal(t, 3, len(list)) 227 | for _, info := range list { 228 | switch info.Name() { 229 | case "0": 230 | assert.False(t, info.IsDir()) 231 | case "1": 232 | assert.False(t, info.IsDir()) 233 | case "2": 234 | assert.False(t, info.IsDir()) 235 | default: 236 | assert.Fail(t, "wrong list item") 237 | } 238 | } 239 | 240 | // ls nono-existing folder 241 | list, err = repo.List("dir-12345") 242 | if err != nil { 243 | t.Error(err) 244 | } 245 | assert.Equal(t, 0, len(list)) 246 | } 247 | -------------------------------------------------------------------------------- /internal/repository/repository.go: -------------------------------------------------------------------------------- 1 | package repository 2 | 3 | import ( 4 | neturl "net/url" 5 | "os" 6 | "path/filepath" 7 | "strings" 8 | ) 9 | 10 | type FileInfo interface { 11 | Name() string 12 | IsDir() bool 13 | } 14 | 15 | type SimpleFileInfo struct { 16 | name string 17 | isDir bool 18 | } 19 | 20 | func (fi *SimpleFileInfo) Name() string { 21 | return fi.name 22 | } 23 | 24 | func (fi *SimpleFileInfo) IsDir() bool { 25 | return fi.isDir 26 | } 27 | 28 | type Repository interface { 29 | Upload(localPath, repoPath string, meter *Meter) error 30 | Download(repoPath, localPath string, meter *Meter) error 31 | Delete(repoPath string) error 32 | Stat(repoPath string) (FileInfo, error) 33 | List(repoPath string) ([]FileInfo, error) 34 | } 35 | 36 | type RepoParseResult struct { 37 | Repo string 38 | scheme string 39 | host string 40 | path string 41 | } 42 | 43 | func ParseRepo(repo string) (RepoParseResult, error) { 44 | var result RepoParseResult 45 | 46 | if strings.Contains(repo, "://") { 47 | url, err := neturl.Parse(repo) 48 | if err != nil { 49 | return result, err 50 | } 51 | 52 | if url.Scheme == "" { 53 | return result, 
UnsupportedRepositoryError{ 54 | Message: "unsupported repository. Relative path is not allowed as a repository path", 55 | } 56 | } 57 | 58 | result.Repo = repo 59 | result.scheme = url.Scheme 60 | result.host = url.Host 61 | result.path = url.Path 62 | } else { 63 | i := strings.Index(repo, ":") 64 | if i > 0 { 65 | result.Repo = repo 66 | result.scheme = "ssh" 67 | result.host = repo[0:i] 68 | result.path = repo[i+1:] 69 | } else { 70 | cwd, err := os.Getwd() 71 | if err != nil { 72 | return result, err 73 | } 74 | if !strings.HasPrefix(repo, "/") { 75 | repo, err = filepath.Abs(filepath.Join(cwd, repo)) 76 | if err != nil { 77 | return result, err 78 | } 79 | } 80 | 81 | result.Repo = repo 82 | result.scheme = "file" 83 | result.host = "" 84 | result.path = repo 85 | } 86 | } 87 | 88 | return result, nil 89 | } 90 | 91 | func ParseRepoName(result RepoParseResult) (string, error) { 92 | if result.scheme == "ssh" { 93 | name := filepath.Base(result.path) 94 | if name == "/" { 95 | return result.host, nil 96 | } 97 | return name, nil 98 | } else { 99 | url, err := neturl.Parse(result.Repo) 100 | if err != nil { 101 | return "", err 102 | } 103 | 104 | if url.Path == "" { 105 | return url.Hostname(), nil 106 | } 107 | 108 | name := filepath.Base(url.Path) 109 | if name == "/" { 110 | return url.Hostname(), nil 111 | } 112 | 113 | return name, nil 114 | } 115 | } 116 | 117 | func NewRepository(result RepoParseResult) (Repository, error) { 118 | repo := result.Repo 119 | host := result.host 120 | path := result.path 121 | 122 | switch result.scheme { 123 | case "file": 124 | return NewLocalFileSystemRepository(path) 125 | case "s3": 126 | return NewS3Repository(host, path) 127 | case "gs": 128 | return NewGCSRepository(host, path) 129 | case "rclone": 130 | return NewRcloneRepository(host, path) 131 | case "ssh": 132 | return NewSSHRepository(host, path) 133 | case "http": 134 | return NewHttpRepository(repo) 135 | case "https": 136 | if IsAzureStorageUrl(repo) { 137 | return NewAzureBlobRepository(repo) 138 | } else { 139 | return NewHttpRepository(repo) 140 | } 141 | default: 142 | return nil, UnsupportedRepositoryError{ 143 | Message: "unsupported repository", 144 | } 145 | } 146 | } 147 | -------------------------------------------------------------------------------- /internal/repository/repository_test.go: -------------------------------------------------------------------------------- 1 | package repository 2 | 3 | import ( 4 | "os" 5 | "path/filepath" 6 | "strings" 7 | "testing" 8 | 9 | "github.com/stretchr/testify/assert" 10 | ) 11 | 12 | func getAbsFilePath(path string) string { 13 | cwd, err := os.Getwd() 14 | if err != nil { 15 | return "" 16 | } 17 | if !strings.HasPrefix(path, "/") { 18 | path, err = filepath.Abs(filepath.Join(cwd, path)) 19 | if err != nil { 20 | return "" 21 | } 22 | } 23 | return path 24 | } 25 | 26 | func Test(t *testing.T) { 27 | testCases := []struct { 28 | desc string 29 | repo string 30 | scheme string 31 | host string 32 | path string 33 | name string 34 | }{ 35 | {repo: "/tmp", scheme: "file", host: "", path: getAbsFilePath("/tmp"), name: "tmp"}, 36 | {repo: "tmp", scheme: "file", host: "", path: getAbsFilePath("tmp"), name: "tmp"}, 37 | {repo: "../tmp", scheme: "file", host: "", path: getAbsFilePath("../tmp"), name: "tmp"}, 38 | {repo: "file:///tmp", scheme: "file", host: "", path: "/tmp", name: "tmp"}, 39 | {repo: "host:/tmp", scheme: "ssh", host: "host", path: "/tmp", name: "tmp"}, 40 | {repo: "host:tmp", scheme: "ssh", host: "host", path: "tmp", 
name: "tmp"}, 41 | {repo: "host:../tmp", scheme: "ssh", host: "host", path: "../tmp", name: "tmp"}, 42 | {repo: "ssh://host/tmp", scheme: "ssh", host: "host", path: "/tmp", name: "tmp"}, 43 | {repo: "xyz://host/tmp", scheme: "xyz", host: "host", path: "/tmp", name: "tmp"}, 44 | {repo: "xyz://host", scheme: "xyz", host: "host", path: "", name: "host"}, 45 | } 46 | 47 | for _, tC := range testCases { 48 | t.Run("pares repo "+tC.repo, func(t *testing.T) { 49 | result, err := ParseRepo(tC.repo) 50 | if err != nil { 51 | t.Error(err) 52 | return 53 | } 54 | 55 | assert.Equal(t, tC.scheme, result.scheme) 56 | assert.Equal(t, tC.host, result.host) 57 | assert.Equal(t, tC.path, result.path) 58 | 59 | repoName, err := ParseRepoName(result) 60 | if err != nil { 61 | t.Error(err) 62 | return 63 | } 64 | 65 | assert.Equal(t, tC.name, repoName) 66 | }) 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /internal/repository/s3.go: -------------------------------------------------------------------------------- 1 | package repository 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "io" 7 | "os" 8 | "path/filepath" 9 | "strings" 10 | 11 | "github.com/aws/aws-sdk-go-v2/config" 12 | "github.com/aws/aws-sdk-go-v2/feature/s3/manager" 13 | "github.com/aws/aws-sdk-go-v2/service/s3" 14 | ) 15 | 16 | type S3Repository struct { 17 | Bucket string 18 | BasePath string 19 | client *s3.Client 20 | } 21 | 22 | func NewS3Repository(bucket, basePath string) (*S3Repository, error) { 23 | basePath = strings.TrimPrefix(basePath, "/") 24 | 25 | cfg, err := config.LoadDefaultConfig(context.TODO()) 26 | if err != nil { 27 | return nil, err 28 | } 29 | client := s3.NewFromConfig(cfg) 30 | 31 | return &S3Repository{ 32 | Bucket: bucket, 33 | BasePath: basePath, 34 | client: client, 35 | }, nil 36 | } 37 | 38 | func (repo *S3Repository) Upload(localPath, repoPath string, m *Meter) error { 39 | // Reference the code to show the progress when uploading 40 | // https://github.com/aws/aws-sdk-go/blob/main/example/service/s3/putObjectWithProcess/putObjWithProcess.go 41 | sourceFileStat, err := os.Stat(localPath) 42 | if err != nil { 43 | return err 44 | } 45 | 46 | if !sourceFileStat.Mode().IsRegular() { 47 | return fmt.Errorf("%s is not a regular file", localPath) 48 | } 49 | 50 | source, err := os.Open(localPath) 51 | if err != nil { 52 | return err 53 | } 54 | defer source.Close() 55 | 56 | fileInfo, err := source.Stat() 57 | if err != nil { 58 | return err 59 | } 60 | 61 | reader := &progressReader{ 62 | fp: source, 63 | size: fileInfo.Size(), 64 | meter: m, 65 | } 66 | 67 | key := filepath.Join(repo.BasePath, repoPath) 68 | input := &s3.PutObjectInput{ 69 | Bucket: &repo.Bucket, 70 | Key: &key, 71 | Body: reader, 72 | } 73 | 74 | if sourceFileStat.Size() < manager.DefaultUploadPartSize { 75 | _, err = repo.client.PutObject(context.TODO(), input) 76 | } else { 77 | uploader := manager.NewUploader(repo.client) 78 | _, err = uploader.Upload(context.TODO(), input) 79 | } 80 | return err 81 | } 82 | 83 | func (repo *S3Repository) Download(repoPath, localPath string, m *Meter) error { 84 | // Reference the code to show the progress when downloading 85 | // https://github.com/aws/aws-sdk-go/tree/main/example/service/s3/getObjectWithProgress 86 | key := filepath.Join(repo.BasePath, repoPath) 87 | input := &s3.GetObjectInput{ 88 | Bucket: &repo.Bucket, 89 | Key: &key, 90 | } 91 | 92 | downloader := manager.NewDownloader(repo.client) 93 | 94 | dest, err := os.Create(localPath) 95 | if err != 
83 | func (repo *S3Repository) Download(repoPath, localPath string, m *Meter) error { 84 | // See this example for showing download progress: 85 | // https://github.com/aws/aws-sdk-go/tree/main/example/service/s3/getObjectWithProgress 86 | key := filepath.Join(repo.BasePath, repoPath) 87 | input := &s3.GetObjectInput{ 88 | Bucket: &repo.Bucket, 89 | Key: &key, 90 | } 91 | 92 | downloader := manager.NewDownloader(repo.client) 93 | 94 | dest, err := os.Create(localPath) 95 | if err != nil { 96 | return err 97 | } 98 | 99 | defer dest.Close() 100 | 101 | writer := &progressWriter{writer: dest, meter: m} 102 | _, err = downloader.Download(context.TODO(), writer, input) 103 | return err 104 | } 105 | 106 | func (repo *S3Repository) Delete(repoPath string) error { 107 | key := filepath.Join(repo.BasePath, repoPath) 108 | input := &s3.DeleteObjectInput{ 109 | Bucket: &repo.Bucket, 110 | Key: &key, 111 | } 112 | 113 | _, err := repo.client.DeleteObject(context.TODO(), input) 114 | return err 115 | } 116 | 117 | func (repo *S3Repository) Stat(repoPath string) (FileInfo, error) { 118 | key := filepath.Join(repo.BasePath, repoPath) 119 | input := &s3.HeadObjectInput{ 120 | Bucket: &repo.Bucket, 121 | Key: &key, 122 | } 123 | _, err := repo.client.HeadObject(context.TODO(), input) 124 | if err != nil { 125 | return nil, err 126 | } 127 | 128 | return &S3FileInfo{ 129 | name: filepath.Base(repoPath), 130 | }, nil 131 | } 132 | 133 | func (repo *S3Repository) List(repoPath string) ([]FileInfo, error) { 134 | fullRepoPath := filepath.Join(repo.BasePath, repoPath) 135 | fullRepoPath = fullRepoPath + "/" 136 | delimiter := "/" 137 | input := &s3.ListObjectsV2Input{ 138 | Bucket: &repo.Bucket, 139 | Prefix: &fullRepoPath, 140 | Delimiter: &delimiter, 141 | } 142 | output, err := repo.client.ListObjectsV2(context.TODO(), input) 143 | if err != nil { 144 | return nil, err 145 | } 146 | 147 | entries := make([]FileInfo, 0) 148 | for _, prefix := range output.CommonPrefixes { 149 | fullname := *prefix.Prefix 150 | name := fullname[len(fullRepoPath) : len(fullname)-1] 151 | entry := S3FileInfo{name: name, isDir: true} 152 | entries = append(entries, &entry) 153 | } 154 | 155 | for _, obj := range output.Contents { 156 | fullname := *obj.Key 157 | entry := S3FileInfo{name: fullname[len(fullRepoPath):]} 158 | entries = append(entries, &entry) 159 | } 160 | return entries, nil 161 | } 162 | 163 | type S3FileInfo struct { 164 | name string 165 | isDir bool 166 | } 167 | 168 | func (fi *S3FileInfo) Name() string { 169 | return fi.name 170 | } 171 | 172 | func (fi *S3FileInfo) IsDir() bool { 173 | return fi.isDir 174 | } 175 | 176 | type progressReader struct { 177 | fp *os.File 178 | size int64 179 | meter *Meter 180 | } 181 | 182 | func (r *progressReader) Read(p []byte) (int, error) { 183 | read, err := r.fp.Read(p) 184 | if r.meter != nil { 185 | r.meter.AddBytes(read) 186 | } 187 | return read, err 188 | } 189 | 190 | func (r *progressReader) ReadAt(p []byte, off int64) (int, error) { 191 | n, err := r.fp.ReadAt(p, off) 192 | if err != nil { 193 | return n, err 194 | } 195 | 196 | if r.meter != nil { 197 | r.meter.AddBytes(n) 198 | } 199 | 200 | return n, err 201 | } 202 | 203 | func (r *progressReader) Seek(offset int64, whence int) (int64, error) { 204 | return r.fp.Seek(offset, whence) 205 | } 206 | 207 | type progressWriter struct { 208 | writer io.WriterAt 209 | meter *Meter 210 | } 211 | 212 | func (w *progressWriter) WriteAt(p []byte, off int64) (int, error) { 213 | n, err := w.writer.WriteAt(p, off) 214 | if err != nil { 215 | return n, err 216 | } 217 | 218 | if w.meter != nil { 219 | w.meter.AddBytes(n) 220 | } 221 | 222 | return n, err 223 | } 224 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "github.com/infuseai/artivc/cmd" 5 | ) 6 | 7 | func main() { 8 | cmd.Execute() 9 | } 10 |
--------------------------------------------------------------------------------
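/*
End-to-end usage sketch tying the repository layer together (illustrative
only; the bucket and file names are hypothetical). ParseRepo classifies the
repository string, NewRepository selects the backend, and the Repository
interface hides the transport:

    result, err := repository.ParseRepo("s3://my-bucket/datasets/foo")
    if err != nil {
        panic(err)
    }
    repo, err := repository.NewRepository(result) // -> *S3Repository
    if err != nil {
        panic(err)
    }
    if err := repo.Upload("model.bin", "objects/ab/cdef0123", nil); err != nil {
        panic(err)
    }

The same calls work unchanged for plain paths (local backend), host:path SSH
remotes, gs:// buckets, rclone remotes, and http(s) URLs, where an
*.blob.core.windows.net host selects the Azure Blob backend and anything else
falls back to the read-only HTTP backend.
*/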