├── .github
│   └── workflows
│       ├── ci.yml
│       ├── docs-publish.yml
│       └── release.yml
├── .gitignore
├── LICENSE
├── Makefile
├── README.md
├── cmd
│   ├── clone.go
│   ├── config.go
│   ├── diff.go
│   ├── docs.go
│   ├── get.go
│   ├── init.go
│   ├── list.go
│   ├── log.go
│   ├── pull.go
│   ├── push.go
│   ├── put.go
│   ├── root.go
│   ├── status.go
│   ├── tag.go
│   ├── util_test.go
│   ├── utils.go
│   └── version.go
├── docs
│   ├── .gitignore
│   ├── Makefile
│   ├── README.md
│   ├── assets
│   │   ├── ArtiVC_workspace.svg
│   │   ├── art-overview.png
│   │   ├── cheatsheet.png
│   │   └── sprites
│   │       └── regular.svg
│   ├── config
│   │   └── _default
│   │       ├── config.yaml
│   │       ├── languages.yaml
│   │       └── params.yaml
│   ├── content
│   │   └── en
│   │       ├── _includes
│   │       │   ├── _index.md
│   │       │   └── include-page.md
│   │       ├── _index.md
│   │       ├── backends
│   │       │   ├── _index.md
│   │       │   ├── azureblob.md
│   │       │   ├── gcs.md
│   │       │   ├── local.md
│   │       │   ├── rclone.md
│   │       │   ├── s3.md
│   │       │   └── ssh.md
│   │       ├── commands
│   │       │   ├── _index.md
│   │       │   ├── avc.md
│   │       │   ├── avc_clone.md
│   │       │   ├── avc_completion.md
│   │       │   ├── avc_completion_bash.md
│   │       │   ├── avc_completion_fish.md
│   │       │   ├── avc_completion_powershell.md
│   │       │   ├── avc_completion_zsh.md
│   │       │   ├── avc_config.md
│   │       │   ├── avc_diff.md
│   │       │   ├── avc_docs.md
│   │       │   ├── avc_get.md
│   │       │   ├── avc_init.md
│   │       │   ├── avc_list.md
│   │       │   ├── avc_log.md
│   │       │   ├── avc_pull.md
│   │       │   ├── avc_push.md
│   │       │   ├── avc_put.md
│   │       │   ├── avc_status.md
│   │       │   ├── avc_tag.md
│   │       │   ├── avc_version.md
│   │       │   └── images
│   │       │       └── geekdoc-dark.png
│   │       ├── design
│   │       │   ├── _index.md
│   │       │   ├── alternatives.md
│   │       │   ├── benchmark.md
│   │       │   ├── faq.md
│   │       │   ├── how-it-works.md
│   │       │   ├── images
│   │       │   │   ├── artiv-overview.png
│   │       │   │   ├── benchmark1.svg
│   │       │   │   ├── benchmark2.svg
│   │       │   │   └── benchmark3.svg
│   │       │   └── mlops.md
│   │       ├── posts
│   │       │   ├── _index.md
│   │       │   ├── initial-release.md
│   │       │   └── rename-project.md
│   │       ├── usage
│   │       │   ├── _index.md
│   │       │   ├── cheatsheet.md
│   │       │   ├── dryrun.md
│   │       │   ├── expose.md
│   │       │   ├── getting-started.md
│   │       │   ├── ignore-file.md
│   │       │   ├── images
│   │       │   │   └── cheatsheet.png
│   │       │   ├── partial-download.md
│   │       │   └── windows-supports.md
│   │       └── use-cases
│   │           ├── _index.md
│   │           ├── backup.md
│   │           ├── dataprep.md
│   │           └── experiment.md
│   ├── data
│   │   └── menu
│   │       ├── extra.yaml
│   │       ├── main.yaml
│   │       └── more.yaml
│   ├── layouts
│   │   └── shortcodes
│   │       └── sprites.html
│   └── static
│       ├── .htaccess
│       ├── ArtiVC_workspace.png
│       ├── _includes
│       │   ├── example.html.part
│       │   └── example.md.part
│       ├── brand.svg
│       ├── custom.css
│       ├── custom.css.example
│       ├── favicon
│       │   ├── android-chrome-192x192.png
│       │   ├── android-chrome-512x512.png
│       │   ├── apple-touch-icon.png
│       │   ├── favicon-16x16.png
│       │   ├── favicon-32x32.png
│       │   ├── favicon.ico
│       │   └── favicon.svg
│       ├── media
│       │   ├── bundle-menu.png
│       │   ├── file-tree.png
│       │   └── more-menu.png
│       └── socialartiv.png
├── go.mod
├── go.sum
├── internal
│   ├── core
│   │   ├── config.go
│   │   ├── errors.go
│   │   ├── hash.go
│   │   ├── manager.go
│   │   ├── manager_test.go
│   │   ├── types.go
│   │   ├── utils.go
│   │   └── utils_test.go
│   ├── executor
│   │   ├── executor.go
│   │   └── executor_test.go
│   ├── log
│   │   └── log.go
│   └── repository
│       ├── azureblob.go
│       ├── azureblob_test.go
│       ├── errors.go
│       ├── gcs.go
│       ├── http.go
│       ├── local.go
│       ├── local_test.go
│       ├── meter.go
│       ├── rclone.go
│       ├── repo_integration_test.go
│       ├── repository.go
│       ├── repository_test.go
│       ├── s3.go
│       └── ssh.go
└── main.go
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: Go test
2 |
3 | on:
4 | push:
5 | branches: "*"
6 | pull_request:
7 | branches: "*"
8 |
9 | jobs:
10 | unit-test:
11 | strategy:
12 | matrix:
13 | os: [ubuntu-latest, macos-latest]
14 | go: [1.17, 1.18, 1.19, '1.20']
15 | include:
16 | - os: ubuntu-latest
17 | go-build: ~/.cache/go-build
18 | name: ${{ matrix.os }} @ Go ${{ matrix.go }}
19 | runs-on: ${{ matrix.os }}
20 | steps:
21 | - name: Set up Go ${{ matrix.go }}
22 | uses: actions/setup-go@v2
23 | with:
24 | go-version: ${{ matrix.go }}
25 |
26 | - name: Checkout Code
27 | uses: actions/checkout@v3
28 | with:
29 | ref: ${{ github.ref }}
30 |
31 | - uses: actions/cache@v2
32 | with:
33 | path: |
34 | ${{ matrix.go-build }}
35 | ~/go/pkg/mod
36 | key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
37 | restore-keys: |
38 | ${{ runner.os }}-go-
39 | - name: Run Tests
40 | run: |
41 | make test
42 |
43 | it-test-s3:
44 | runs-on: ubuntu-latest
45 | steps:
46 | - uses: actions/checkout@v2
47 |
48 | - name: Set up Go
49 | uses: actions/setup-go@v2
50 | with:
51 | go-version: 1.17
52 |
53 | - name: Run test
54 | run: make integration-test
55 | env:
56 | TEST_REPOSITORY: ${{ secrets.REPOSITORY_S3 }}
57 | AWS_REGION: ${{ secrets.AWS_REGION }}
58 | AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
59 | AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
60 |
--------------------------------------------------------------------------------
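The checks this workflow runs can also be exercised locally through the same Makefile targets. A hedged sketch (the repository URL, region, and credentials are placeholders):

```sh
# Unit tests, as run by the matrix jobs
make test

# Integration tests against a real S3 repository, mirroring the it-test-s3 job
TEST_REPOSITORY=s3://my-test-bucket/artivc-it \
AWS_REGION=us-east-1 \
AWS_ACCESS_KEY_ID=AKIA... \
AWS_SECRET_ACCESS_KEY=... \
make integration-test
```
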
/.github/workflows/docs-publish.yml:
--------------------------------------------------------------------------------
1 | name: Netlify Publish
2 |
3 | on:
4 | release:
5 | types: [published]
6 |
7 | jobs:
8 | publish:
9 | runs-on: ubuntu-20.04
10 | steps:
11 | - name: Setup Hugo
12 | uses: peaceiris/actions-hugo@v2
13 | with:
14 | hugo-version: latest
15 |
16 | - name: Checkout files
17 | uses: actions/checkout@v2
18 |
19 | - name: Build docs files
20 | run: make -C docs/ build
21 |
22 | - name: Deploy to Netlify
23 | uses: nwtgck/actions-netlify@v1.2
24 | with:
25 | publish-dir: ./docs/public
26 | deploy-message: "Deploy from GitHub Actions with tag ${{ github.event.release.tag_name }}"
27 | env:
28 | NETLIFY_AUTH_TOKEN: ${{ secrets.NETLIFY_AUTH_TOKEN }}
29 | NETLIFY_SITE_ID: ${{ secrets.NETLIFY_SITE_ID }}
30 | timeout-minutes: 1
31 |
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
1 | name: Upload to release assets
2 |
3 | on:
4 | release:
5 | types: [ created ]
6 |
7 | jobs:
8 | releases-matrix:
9 | name: Release Go Binary
10 | runs-on: ubuntu-latest
11 | strategy:
12 | matrix:
13 | goos: [ linux, darwin ]
14 | goarch: [ arm64, amd64 ]
15 | steps:
16 | - name: Show environment
17 | run: export
18 | - uses: actions/checkout@v2
19 | - uses: wangyoucao577/go-release-action@v1.25
20 | with:
21 | github_token: ${{ secrets.GITHUB_TOKEN }}
22 | goos: ${{ matrix.goos }}
23 | goarch: ${{ matrix.goarch }}
24 | build_command: "make"
25 | build_flags: "build"
26 | ldflags: "-I."
27 | extra_files: bin/avc
28 | pre_command: "git clean -df"
29 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # If you prefer the allow list template instead of the deny list, see community template:
2 | # https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore
3 | #
4 | # Binaries for programs and plugins
5 | *.exe
6 | *.exe~
7 | *.dll
8 | *.so
9 | *.dylib
10 |
11 | # Test binary, built with `go test -c`
12 | *.test
13 |
14 | # Output of the go coverage tool, specifically when used with LiteIDE
15 | *.out
16 |
17 | # Dependency directories (remove the comment below to include it)
18 | # vendor/
19 |
20 | # Go workspace file
21 | go.work
22 |
23 | main
24 | .vscode
25 | bin
26 | .DS_Store
27 |
28 | .idea
29 | generated_docs
30 | .avc
31 | .avcignore
32 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | VERSION =
2 | LDFLAGS =
3 | GO ?= go
4 |
5 | GIT_COMMIT = $(shell git rev-parse HEAD)
6 | GIT_SHA = $(shell git rev-parse --short HEAD)
7 | GIT_TAG = $(shell git describe --tags --abbrev=0 --exact-match 2>/dev/null)
8 | GIT_DIRTY = $(shell test -n "`git status --porcelain`" && echo "dirty" || echo "clean")
9 |
10 | ifeq ($(VERSION),)
11 | VERSION := $(shell echo $${GITHUB_REF_NAME})
12 | endif
13 |
14 | LDFLAGS += -X github.com/infuseai/artivc/cmd.tagVersion=${VERSION}
15 | LDFLAGS += -X github.com/infuseai/artivc/cmd.gitCommit=${GIT_COMMIT}
16 | LDFLAGS += -X github.com/infuseai/artivc/cmd.gitTreeState=${GIT_DIRTY}
17 | LDFLAGS += -s -w
18 | LDFLAGS += $(EXT_LDFLAGS)
19 |
20 |
21 | build:
22 | mkdir -p bin
23 | $(GO) build -o bin/avc -ldflags '$(LDFLAGS)' main.go
24 |
25 | test:
26 | $(GO) test ./...
27 |
28 | integration-test:
29 | $(GO) test -v ./internal/repository
30 |
31 | .PHONY: doc-server
32 | doc-server:
33 | make -C docs/ start
34 |
--------------------------------------------------------------------------------
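The Makefile injects the version string and git metadata into the binary through `-ldflags -X`. A hedged usage sketch (the version tag is a placeholder):

```sh
# Build bin/avc with an explicit version instead of $GITHUB_REF_NAME
make build VERSION=v0.1.0

# The injected values show up in the version subcommand
./bin/avc version
```
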
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 | # ArtiVC
14 |
15 | [ArtiVC](https://artivc.io/) (**Arti**facts **V**ersion **C**ontrol) is a handy command-line tool for data versioning on cloud storage. With only one command, it helps you neatly snapshot your data and switch data between versions. Even better, it seamlessly integrates with your existing cloud environment. ArtiVC supports three major cloud providers (AWS S3, Google Cloud Storage, Azure Blob Storage) and remote filesystems over SSH.
16 |
17 | [](https://asciinema.org/a/6JEhzpJ5QMiSkiC74s5CyT257?autoplay=1)
18 |
19 | Try it out by following the [Getting Started](https://artivc.io/usage/getting-started/) guide.
20 |
21 | # Features
22 |
23 | - **Data Versioning**: Version your data like versioning code. ArtiVC supports commit history, commit messages, and version tags. You can diff two commits and pull data from a specific version.
24 | - **Use your own storage**: We are used to putting large files in NFS or S3. With ArtiVC, you can keep your files on the same storage without any changes.
25 | - **No additional server is required**: ArtiVC is a CLI tool. No server or gateway needs to be installed or operated.
26 | - **Multiple backends support**: ArtiVC natively supports local filesystem, remote filesystem (by SSH), AWS S3, Google Cloud Storage, and Azure Blob Storage as backends. And 40+ backends are supported through [Rclone](https://artivc.io/backends/rclone/) integration. [Learn more](https://artivc.io/backends/)
27 | - **Painless Configuration**: No one likes to configure, so we leverage your existing configuration as much as possible: use `.ssh/config` for SSH access, and use `aws configure`, `gcloud auth application-default login`, or `az login` for the cloud platforms.
28 | - **Efficient storage and transfer**: The file structure of the repository is stored and transferred efficiently by [design](https://artivc.io/design/how-it-works/). It avoids storing duplicated content and minimizes the number of files to upload when pushing a new version. [Learn more](https://artivc.io/design/benchmark/)
29 |
30 | # Documentation
31 |
32 | For more detail, please read the [ArtiVC documentation](https://artivc.io/usage/getting-started/)
33 |
--------------------------------------------------------------------------------
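As a sketch of the workflow the README describes (the bucket name and paths are placeholders), a typical session looks like:

```sh
# Version a local folder against an S3 repository
cd /path/to/mydataset
avc init s3://mybucket/datasets/mydataset
avc push -m "first snapshot"
avc tag v0.1.0

# Reproduce that exact version somewhere else
avc get -o /tmp/mydataset s3://mybucket/datasets/mydataset@v0.1.0
```
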
/cmd/clone.go:
--------------------------------------------------------------------------------
1 | package cmd
2 |
3 | import (
4 | "errors"
5 | "fmt"
6 | "io/fs"
7 | "os"
8 | "path/filepath"
9 | "strings"
10 |
11 | "github.com/infuseai/artivc/internal/core"
12 | "github.com/infuseai/artivc/internal/repository"
13 | "github.com/spf13/cobra"
14 | )
15 |
16 | var cloneCommand = &cobra.Command{
17 | Use: "clone <repository> [<dir>]",
18 | Short: "Clone a workspace",
19 | DisableFlagsInUseLine: true,
20 | Example: ` # clone a workspace with local repository
21 | avc clone /path/to/mydataset
22 |
23 | # clone a workspace with s3 repository
24 | avc clone s3://mybucket/path/to/mydataset`,
25 | Args: cobra.RangeArgs(1, 2),
26 | Run: func(cmd *cobra.Command, args []string) {
27 | cwd, err := os.Getwd()
28 | exitWithError(err)
29 | result, err := repository.ParseRepo(args[0])
30 | exitWithError(err)
31 | repo := result.Repo
32 |
33 | if strings.HasPrefix(repo, "http") && !repository.IsAzureStorageUrl(repo) {
34 | exitWithError(errors.New("clone not support under http(s) repo"))
35 | }
36 |
37 | _, err = repository.NewRepository(result)
38 | exitWithError(err)
39 |
40 | destDir, err := repository.ParseRepoName(result)
41 | exitWithError(err)
42 |
43 | if len(args) > 1 {
44 | destDir = args[1]
45 | }
46 |
47 | baseDir := filepath.Join(cwd, destDir)
48 | err = os.Mkdir(baseDir, fs.ModePerm)
49 | if err == nil || (os.IsExist(err) && isDirEmpty(baseDir)) {
50 | // pass
51 | } else if os.IsExist(err) {
52 | exitWithFormat("fatal: destination path '%s' already exists and is not an empty directory.", destDir)
53 | } else {
54 | exitWithFormat("fatal: cannot create destination path '%s'.", destDir)
55 | }
56 | fmt.Printf("Cloning into '%s'...\n", destDir)
57 |
58 | exitWithError(core.InitWorkspace(baseDir, repo))
59 |
60 | config, err := core.LoadConfig(baseDir)
61 | exitWithError(err)
62 |
63 | mngr, err := core.NewArtifactManager(config)
64 | exitWithError(err)
65 |
66 | err = mngr.Pull(core.PullOptions{})
67 | if err != nil {
68 | os.RemoveAll(baseDir) // remove created dir
69 | exitWithError(err)
70 | }
71 | },
72 | }
73 |
--------------------------------------------------------------------------------
/cmd/config.go:
--------------------------------------------------------------------------------
1 | package cmd
2 |
3 | import (
4 | "errors"
5 | "fmt"
6 | "os"
7 | "strings"
8 |
9 | "github.com/infuseai/artivc/internal/core"
10 | "github.com/infuseai/artivc/internal/repository"
11 | "github.com/spf13/cobra"
12 | )
13 |
14 | var configCommand = &cobra.Command{
15 | Use: "config [<key> [<value>]]",
16 | Short: "Configure the workspace",
17 | Long: "Configure the workspace. The config file is stored at \".avc/config\".",
18 | DisableFlagsInUseLine: true,
19 | Example: ` # List the config
20 | avc config
21 |
22 | # Get the config
23 | avc config repo.url
24 |
25 | # Set the config
26 | avc config repo.url s3://your-bucket/data`,
27 | Args: cobra.RangeArgs(0, 2),
28 | Run: func(cmd *cobra.Command, args []string) {
29 | config, err := core.LoadConfig("")
30 | exitWithError(err)
31 |
32 | switch len(args) {
33 | case 0:
34 | config.Print()
35 | case 1:
36 | value := config.Get(args[0])
37 | if value != nil {
38 | fmt.Println(value)
39 | } else {
40 | fmt.Fprintf(os.Stderr, "key not found: %s\n", args[0])
41 | }
42 | case 2:
43 | key := args[0]
44 | value := args[1]
45 | if key == "repo.url" {
46 | if strings.HasPrefix(value, "http") && !repository.IsAzureStorageUrl(value) {
47 | exitWithError(errors.New("http(s) repository is not supported"))
48 | }
49 |
50 | result, err := repository.ParseRepo(value)
51 | exitWithError(err)
52 |
53 | _, err = repository.NewRepository(result)
54 | exitWithError(err)
55 | }
56 |
57 | config.Set(key, value)
58 | exitWithError(config.Save())
59 | }
60 | },
61 | }
62 |
--------------------------------------------------------------------------------
/cmd/diff.go:
--------------------------------------------------------------------------------
1 | package cmd
2 |
3 | import (
4 | "github.com/infuseai/artivc/internal/core"
5 | "github.com/spf13/cobra"
6 | )
7 |
8 | var diffCommand = &cobra.Command{
9 | Use: "diff",
10 | Short: "Diff workspace/commits/references",
11 | Example: `# Diff two versions
12 | avc diff v0.1.0 v0.2.0`,
13 | Args: cobra.ExactArgs(2),
14 | Run: func(cmd *cobra.Command, args []string) {
15 | left := args[0]
16 | right := args[1]
17 | config, err := core.LoadConfig("")
18 | exitWithError(err)
19 |
20 | mngr, err := core.NewArtifactManager(config)
21 | exitWithError(err)
22 |
23 | err = mngr.Fetch()
24 | exitWithError(err)
25 |
26 | result, err := mngr.Diff(core.DiffOptions{
27 | LeftRef: left,
28 | RightRef: right,
29 | })
30 | exitWithError(err)
31 |
32 | result.Print(true)
33 | },
34 | }
35 |
--------------------------------------------------------------------------------
/cmd/docs.go:
--------------------------------------------------------------------------------
1 | package cmd
2 |
3 | import (
4 | "io/fs"
5 | "os"
6 | "path"
7 | "strings"
8 |
9 | "github.com/spf13/cobra"
10 | "github.com/spf13/cobra/doc"
11 | )
12 |
13 | var docsCommand = &cobra.Command{
14 | Use: "docs",
15 | Short: "Generate docs",
16 | Long: `Generate docs. For example:
17 |
18 | avc docs`,
19 | Run: func(cmd *cobra.Command, args []string) {
20 | const DocDir = "./generated_docs"
21 | err := os.Mkdir(DocDir, fs.ModePerm)
22 |
23 | if err == nil || (err != nil && os.IsExist(err)) {
24 | // pass when the directory already exists
25 | } else {
26 | exitWithFormat("Failed to create %s, skip to generate documents\n", DocDir)
27 | }
28 | linkHandler := func(name string) string {
29 | base := strings.TrimSuffix(name, path.Ext(name))
30 | return "/commands/" + strings.ToLower(base) + "/"
31 | }
32 |
33 | exitWithError(doc.GenMarkdownTreeCustom(cmd.Root(), DocDir, func(filestring string) string { return "" }, linkHandler))
34 | },
35 | }
36 |
--------------------------------------------------------------------------------
/cmd/get.go:
--------------------------------------------------------------------------------
1 | package cmd
2 |
3 | import (
4 | "errors"
5 | "os"
6 | "path/filepath"
7 | "strings"
8 |
9 | "github.com/infuseai/artivc/internal/core"
10 | "github.com/spf13/cobra"
11 | )
12 |
13 | // getCmd represents the download command
14 | var getCmd = &cobra.Command{
15 | Use: "get [-o <output>] <repository>[@<ref>|<commit>] [--] <path>...",
16 | DisableFlagsInUseLine: true,
17 | Short: "Download data from a repository",
18 | Example: ` # Download the latest version. The data goes to the "mydataset" folder.
19 | avc get s3://bucket/mydataset
20 |
21 | # Download the specific version
22 | avc get s3://mybucket/path/to/mydataset@v1.0.0
23 |
24 | # Download to a specific folder
25 | avc get -o /tmp/mydataset s3://bucket/mydataset
26 |
27 | # Download partial files
28 | avc get -o /tmp/mydataset s3://bucket/mydataset -- path/to/file1 path/to/file2 data/`,
29 | Args: cobra.MinimumNArgs(1),
30 | Run: func(cmd *cobra.Command, args []string) {
31 | var err error
32 |
33 | repoUrl, ref, err := parseRepoStr(args[0])
34 | exitWithError(err)
35 |
36 | baseDir, err := cmd.Flags().GetString("output")
37 | exitWithError(err)
38 |
39 | if baseDir == "" {
40 | comps := strings.Split(repoUrl, "/")
41 | if len(comps) == 0 {
42 | exitWithFormat("invlaid path: %v", repoUrl)
43 | }
44 | baseDir = comps[len(comps)-1]
45 | }
46 | baseDir, err = filepath.Abs(baseDir)
47 | exitWithError(err)
48 |
49 | metadataDir, _ := os.MkdirTemp(os.TempDir(), "*-avc")
50 | defer os.RemoveAll(metadataDir)
51 |
52 | config := core.NewConfig(baseDir, metadataDir, repoUrl)
53 |
54 | mngr, err := core.NewArtifactManager(config)
55 | exitWithError(err)
56 |
57 | options := core.PullOptions{NoFetch: true}
58 | if ref != "" {
59 | options.RefOrCommit = &ref
60 | }
61 |
62 | options.Delete, err = cmd.Flags().GetBool("delete")
63 | exitWithError(err)
64 |
65 | if len(args) > 1 {
66 | if options.Delete {
67 | exitWithError(errors.New("cannot download partial files and specify delete flag at the same time"))
68 | }
69 | fileInclude := core.NewAvcInclude(args[1:])
70 | options.FileFilter = func(path string) bool {
71 | return fileInclude.MatchesPath(path)
72 | }
73 | }
74 | exitWithError(mngr.Pull(options))
75 | },
76 | }
77 |
78 | func init() {
79 | getCmd.Flags().StringP("output", "o", "", "Output directory")
80 | getCmd.Flags().Bool("delete", false, "Delete extra files which are not listed in commit")
81 | }
82 |
--------------------------------------------------------------------------------
/cmd/init.go:
--------------------------------------------------------------------------------
1 | package cmd
2 |
3 | import (
4 | "errors"
5 | "fmt"
6 | "os"
7 | "strings"
8 |
9 | "github.com/infuseai/artivc/internal/core"
10 | "github.com/infuseai/artivc/internal/repository"
11 | "github.com/spf13/cobra"
12 | )
13 |
14 | var initCommand = &cobra.Command{
15 | Use: "init <repository>",
16 | Short: "Initiate a workspace",
17 | DisableFlagsInUseLine: true,
18 | Example: ` # Init a workspace with local repository
19 | avc init /path/to/mydataset
20 |
21 | # Init a workspace with s3 repository
22 | avc init s3://mybucket/path/to/mydataset`,
23 | Args: cobra.ExactArgs(1),
24 | Run: func(cmd *cobra.Command, args []string) {
25 | cwd, err := os.Getwd()
26 | exitWithError(err)
27 |
28 | result, err := repository.ParseRepo(args[0])
29 | exitWithError(err)
30 | repo := result.Repo
31 |
32 | if strings.HasPrefix(repo, "http") && !repository.IsAzureStorageUrl(repo) {
33 | exitWithError(errors.New("init not support under http(s) repo"))
34 | }
35 |
36 | _, err = repository.NewRepository(result)
37 | exitWithError(err)
38 |
39 | fmt.Printf("Initialize the artivc workspace of the repository '%s'\n", repo)
40 | exitWithError(core.InitWorkspace(cwd, repo))
41 | },
42 | }
43 |
44 | func init() {
45 | }
46 |
--------------------------------------------------------------------------------
/cmd/list.go:
--------------------------------------------------------------------------------
1 | package cmd
2 |
3 | import (
4 | "github.com/infuseai/artivc/internal/core"
5 | "github.com/spf13/cobra"
6 | )
7 |
8 | var listCommand = &cobra.Command{
9 | Use: "list",
10 | Short: "List files of a commit",
11 | Aliases: []string{"ls"},
12 | Example: ` # List files for the latest version
13 | avc list
14 |
15 | # List files for the specific version
16 | avc list v1.0.0`,
17 | Args: cobra.RangeArgs(0, 1),
18 | Run: func(cmd *cobra.Command, args []string) {
19 | var ref string
20 | if len(args) == 0 {
21 | ref = core.RefLatest
22 | } else {
23 | ref = args[0]
24 | }
25 |
26 | config, err := core.LoadConfig("")
27 | exitWithError(err)
28 |
29 | mngr, err := core.NewArtifactManager(config)
30 | exitWithError(err)
31 |
32 | exitWithError(mngr.List(ref))
33 | },
34 | }
35 |
36 | func init() {
37 | }
38 |
--------------------------------------------------------------------------------
/cmd/log.go:
--------------------------------------------------------------------------------
1 | package cmd
2 |
3 | import (
4 | "github.com/infuseai/artivc/internal/core"
5 | "github.com/spf13/cobra"
6 | )
7 |
8 | var logCommand = &cobra.Command{
9 | Use: "log [<ref>|<commit>]",
10 | DisableFlagsInUseLine: true,
11 | Short: "Log commits",
12 | Example: ` # Log commits from the latest
13 | avc log
14 |
15 | # Log commits from a specific version
16 | avc log v1.0.0`,
17 | Args: cobra.RangeArgs(0, 1),
18 | Run: func(cmd *cobra.Command, args []string) {
19 | config, err := core.LoadConfig("")
20 | exitWithError(err)
21 |
22 | var ref string
23 | if len(args) == 0 {
24 | ref = core.RefLatest
25 | } else {
26 | ref = args[0]
27 | }
28 |
29 | mngr, err := core.NewArtifactManager(config)
30 | exitWithError(err)
31 |
32 | exitWithError(mngr.Log(ref))
33 | },
34 | }
35 |
36 | func init() {
37 | }
38 |
--------------------------------------------------------------------------------
/cmd/pull.go:
--------------------------------------------------------------------------------
1 | package cmd
2 |
3 | import (
4 | "errors"
5 |
6 | "github.com/infuseai/artivc/internal/core"
7 | "github.com/spf13/cobra"
8 | )
9 |
10 | // pullCmd represents the pull command
11 | var pullCmd = &cobra.Command{
12 | Use: "pull [<ref>|<commit>] [flags] -- <path>...",
13 | Short: "Pull data from the repository",
14 | Example: ` # Pull the latest version
15 | avc pull
16 |
17 | # Pull from a specific version
18 | avc pull v1.0.0
19 |
20 | # Pull partial files
21 | avc pull -- path/to/partial
22 | avc pull v0.1.0 -- path/to/partial ...`,
23 | Run: func(cmd *cobra.Command, args []string) {
24 | config, err := core.LoadConfig("")
25 | exitWithError(err)
26 |
27 | mngr, err := core.NewArtifactManager(config)
28 | exitWithError(err)
29 |
30 | // options
31 | option := core.PullOptions{}
32 |
33 | option.DryRun, err = cmd.Flags().GetBool("dry-run")
34 | exitWithError(err)
35 |
36 | option.Delete, err = cmd.Flags().GetBool("delete")
37 | exitWithError(err)
38 |
39 | argsLenBeforeDash := cmd.Flags().ArgsLenAtDash()
40 | if argsLenBeforeDash == -1 {
41 | if len(args) == 1 {
42 | option.RefOrCommit = &args[0]
43 | } else if len(args) > 1 {
44 | exitWithError(errors.New("please specify \"--\" flag teminator"))
45 | }
46 | } else {
47 | if argsLenBeforeDash == 1 {
48 | option.RefOrCommit = &args[0]
49 | }
50 |
51 | if len(args)-argsLenBeforeDash > 0 {
52 | if option.Delete {
53 | exitWithError(errors.New("cannot pull partial files and specify delete flag at the same time"))
54 | }
55 |
56 | fileInclude := core.NewAvcInclude(args[argsLenBeforeDash:])
57 | option.FileFilter = func(path string) bool {
58 | return fileInclude.MatchesPath(path)
59 | }
60 | }
61 | }
62 |
63 | exitWithError(mngr.Pull(option))
64 | },
65 | }
66 |
67 | func init() {
68 | pullCmd.Flags().Bool("dry-run", false, "Dry run")
69 | pullCmd.Flags().Bool("delete", false, "Delete extra files which are not listed in commit")
70 | }
71 |
--------------------------------------------------------------------------------
/cmd/push.go:
--------------------------------------------------------------------------------
1 | package cmd
2 |
3 | import (
4 | "github.com/infuseai/artivc/internal/core"
5 | "github.com/spf13/cobra"
6 | )
7 |
8 | // pushCmd represents the push command
9 | var pushCmd = &cobra.Command{
10 | Use: "push [-m <message>]",
11 | DisableFlagsInUseLine: true,
12 | Short: "Push data to the repository",
13 | Long: `Push data to the repository. Branches are not implemented yet; every put or push command creates a commit and treats it as the latest commit.`,
14 | Example: ` # Push to the latest version
15 | avc push -m 'Initial version'
16 |
17 | # Push to the latest version and tag to specific version
18 | avc push -m 'Initial version'
19 | avc tag v1.0.0`,
20 | Args: cobra.NoArgs,
21 | Run: func(cmd *cobra.Command, args []string) {
22 | config, err := core.LoadConfig("")
23 | exitWithError(err)
24 |
25 | // options
26 | option := core.PushOptions{}
27 | message, err := cmd.Flags().GetString("message")
28 | exitWithError(err)
29 |
30 | if message != "" {
31 | option.Message = &message
32 | }
33 |
34 | option.DryRun, err = cmd.Flags().GetBool("dry-run")
35 | exitWithError(err)
36 |
37 | // push
38 | mngr, err := core.NewArtifactManager(config)
39 | exitWithError(err)
40 |
41 | exitWithError(mngr.Push(option))
42 | },
43 | }
44 |
45 | func init() {
46 | pushCmd.Flags().StringP("message", "m", "", "Commit meessage")
47 | pushCmd.Flags().Bool("dry-run", false, "Dry run")
48 | }
49 |
--------------------------------------------------------------------------------
/cmd/put.go:
--------------------------------------------------------------------------------
1 | package cmd
2 |
3 | import (
4 | "os"
5 | "path/filepath"
6 |
7 | "github.com/infuseai/artivc/internal/core"
8 | "github.com/spf13/cobra"
9 | )
10 |
11 | var putCmd = &cobra.Command{
12 | Use: "put [-m <message>] <dir> <repository>[@<tag>]",
13 | DisableFlagsInUseLine: true,
14 | Short: "Upload data to a repository",
15 | Example: ` # Upload the latest version
16 | avc put ./folder/ /path/to/mydataset
17 |
18 | # Upload and tag a specific version
19 | avc put ./folder/ /path/to/mydataset@v1.0.0`,
20 | Args: cobra.ExactArgs(2),
21 | Run: func(cmd *cobra.Command, args []string) {
22 | baseDir, err := filepath.Abs(args[0])
23 | exitWithError(err)
24 |
25 | repoUrl, ref, err := parseRepoStr(args[1])
26 | exitWithError(err)
27 |
28 | // options
29 | option := core.PushOptions{}
30 | message, err := cmd.Flags().GetString("message")
31 | exitWithError(err)
32 |
33 | if message != "" {
34 | option.Message = &message
35 | }
36 | if ref != "" {
37 | option.Tag = &ref
38 | }
39 |
40 | // Create temp metadata
41 | metadataDir, _ := os.MkdirTemp(os.TempDir(), "*-avc")
42 | defer os.RemoveAll(metadataDir)
43 |
44 | config := core.NewConfig(baseDir, metadataDir, repoUrl)
45 |
46 | // push
47 | mngr, err := core.NewArtifactManager(config)
48 | exitWithError(err)
49 |
50 | exitWithError(mngr.Push(option))
51 | },
52 | }
53 |
54 | func init() {
55 | putCmd.Flags().StringP("message", "m", "", "Commit meessage")
56 | }
57 |
--------------------------------------------------------------------------------
/cmd/root.go:
--------------------------------------------------------------------------------
1 | package cmd
2 |
3 | import (
4 | "os"
5 |
6 | "github.com/infuseai/artivc/internal/log"
7 | "github.com/spf13/cobra"
8 | )
9 |
10 | var debug bool
11 |
12 | // rootCmd represents the base command when called without any subcommands
13 | var rootCmd = &cobra.Command{
14 | Use: "avc",
15 | Short: "ArtiVC is a version control system for large files",
16 | Example: ` # Push data to the repository
17 | cd /path/to/my/data
18 | avc init s3://mybucket/path/to/repo
19 | avc push -m "my first commit"
20 |
21 | # Pull data from the repository
22 | cd /path/to/download
23 | avc init s3://mybucket/path/to/repo
24 | avc pull
25 |
26 | # Download by quick command
27 | avc get -o /path/to/download s3://mybucket/path/to/repo
28 |
29 | # Show command help
30 | avc -h
31 |
32 | For more information, please check https://github.com/infuseai/artivc`,
33 | PersistentPreRun: func(cmd *cobra.Command, args []string) {
34 | log.SetDebug(debug)
35 | },
36 | }
37 |
38 | // Execute adds all child commands to the root command and sets flags appropriately.
39 | // This is called by main.main(). It only needs to happen once to the rootCmd.
40 | func Execute() {
41 | err := rootCmd.Execute()
42 | if err != nil {
43 | os.Exit(1)
44 | }
45 | }
46 |
47 | func init() {
48 | cobra.EnableCommandSorting = false
49 | rootCmd.PersistentFlags().BoolVar(&debug, "debug", false, "enable the debug message")
50 |
51 | rootCmd.SetUsageTemplate(usageTemplate)
52 |
53 | addCommandWithGroup(GROUP_QUICK,
54 | getCmd,
55 | putCmd,
56 | )
57 |
58 | addCommandWithGroup(GROUP_BASIC,
59 | initCommand,
60 | cloneCommand,
61 | configCommand,
62 | statusCommand,
63 | pullCmd,
64 | pushCmd,
65 | tagCommand,
66 | listCommand,
67 | logCommand,
68 | diffCommand,
69 | )
70 |
71 | addCommandWithGroup("",
72 | versionCommand,
73 | docsCommand,
74 | )
75 | }
76 |
77 | func addCommandWithGroup(group string, cmds ...*cobra.Command) {
78 | for _, cmd := range cmds {
79 | cmd.Annotations = map[string]string{
80 | "group": group,
81 | }
82 | }
83 |
84 | rootCmd.AddCommand(cmds...)
85 | }
86 |
87 | var usageTemplate = `{{- /* usage template */ -}}
88 | {{define "command" -}}
89 | {{if (or .IsAvailableCommand (eq .Name "help"))}}
90 | {{rpad .Name .NamePadding }} {{.Short}}
91 | {{- end -}}
92 | {{- end -}}
93 | {{- /*
94 | Body
95 | */
96 | -}}
97 | Usage:{{if .Runnable}}
98 | {{.UseLine}}{{end}}{{if .HasAvailableSubCommands}}
99 | {{.CommandPath}} [command]{{end}}{{if gt (len .Aliases) 0}}
100 |
101 | Aliases:
102 | {{.NameAndAliases}}{{end}}{{if .HasExample}}
103 |
104 | Examples:
105 | {{.Example}}{{end}}
106 | {{if .HasAvailableSubCommands}}
107 | {{- if not .HasParent}}
108 | Basic Commands:{{range .Commands}}{{if (eq .Annotations.group "basic")}}{{template "command" .}}{{end}}{{end}}
109 |
110 | Quick Commands (Download or upload without a workspace):{{range .Commands}}{{if (eq .Annotations.group "quick")}}{{template "command" .}}{{end}}{{end}}
111 |
112 | Other Commands:{{range .Commands}}{{if not .Annotations.group}}{{template "command" .}}{{end}}{{end}}
113 | {{- else}}
114 | Available Commands:{{range .Commands}}{{if (or .IsAvailableCommand (eq .Name "help"))}}
115 | {{rpad .Name .NamePadding }} {{.Short}}{{end}}{{end}}
116 | {{- end -}}
117 | {{end}}
118 | {{if .HasAvailableLocalFlags}}
119 | Flags:
120 | {{.LocalFlags.FlagUsages | trimTrailingWhitespaces}}{{end}}{{if .HasAvailableInheritedFlags}}
121 |
122 | Global Flags:
123 | {{.InheritedFlags.FlagUsages | trimTrailingWhitespaces}}{{end}}{{if .HasHelpSubCommands}}
124 |
125 | Additional help topics:{{range .Commands}}{{if .IsAdditionalHelpTopicCommand}}
126 | {{rpad .CommandPath .CommandPathPadding}} {{.Short}}{{end}}{{end}}{{end}}{{if .HasAvailableSubCommands}}
127 |
128 | Use "{{.CommandPath}} [command] --help" for more information about a command.{{end}}
129 | `
130 |
--------------------------------------------------------------------------------
/cmd/status.go:
--------------------------------------------------------------------------------
1 | package cmd
2 |
3 | import (
4 | "fmt"
5 |
6 | "github.com/infuseai/artivc/internal/core"
7 | "github.com/spf13/cobra"
8 | )
9 |
10 | var statusCommand = &cobra.Command{
11 | Use: "status",
12 | Short: "Show the status of the workspace",
13 | DisableFlagsInUseLine: true,
14 | Example: ` # check current status
15 | avc status`,
16 | Args: cobra.NoArgs,
17 | Run: func(cmd *cobra.Command, args []string) {
18 | config, err := core.LoadConfig("")
19 | exitWithError(err)
20 |
21 | mngr, err := core.NewArtifactManager(config)
22 | exitWithError(err)
23 |
24 | exitWithError(mngr.Fetch())
25 |
26 | fmt.Printf("workspace of the repository '%s'\n\n", config.RepoUrl())
27 |
28 | result, err := mngr.Status()
29 | exitWithError(err)
30 |
31 | result.Print(true)
32 | },
33 | }
34 |
35 | func init() {
36 | }
37 |
--------------------------------------------------------------------------------
/cmd/tag.go:
--------------------------------------------------------------------------------
1 | package cmd
2 |
3 | import (
4 | "github.com/infuseai/artivc/internal/core"
5 | "github.com/spf13/cobra"
6 | )
7 |
8 | var tagCommand = &cobra.Command{
9 | Use: "tag [--delete <tag>] [<tag>]",
10 | DisableFlagsInUseLine: true,
11 | Short: "List or manage tags",
12 | Example: ` # List the tags
13 | avc tag
14 |
15 | # Tag the latest commit
16 | avc tag v1.0.0
17 |
18 | # Tag the specific commit
19 | avc tag --ref a1b2c3d4 v1.0.0
20 |
21 | # Delete a tag
22 | avc tag --delete v1.0.0`,
23 | Args: cobra.RangeArgs(0, 2),
24 | Run: func(cmd *cobra.Command, args []string) {
25 | config, err := core.LoadConfig("")
26 | exitWithError(err)
27 |
28 | mngr, err := core.NewArtifactManager(config)
29 | exitWithError(err)
30 |
31 | if len(args) == 0 {
32 | exitWithError(mngr.ListTags())
33 | } else if len(args) == 1 {
34 | tag := args[0]
35 | refOrCommit, err := cmd.Flags().GetString("ref")
36 | exitWithError(err)
37 | delete, err := cmd.Flags().GetBool("delete")
38 | exitWithError(err)
39 |
40 | if !delete {
41 | exitWithError(mngr.AddTag(refOrCommit, tag))
42 | } else {
43 | exitWithError(mngr.DeleteTag(tag))
44 | }
45 | } else {
46 | exitWithFormat("requires 0 or 1 argument\n")
47 | }
48 | },
49 | }
50 |
51 | func init() {
52 | tagCommand.Flags().BoolP("delete", "D", false, "Delete a tag")
53 | tagCommand.Flags().String("ref", core.RefLatest, "The source commit or reference to be tagged")
54 | }
55 |
--------------------------------------------------------------------------------
/cmd/util_test.go:
--------------------------------------------------------------------------------
1 | package cmd
2 |
3 | import (
4 | "testing"
5 |
6 | "github.com/stretchr/testify/assert"
7 | )
8 |
9 | func TestTransformRepoUrl(t *testing.T) {
10 | baseDir := "/tmp/artivc"
11 | testCases := []struct {
12 | desc string
13 | in string
14 | out string
15 | }{
16 | {desc: "local file", in: "/this/is/my/path", out: "/this/is/my/path"},
17 | {desc: "relative path", in: "../path", out: "/tmp/path"},
18 | {desc: "relative path2", in: "../../../path", out: "/path"},
19 | {desc: "normal url (file)", in: "file://mybucket/this/is/my/path", out: "file://mybucket/this/is/my/path"},
20 | {desc: "normal url (s3)", in: "s3://mybucket/this/is/my/path", out: "s3://mybucket/this/is/my/path"},
21 | }
22 |
23 | for _, tC := range testCases {
24 | t.Run(tC.desc, func(t *testing.T) {
25 | result, err := transformRepoUrl(baseDir, tC.in)
26 | if err != nil {
27 | assert.Empty(t, tC.out)
28 | }
29 | assert.Equal(t, tC.out, result)
30 | })
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/cmd/utils.go:
--------------------------------------------------------------------------------
1 | package cmd
2 |
3 | import (
4 | "errors"
5 | "fmt"
6 | "io"
7 | neturl "net/url"
8 | "os"
9 | "path/filepath"
10 | "strings"
11 |
12 | "github.com/spf13/cobra"
13 | )
14 |
15 | const (
16 | GROUP_BASIC = "basic"
17 | GROUP_QUICK = "quick"
18 | )
19 |
20 | func exitWithError(err error) {
21 | cobra.CheckErr(err)
22 | }
23 |
24 | func exitWithFormat(format string, a ...interface{}) {
25 | cobra.CheckErr(fmt.Sprintf(format, a...))
26 | }
27 |
28 | func parseRepoStr(repoAndRef string) (repoUrl string, ref string, err error) {
29 | comps := strings.Split(repoAndRef, "@")
30 | if len(comps) == 1 {
31 | repoUrl = repoAndRef
32 | } else if len(comps) == 2 {
33 | repoUrl = comps[0]
34 | ref = comps[1]
35 | } else {
36 | err = errors.New("Invalid repository: " + repoAndRef)
37 | }
38 | return
39 | }
40 |
41 | func transformRepoUrl(base string, repo string) (string, error) {
42 | url, err := neturl.Parse(repo)
43 | if err != nil {
44 | return "", err
45 | }
46 |
47 | if url.Scheme != "" {
48 | return repo, nil
49 | }
50 |
51 | if strings.HasPrefix(repo, "/") {
52 | return repo, nil
53 | }
54 |
55 | return filepath.Abs(filepath.Join(base, url.Path))
56 | }
57 |
58 | func isDirEmpty(dir string) bool {
59 | f, err := os.Open(dir)
60 | if err != nil {
61 | return false
62 | }
63 | defer f.Close()
64 |
65 | _, err = f.Readdirnames(1)
66 | return err == io.EOF
67 | }
68 |
--------------------------------------------------------------------------------
/cmd/version.go:
--------------------------------------------------------------------------------
1 | package cmd
2 |
3 | import (
4 | "encoding/json"
5 | "fmt"
6 | "runtime"
7 |
8 | "github.com/spf13/cobra"
9 | )
10 |
11 | var (
12 | version = "v0.1-dev"
13 |
14 | // overwrite version when tagVersion exists
15 | tagVersion = ""
16 |
17 | // gitCommit is the git sha1
18 | gitCommit = ""
19 |
20 | // gitTreeState is the state of the git tree {dirty or clean}
21 | gitTreeState = ""
22 | )
23 |
24 | type BuildInfo struct {
25 | Version string
26 | GitCommit string
27 | GitTreeState string
28 | GoVersion string
29 | }
30 |
31 | func GetVersion() string {
32 | info := BuildInfo{
33 | Version: version,
34 | GitCommit: gitCommit,
35 | GitTreeState: gitTreeState,
36 | GoVersion: runtime.Version(),
37 | }
38 |
39 | if tagVersion != "" {
40 | info.Version = tagVersion
41 | }
42 |
43 | data, _ := json.Marshal(info)
44 | return fmt.Sprintf("version.BuildInfo%s", string(data))
45 | }
46 |
47 | var versionCommand = &cobra.Command{
48 | Use: "version",
49 | Short: "Print the version information",
50 | Long: `Print the version information. For example:
51 |
52 | avc version
53 | version.BuildInfo{"Version":"v0.1-dev","GitCommit":"59b5c650fbed4d91c1e54b7cb3c3f6f0c50e5fa4","GitTreeState":"dirty","GoVersion":"go1.17.5"}
54 | `,
55 | Run: func(cmd *cobra.Command, args []string) {
56 | fmt.Println(GetVersion())
57 | },
58 | }
59 |
--------------------------------------------------------------------------------
/docs/.gitignore:
--------------------------------------------------------------------------------
1 | themes
2 | resources
3 | public
4 | .hugo_build.lock
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | THEME_VERSION := v0.27.4
2 | THEME := hugo-geekdoc
3 | BASEDIR := .
4 | THEMEDIR := $(BASEDIR)/themes
5 |
6 | build: doc-assets doc-build
7 |
8 | start: doc-assets
9 | hugo server -D
10 |
11 | doc-assets:
12 | mkdir -p $(THEMEDIR)/$(THEME)/ ; \
13 | curl -sSL "https://github.com/thegeeklab/$(THEME)/releases/download/${THEME_VERSION}/$(THEME).tar.gz" | tar -xz -C $(THEMEDIR)/$(THEME)/ --strip-components=1
14 |
15 |
16 | doc-commands:
17 | rm -rf content/en/commands/avc.md
18 | rm -rf content/en/commands/avc_*.md
19 | go run ../main.go docs
20 | cp -R ./generated_docs/ content/en/commands/
21 | rm -rf ./generated_docs/
22 |
23 | doc-build:
24 | cd $(BASEDIR); hugo
25 |
26 | clean:
27 | rm -rf $(THEMEDIR) && \
28 | rm -rf $(BASEDIR)/public
29 |
--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
1 | The root of the documentation site. The site is generated with [Hugo](https://gohugo.io/).
2 |
3 | ## Prepare Environment
4 | Install [hugo](https://gohugo.io/getting-started/quick-start/)
5 |
6 | ## Run dev server
7 |
8 | ```
9 | make start
10 | ```
11 |
12 | ## Build doc site
13 |
14 | ```
15 | make build
16 | ```
--------------------------------------------------------------------------------
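The pages under `content/en/commands` are generated from the cobra command definitions rather than written by hand. A hedged sketch using the `doc-commands` target from the Makefile in this directory:

```sh
# Regenerate the CLI reference, then rebuild the site
make doc-commands
make build
```
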
/docs/assets/ArtiVC_workspace.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/docs/assets/art-overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InfuseAI/ArtiVC/76a47f0105538a7511742f154c0da22ae0372eec/docs/assets/art-overview.png
--------------------------------------------------------------------------------
/docs/assets/cheatsheet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InfuseAI/ArtiVC/76a47f0105538a7511742f154c0da22ae0372eec/docs/assets/cheatsheet.png
--------------------------------------------------------------------------------
/docs/config/_default/config.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | baseURL: https://artivc.io/
3 | title: ArtiVC
4 | theme: hugo-geekdoc
5 |
6 | pygmentsUseClasses: true
7 | pygmentsCodeFences: true
8 | timeout: 180000
9 | pluralizeListTitles: false
10 |
11 | defaultContentLanguage: en
12 |
13 | disablePathToLower: true
14 | enableGitInfo: true
15 |
16 | enableRobotsTXT: true
17 |
18 | markup:
19 | goldmark:
20 | renderer:
21 | unsafe: true
22 | tableOfContents:
23 | startLevel: 1
24 | endLevel: 9
25 |
26 | taxonomies:
27 | tag: tags
28 |
29 | outputs:
30 | home:
31 | - HTML
32 | page:
33 | - HTML
34 | section:
35 | - HTML
36 | taxonomy:
37 | - HTML
38 | term:
39 | - HTML
40 |
--------------------------------------------------------------------------------
/docs/config/_default/languages.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | en:
3 | languageName: "English"
4 | contentDir: "content/en"
5 | weight: 10
6 |
--------------------------------------------------------------------------------
/docs/config/_default/params.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | description: >
3 | ArtiVC (Artifact Version Control) is a version control system for large files.
4 | images:
5 | - "ArtiVC_workspace.png"
6 |
7 | geekdocToC: 3
8 | geekdocTagsToMenu: true
9 |
10 | geekdocRepo: https://github.com/InfuseAI/ArtiVC
11 | geekdocEditPath: edit/main/docs
12 |
13 | geekdocSearch: true
14 | geekdocSearchShowParent: true
15 |
16 | geekdocLegalNotice: https://thegeeklab.de/legal-notice/#contact-information
17 | geekdocPrivacyPolicy: https://thegeeklab.de/legal-notice/#privacy-policy
18 |
19 | geekdocImageLazyLoading: true
20 | geekdocDarkModeDim: true
21 |
--------------------------------------------------------------------------------
/docs/content/en/_includes/_index.md:
--------------------------------------------------------------------------------
1 | ---
2 | GeekdocHidden: true
3 | ---
4 |
--------------------------------------------------------------------------------
/docs/content/en/_includes/include-page.md:
--------------------------------------------------------------------------------
1 | _**Example page include**_
2 |
3 | {{< hint info >}}
4 | **Example Shortcode**\
5 | Shortcode used in an include page.
6 | {{< /hint >}}
7 |
8 | | Head 1 | Head 2 | Head 3 |
9 | | ------ | ------ | ------ |
10 | | 1 | 2 | 3 |
11 |
--------------------------------------------------------------------------------
/docs/content/en/_index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title:
3 | geekdocNav: false
4 | geekdocBreadcrumb: false
5 | geekdocAlign: center
6 | geekdocAnchor: false
7 | ---
8 |
9 | {{< columns >}}
10 |
11 |
12 | ArtiVC (**Arti**fact **V**ersion **C**ontrol) is a handy command-line tool for data versioning on cloud storage. With only one command, it helps you neatly snapshot your data and switch data between versions. Even better, it seamlessly integrates with your existing cloud environment. ArtiVC supports three major cloud providers (AWS S3, Google Cloud Storage, Azure Blob Storage) and remote filesystems over SSH.
13 |
14 |
15 |
16 |
17 | Getting Started
18 |
19 |
20 |
21 | <--->
22 | [](https://asciinema.org/a/6JEhzpJ5QMiSkiC74s5CyT257?autoplay=1)
23 | {{< /columns >}}
24 |
25 |
26 | # Feature Overview
27 |
28 | {{< columns >}}
29 | ## Data Versioning
30 |
31 | Version your data like versioning code. ArtiVC supports commit history, commit messages, and version tags. You can diff two commits and pull data from a specific version.
32 |
33 | <--->
34 |
35 | ## Use your own storage
36 |
37 | We are used to putting large files in NFS or S3. With ArtiVC, you can keep your files on the same storage without any changes.
38 |
39 | <--->
40 |
41 | ## No additional server is required
42 |
43 | ArtiVC is a CLI tool. No server or gateway needs to be installed or operated.
44 |
45 | {{< /columns >}}
46 |
47 | {{< columns >}}
48 |
49 | ## Multiple backends support
50 |
51 | ArtiVC natively supports the local filesystem, remote filesystems (via SSH), AWS S3, Google Cloud Storage, and Azure Blob Storage as backends. And 40+ more backends are supported through the [Rclone](backends/rclone/) integration. [Learn more](backends/)
52 |
53 | <--->
54 |
55 | ## Painless Configuration
56 |
57 | No one likes to configure. So we leverage your existing configuration as much as possible: use `.ssh/config` for SSH access, and use `aws configure`, `gcloud auth application-default login`, or `az login` for the cloud platforms.
58 |
59 | <--->
60 |
61 | ## Efficient storage and transfer
62 |
63 | The file structure of the repository is stored and transferred efficiently by [design](design/how-it-works/). It avoids storing duplicated content and minimizes the number of files to upload when pushing a new version. [Learn more](design/benchmark/)
64 |
65 |
66 | {{< /columns >}}
67 |
--------------------------------------------------------------------------------
/docs/content/en/backends/_index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Backends
3 | weight: 2
4 | ---
5 |
6 |
7 | | Backend | Repository URL | Doc |
8 | | --- | --- | --- |
9 | | Local Filesystem | `/path/to/data` | [{{< icon "gdoc_link" >}}](local) |
10 | | Remote Filesystem (SSH) | `<host>:path/to/data` | [{{< icon "gdoc_link" >}}](ssh) |
11 | | AWS S3 [{{< icon "gdoc_language" >}}](https://aws.amazon.com/s3/) | `s3://<bucket>/path/to/data` | [{{< icon "gdoc_link" >}}](s3) |
12 | | Google Cloud Storage [{{< icon "gdoc_language" >}}](https://cloud.google.com/storage) | `gs://<bucket>/path/to/data` | [{{< icon "gdoc_link" >}}](gcs) |
13 | | Azure Blob Storage [{{< icon "gdoc_language" >}}](https://azure.microsoft.com/services/storage/blobs/) | `https://<account>.blob.core.windows.net/<container>/path/to/data` | [{{< icon "gdoc_link" >}}](azureblob) |
14 | | Rclone [{{< icon "gdoc_language" >}}](https://rclone.org/) | `rclone://<remote>/path/to/data` | [{{< icon "gdoc_link" >}}](rclone) |
15 |
16 |
--------------------------------------------------------------------------------
/docs/content/en/backends/azureblob.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Azure Blob Storage
3 | weight: 13
4 | ---
5 |
6 | {{< toc >}}
7 |
8 | Use [Azure Blob Storage](https://azure.microsoft.com/services/storage/blobs/) as the repository backend.
9 |
10 | ## Configuration
11 |
12 | Before using the backend, you have to set up the credentials. There are two ways to configure them:
13 |
14 | - **Use the Azure CLI to log in:** suitable for development environments.
15 | - **Use environment variables:** suitable for production or CI environments.
16 |
17 |
18 | {{< hint warning >}}
19 | **Assign the Permission**\
20 | The logged-in account requires the **Storage Blob Data Contributor** role on the storage account. Assign it in the **Azure Portal**:
21 |
22 | *Storage Accounts* > *my account* > *Access Control (IAM)* > *Role assignments*
23 |
24 | For more information, please see https://docs.microsoft.com/azure/storage/blobs/assign-azure-role-data-access
25 | {{< /hint >}}
26 |
27 | The Azure Blob Storage backend authenticates through the default procedure defined by the [Azure SDK for Go](https://docs.microsoft.com/azure/developer/go/azure-sdk-authentication).
28 |
29 | ### Use Azure CLI to login
30 |
31 | This backend supports using the [Azure CLI](https://docs.microsoft.com/cli/azure/install-azure-cli) to configure the login account. It opens the browser and starts the login process.
32 |
33 | ```
34 | az login
35 | ```
36 |
37 | It also supports other login options provided by `az login`, such as
38 |
39 | ```
40 | az login --service-principal -u <app-id> -p <password-or-cert> -t <tenant>
41 | ```
42 |
43 | ### Use Environment Variables
44 |
45 | - Service principal with a secret
46 |
47 | | Name | Description |
48 | | --- | --- |
49 | | AZURE_TENANT_ID | ID of the application's Azure AD tenant |
50 | | AZURE_CLIENT_ID | Application ID of an Azure service principal |
51 | | AZURE_CLIENT_SECRET | Password of the Azure service principal |
52 |
53 | - Service principal with certificate
54 |
55 | | Name | Description |
56 | | --- | --- |
57 | | AZURE_TENANT_ID | ID of the application's Azure AD tenant |
58 | | AZURE_CLIENT_ID | ID of an Azure AD application |
59 | | AZURE_CLIENT_CERTIFICATE_PATH | Path to a certificate file including private key (without password protection) |
60 |
61 | - Username and password
62 |
63 | | Name | Description |
64 | | --- | --- |
65 | | AZURE_CLIENT_ID | ID of an Azure AD application |
66 | | AZURE_USERNAME | A username (usually an email address) |
67 | | AZURE_PASSWORD | That user's password |
68 |
69 | - Managed identity
70 |
71 | [Managed identities](https://docs.microsoft.com/azure/active-directory/managed-identities-azure-resources/overview) eliminate the need for developers to manage credentials. By connecting to resources that support Azure AD authentication, applications can use Azure AD tokens instead of credentials.
72 |
73 | | Name | Description |
74 | | --- | --- |
75 | | AZURE_CLIENT_ID | User-assigned managed identity client ID |
76 |
77 | - Storage account key
78 |
79 | | Name | Description |
80 | | --- | --- |
81 | | AZURE_STORAGE_ACCOUNT_KEY | The access key of the storage account |
82 |
83 | ## Usage
84 |
85 | Init a workspace
86 | ```shell
87 | avc init https://mystorageaccount.blob.core.windows.net/mycontainer/path/to/mydataset
88 | ```
89 |
90 | Clone a repository
91 | ```shell
92 | avc clone https://mystorageaccount.blob.core.windows.net/mycontainer/path/to/mydataset
93 | cd mydataset/
94 | ```
95 |
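96 | In a CI environment, a typical flow is to export one set of the variables above and then run the avc commands as usual. A minimal sketch, assuming a service principal with a secret (all values are placeholders):
97 |
98 | ```shell
99 | # Hypothetical values: authenticate with a service principal secret
100 | export AZURE_TENANT_ID=00000000-0000-0000-0000-000000000000
101 | export AZURE_CLIENT_ID=11111111-1111-1111-1111-111111111111
102 | export AZURE_CLIENT_SECRET=my-client-secret
103 |
104 | # Push a new version without an interactive az login
105 | avc push -m "nightly dataset update"
106 | ```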
--------------------------------------------------------------------------------
/docs/content/en/backends/gcs.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Google Cloud Storage
3 | weight: 12
4 | ---
5 |
6 | {{< toc >}}
7 |
8 | Use [Google Cloud Storage (GCS)](https://cloud.google.com/storage) as the repository backend.
9 |
10 | Note that Google Cloud Storage is not [Google Drive](https://www.google.com.tw/drive/). They are different Google products.
11 |
12 | ## Configuration
13 |
14 | Before using the backend, you have to configure the credentials. There are three methods to do so.
15 |
16 | 1. Use application default credentials. This is the recommended way in your development environment.
17 |
18 | ```
19 | gcloud auth application-default login
20 | ```
21 |
22 | It will open the browser and start the login process.
23 |
24 | 1. Use service account credentials. This is the recommended way in a CI, job, or production environment.
25 |
26 | ```
27 | export GOOGLE_APPLICATION_CREDENTIALS=/path/to/service-account-credentials.json
28 | ```
29 |
30 | To get this JSON file, please see the [Passing credentials manually](https://cloud.google.com/docs/authentication/production#manually) document.
31 |
32 | 1. Use the service account of the GCP resource (e.g. GCE, GKE). This is the recommended way if ArtiVC runs in a GCP environment. Please see the [default service accounts](https://cloud.google.com/iam/docs/service-accounts#default) document.
33 |
34 |
35 | The GCS backend finds credentials through the default procedure defined by [Google Cloud](https://cloud.google.com/docs/authentication/production).
36 |
37 |
38 |
39 | ## Usage
40 |
41 | Init a workspace
42 | ```shell
43 | avc init gs://mybucket/path/to/mydataset
44 | ```
45 |
46 | Clone a repository
47 | ```shell
48 | avc clone gs://mybucket/path/to/mydataset
49 | cd mydataset/
50 | ```
51 |
52 |
53 | ## Environment Variables
54 |
55 | | Name | Description | Default value |
56 | | --- | --- | --- |
57 | | `GOOGLE_APPLICATION_CREDENTIALS` | The location of service account keys in JSON | |
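58 |
59 | For example, a CI job might authenticate with a service account key file before pushing. A minimal sketch, assuming the key file path is a placeholder:
60 |
61 | ```shell
62 | # Hypothetical path: point the SDK at a service account key
63 | export GOOGLE_APPLICATION_CREDENTIALS=/path/to/service-account-credentials.json
64 |
65 | # Push a new version non-interactively
66 | avc push -m "update dataset"
67 | ```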
--------------------------------------------------------------------------------
/docs/content/en/backends/local.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Local Filesystem
3 | weight: 1
4 | ---
5 |
6 | Use the local filesystem as the repository backend. The folder can reside in a local disk or a mounted NFS.
7 |
8 | ## Usage
9 | Init a workspace
10 |
11 | ```shell
12 | avc init /path/to/mydataset
13 |
14 | # Or use relative path
15 | # avc init ../mydataset
16 | ```
17 |
18 | Clone a repository
19 |
20 | ```shell
21 | avc clone /path/to/mydataset
22 | cd mydataset/
23 | ```
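24 |
25 | Putting it together, a minimal local round trip might look like this (all paths are examples):
26 |
27 | ```shell
28 | # Version a folder into a local repository
29 | cd /path/to/workspace
30 | avc init /path/to/mydataset
31 | avc push -m "first version"
32 |
33 | # Fetch it back somewhere else
34 | cd /tmp
35 | avc clone /path/to/mydataset
36 | ```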
--------------------------------------------------------------------------------
/docs/content/en/backends/rclone.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Rclone
3 | weight: 50
4 | ---
5 |
6 | Use [Rclone](https://rclone.org/) as the repository backend.
7 |
8 | Rclone is a command-line program to manage files on cloud storage. As an analogy, **Rclone** is the **rsync** for cloud storage. Rclone supports [40+ providers](https://rclone.org/#providers). The Rclone backend uses the **rclone** command to communicate with the remote backend.
9 |
10 |
11 | ## Configuration
12 |
13 | 1. [Install the Rclone](https://rclone.org/install/)
14 | 1. Configure the Rclone
15 | ```
16 | rclone config
17 | ```
18 | 1. Check if the rclone command can access your remote backend and path.
19 | ```
20 | rclone lsd <remote>:<path>
21 | ```
22 |
23 | ## Usage
24 |
25 | Init a workspace
26 |
27 | ```shell
28 | # avc init rclone://<remote>/<path>
29 | avc init rclone://myremote/path/to/mydataset
30 | ```
31 |
32 | Clone a repository
33 |
34 | ```shell
35 | avc clone rclone://myremote/path/to/mydataset
36 | cd mydataset/
37 | ```
--------------------------------------------------------------------------------
/docs/content/en/backends/s3.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: AWS S3
3 | weight: 11
4 | ---
5 |
6 | {{< toc >}}
7 |
8 | Use AWS S3 as the repository backend.
9 |
10 | ## Features
11 |
12 | 1. Supports `~/.aws/credentials`, the same credentials file as the AWS CLI
13 | 1. Supports [multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/userguide/mpuoverview.html)
14 |
15 | ## Configuration
16 |
17 | 1. Install the [AWS CLI](https://aws.amazon.com/cli/)
18 | 2. Configure the AWS CLI. Please see the [AWS documentation](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html)
19 | ```
20 | aws configure
21 | ```
22 | 3. Check current config
23 | ```
24 | aws configure list
25 | ```
26 |
27 | The S3 backend loads configuration through the default procedure of the [AWS SDK for Go](https://aws.github.io/aws-sdk-go-v2/docs/configuring-sdk/#specifying-credentials).
28 |
29 | ## Usage
30 |
31 | Init a workspace
32 | ```shell
33 | avc init s3://mybucket/path/to/mydataset
34 | ```
35 |
36 | Clone a repository
37 | ```shell
38 | avc clone s3://mybucket/path/to/mydataset
39 | cd mydataset/
40 | ```
41 |
42 | ## Environment Variables
43 |
44 | | Name | Description | Default value |
45 | | --- | --- | --- |
46 | | `AWS_ACCESS_KEY_ID` | The access key | |
47 | | `AWS_SECRET_ACCESS_KEY` | The access secret key | |
48 | | `AWS_PROFILE` | The profile to use in the credential file | `default` |
49 | | `AWS_REGION` | The region to use | the region from profile |
50 |
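51 | For example, to push with a non-default profile and an explicit region, you can set the variables above for a single invocation (profile and region values are examples):
52 |
53 | ```shell
54 | # Hypothetical profile/region: override the AWS settings for this run only
55 | AWS_PROFILE=myprofile AWS_REGION=ap-northeast-1 avc push -m "update dataset"
56 | ```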
--------------------------------------------------------------------------------
/docs/content/en/backends/ssh.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Remote Filesystem (SSH)
3 | weight: 2
4 | ---
5 |
6 | {{< toc >}}
7 |
8 | Use remote filesystem through SSH as the repository backend.
9 |
10 | ## Features
11 |
12 | - Use the original `~/.ssh/config` without pain
13 | - Support password authentication
14 | - Support public key authentication
15 | - Support SSH agent
16 | - Support SSH proxy by `ProxyCommand` or `ProxyJump`
17 | - Support host key checking through `~/.ssh/known_hosts`
18 | - Concurrent uploading and downloading
19 |
20 | ## Configuration
21 | To use the SSH backend, you have to configure `~/.ssh/config`.
22 |
23 | 1. Configure the `~/.ssh/config`
24 | ```
25 | Host myserver
26 | HostName myserver.hosts
27 | User myname
28 | IdentityFile ~/.ssh/id_ed25519
29 | ```
30 |
31 | 1. Check if you can access the SSH server successfully
32 |
33 | ```
34 | ssh myserver
35 | ```
36 |
37 | For more information, please see [ssh config manual](https://man.openbsd.org/ssh_config)
38 |
39 | ## Usage
40 |
41 | Init a workspace
42 |
43 | ```shell
44 | avc init myserver:path/to/mydataset
45 | ```
46 |
47 | Clone a repository
48 |
49 | ```shell
50 | avc clone myserver:path/to/mydataset
51 | cd mydataset/
52 | ```
53 |
54 | ## SSH Proxy
55 |
56 | There are two ways to connect to the destination server through a bastion (proxy) server.
57 |
58 | ### ProxyCommand
59 |
60 | 1. Configure `~/.ssh/config`
61 | ```
62 | host bastion
63 | hostname mybastion.hosts
64 | user myname
65 | identityfile ~/id_ed25519
66 |
67 | host myserver
68 | hostname myserver.hosts
69 | user myname
70 | identityfile ~/id_ed25519
71 | ProxyCommand ssh bastion -W %h:%p
72 | ```
73 | 1. Init the workspace
74 | ```
75 | avc init myserver:path/to/mydataset
76 | ```
77 |
78 | ### ProxyJump
79 |
80 |
81 | 1. Configure `~/.ssh/config`
82 | ```
83 | host bastion
84 | hostname mybastion.hosts
85 | user myname
86 | identityfile ~/id_ed25519
87 |
88 | host myserver
89 | hostname myserver.hosts
90 | user myname
91 | identityfile ~/id_ed25519
92 | ProxyJump bastion
93 | ```
94 |
95 | 1. Init the workspace
96 | ```
97 | avc init myserver:path/to/mydataset
98 | ```
99 |
100 | ## Environment Variables
101 |
102 |
103 | | Name | Description | Default value |
104 | | --- | --- | --- |
105 | | `SSH_USER` | The login user | The current user. |
106 | | `SSH_PASSWORD` | The password to be used for password authentication | |
107 | | `SSH_PORT` | The port of the ssh server | 22 |
108 | | `SSH_IDENTITY_FILE` | The identity file to be used for pubkey authentication | |
109 | | `SSH_KEY_PASSPHRASE` | The passphrase of the identity key | |
110 | | `SSH_STRICT_HOST_KEY_CHECKING` | Set the value to `no` to disable host key checking | |
111 |
112 | ## Supported Directives for SSH config
113 |
114 | - [Port](https://man.openbsd.org/ssh_config#Port)
115 | - [User](https://man.openbsd.org/ssh_config#User)
116 | - [IdentityFile](https://man.openbsd.org/ssh_config#IdentityFile)
117 | - [StrictHostKeyChecking](https://man.openbsd.org/ssh_config#StrictHostKeyChecking)
118 | - [ProxyCommand](https://man.openbsd.org/ssh_config#ProxyCommand)
119 | - [ProxyJump](https://man.openbsd.org/ssh_config#ProxyJump)
120 |
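121 | For example, the variables above can override the SSH settings for a single invocation without touching `~/.ssh/config` (port and key path are examples):
122 |
123 | ```shell
124 | # Hypothetical values: use a custom port and identity file for this run only
125 | SSH_PORT=2222 SSH_IDENTITY_FILE=~/.ssh/id_ed25519 avc clone myserver:path/to/mydataset
126 | ```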
--------------------------------------------------------------------------------
/docs/content/en/commands/_index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Commands
3 | weight: 99
4 | geekdocCollapseSection: true
5 | ---
6 |
--------------------------------------------------------------------------------
/docs/content/en/commands/avc.md:
--------------------------------------------------------------------------------
1 | ## avc
2 |
3 | ArtiVC is a version control system for large files
4 |
5 | ### Examples
6 |
7 | ```
8 | # Push data to the repository
9 | cd /path/to/my/data
10 | avc init s3://mybucket/path/to/repo
11 | avc push -m "my first commit"
12 |
13 | # Pull data from the repository
14 | cd /path/to/download
15 | avc init s3://mybucket/path/to/repo
16 | avc pull
17 |
18 | # Download by quick command
19 | avc get -o /path/to/download s3://mybucket/path/to/repo
20 |
21 | # Show command help
22 | avc -h
23 |
24 | For more information, please check https://github.com/infuseai/artivc
25 | ```
26 |
27 | ### Options
28 |
29 | ```
30 | -h, --help help for avc
31 | ```
32 |
33 | ### SEE ALSO
34 |
35 | * [avc clone](/commands/avc_clone/) - Clone a workspace
36 | * [avc completion](/commands/avc_completion/) - Generate the autocompletion script for the specified shell
37 | * [avc config](/commands/avc_config/) - Configure the workspace
38 | * [avc diff](/commands/avc_diff/) - Diff workspace/commits/references
39 | * [avc docs](/commands/avc_docs/) - Generate docs
40 | * [avc get](/commands/avc_get/) - Download data from a repository
41 | * [avc init](/commands/avc_init/) - Initiate a workspace
42 | * [avc list](/commands/avc_list/) - List files of a commit
43 | * [avc log](/commands/avc_log/) - Log commits
44 | * [avc pull](/commands/avc_pull/) - Pull data from the repository
45 | * [avc push](/commands/avc_push/) - Push data to the repository
46 | * [avc put](/commands/avc_put/) - Upload data to a repository
47 | * [avc status](/commands/avc_status/) - Show the status of the workspace
48 | * [avc tag](/commands/avc_tag/) - List or manage tags
49 | * [avc version](/commands/avc_version/) - Print the version information
50 |
51 | ###### Auto generated by spf13/cobra on 24-Mar-2022
52 |
--------------------------------------------------------------------------------
/docs/content/en/commands/avc_clone.md:
--------------------------------------------------------------------------------
1 | ## avc clone
2 |
3 | Clone a workspace
4 |
5 | ```
6 | avc clone <repository> [<dir>]
7 | ```
8 |
9 | ### Examples
10 |
11 | ```
12 | # clone a workspace with local repository
13 | avc clone /path/to/mydataset
14 |
15 | # clone a workspace with s3 repository
16 | avc clone s3://mybucket/path/to/mydataset
17 | ```
18 |
19 | ### Options
20 |
21 | ```
22 | -h, --help help for clone
23 | ```
24 |
25 | ### SEE ALSO
26 |
27 | * [avc](/commands/avc/) - ArtiVC is a version control system for large files
28 |
29 | ###### Auto generated by spf13/cobra on 24-Mar-2022
30 |
--------------------------------------------------------------------------------
/docs/content/en/commands/avc_completion.md:
--------------------------------------------------------------------------------
1 | ## avc completion
2 |
3 | Generate the autocompletion script for the specified shell
4 |
5 | ### Synopsis
6 |
7 | Generate the autocompletion script for avc for the specified shell.
8 | See each sub-command's help for details on how to use the generated script.
9 |
10 |
11 | ### Options
12 |
13 | ```
14 | -h, --help help for completion
15 | ```
16 |
17 | ### SEE ALSO
18 |
19 | * [avc](/commands/avc/) - ArtiVC is a version control system for large files
20 | * [avc completion bash](/commands/avc_completion_bash/) - Generate the autocompletion script for bash
21 | * [avc completion fish](/commands/avc_completion_fish/) - Generate the autocompletion script for fish
22 | * [avc completion powershell](/commands/avc_completion_powershell/) - Generate the autocompletion script for powershell
23 | * [avc completion zsh](/commands/avc_completion_zsh/) - Generate the autocompletion script for zsh
24 |
25 | ###### Auto generated by spf13/cobra on 24-Mar-2022
26 |
--------------------------------------------------------------------------------
/docs/content/en/commands/avc_completion_bash.md:
--------------------------------------------------------------------------------
1 | ## avc completion bash
2 |
3 | Generate the autocompletion script for bash
4 |
5 | ### Synopsis
6 |
7 | Generate the autocompletion script for the bash shell.
8 |
9 | This script depends on the 'bash-completion' package.
10 | If it is not installed already, you can install it via your OS's package manager.
11 |
12 | To load completions in your current shell session:
13 |
14 | source <(avc completion bash)
15 |
16 | To load completions for every new session, execute once:
17 |
18 | #### Linux:
19 |
20 | avc completion bash > /etc/bash_completion.d/avc
21 |
22 | #### macOS:
23 |
24 | avc completion bash > /usr/local/etc/bash_completion.d/avc
25 |
26 | You will need to start a new shell for this setup to take effect.
27 |
28 |
29 | ```
30 | avc completion bash
31 | ```
32 |
33 | ### Options
34 |
35 | ```
36 | -h, --help help for bash
37 | --no-descriptions disable completion descriptions
38 | ```
39 |
40 | ### SEE ALSO
41 |
42 | * [avc completion](/commands/avc_completion/) - Generate the autocompletion script for the specified shell
43 |
44 | ###### Auto generated by spf13/cobra on 24-Mar-2022
45 |
--------------------------------------------------------------------------------
/docs/content/en/commands/avc_completion_fish.md:
--------------------------------------------------------------------------------
1 | ## avc completion fish
2 |
3 | Generate the autocompletion script for fish
4 |
5 | ### Synopsis
6 |
7 | Generate the autocompletion script for the fish shell.
8 |
9 | To load completions in your current shell session:
10 |
11 | avc completion fish | source
12 |
13 | To load completions for every new session, execute once:
14 |
15 | avc completion fish > ~/.config/fish/completions/avc.fish
16 |
17 | You will need to start a new shell for this setup to take effect.
18 |
19 |
20 | ```
21 | avc completion fish [flags]
22 | ```
23 |
24 | ### Options
25 |
26 | ```
27 | -h, --help help for fish
28 | --no-descriptions disable completion descriptions
29 | ```
30 |
31 | ### SEE ALSO
32 |
33 | * [avc completion](/commands/avc_completion/) - Generate the autocompletion script for the specified shell
34 |
35 | ###### Auto generated by spf13/cobra on 24-Mar-2022
36 |
--------------------------------------------------------------------------------
/docs/content/en/commands/avc_completion_powershell.md:
--------------------------------------------------------------------------------
1 | ## avc completion powershell
2 |
3 | Generate the autocompletion script for powershell
4 |
5 | ### Synopsis
6 |
7 | Generate the autocompletion script for powershell.
8 |
9 | To load completions in your current shell session:
10 |
11 | avc completion powershell | Out-String | Invoke-Expression
12 |
13 | To load completions for every new session, add the output of the above command
14 | to your powershell profile.
15 |
16 |
17 | ```
18 | avc completion powershell [flags]
19 | ```
20 |
21 | ### Options
22 |
23 | ```
24 | -h, --help help for powershell
25 | --no-descriptions disable completion descriptions
26 | ```
27 |
28 | ### SEE ALSO
29 |
30 | * [avc completion](/commands/avc_completion/) - Generate the autocompletion script for the specified shell
31 |
32 | ###### Auto generated by spf13/cobra on 24-Mar-2022
33 |
--------------------------------------------------------------------------------
/docs/content/en/commands/avc_completion_zsh.md:
--------------------------------------------------------------------------------
1 | ## avc completion zsh
2 |
3 | Generate the autocompletion script for zsh
4 |
5 | ### Synopsis
6 |
7 | Generate the autocompletion script for the zsh shell.
8 |
9 | If shell completion is not already enabled in your environment you will need
10 | to enable it. You can execute the following once:
11 |
12 | echo "autoload -U compinit; compinit" >> ~/.zshrc
13 |
14 | To load completions for every new session, execute once:
15 |
16 | #### Linux:
17 |
18 | avc completion zsh > "${fpath[1]}/_avc"
19 |
20 | #### macOS:
21 |
22 | avc completion zsh > /usr/local/share/zsh/site-functions/_avc
23 |
24 | You will need to start a new shell for this setup to take effect.
25 |
26 |
27 | ```
28 | avc completion zsh [flags]
29 | ```
30 |
31 | ### Options
32 |
33 | ```
34 | -h, --help help for zsh
35 | --no-descriptions disable completion descriptions
36 | ```
37 |
38 | ### SEE ALSO
39 |
40 | * [avc completion](/commands/avc_completion/) - Generate the autocompletion script for the specified shell
41 |
42 | ###### Auto generated by spf13/cobra on 24-Mar-2022
43 |
--------------------------------------------------------------------------------
/docs/content/en/commands/avc_config.md:
--------------------------------------------------------------------------------
1 | ## avc config
2 |
3 | Configure the workspace
4 |
5 | ### Synopsis
6 |
7 | Configure the workspace. The config file is stored at ".avc/config".
8 |
9 | ```
10 | avc config [<key> [<value>]]
11 | ```
12 |
13 | ### Examples
14 |
15 | ```
16 | # List the config
17 | avc config
18 |
19 | # Get the config
20 | avc config repo.url
21 |
22 | # Set the config
23 | avc config repo.url s3://your-bucket/data
24 | ```
25 |
26 | ### Options
27 |
28 | ```
29 | -h, --help help for config
30 | ```
31 |
32 | ### SEE ALSO
33 |
34 | * [avc](/commands/avc/) - ArtiVC is a version control system for large files
35 |
36 | ###### Auto generated by spf13/cobra on 24-Mar-2022
37 |
--------------------------------------------------------------------------------
/docs/content/en/commands/avc_diff.md:
--------------------------------------------------------------------------------
1 | ## avc diff
2 |
3 | Diff workspace/commits/references
4 |
5 | ```
6 | avc diff [flags]
7 | ```
8 |
9 | ### Examples
10 |
11 | ```
12 | # Diff two version
13 | avc diff v0.1.0 v0.2.0
14 | ```
15 |
16 | ### Options
17 |
18 | ```
19 | -h, --help help for diff
20 | ```
21 |
22 | ### SEE ALSO
23 |
24 | * [avc](/commands/avc/) - ArtiVC is a version control system for large files
25 |
26 | ###### Auto generated by spf13/cobra on 24-Mar-2022
27 |
--------------------------------------------------------------------------------
/docs/content/en/commands/avc_docs.md:
--------------------------------------------------------------------------------
1 | ## avc docs
2 |
3 | Generate docs
4 |
5 | ### Synopsis
6 |
7 | Generate docs. For example:
8 |
9 | avc docs
10 |
11 | ```
12 | avc docs [flags]
13 | ```
14 |
15 | ### Options
16 |
17 | ```
18 | -h, --help help for docs
19 | ```
20 |
21 | ### SEE ALSO
22 |
23 | * [avc](/commands/avc/) - ArtiVC is a version control system for large files
24 |
25 | ###### Auto generated by spf13/cobra on 24-Mar-2022
26 |
--------------------------------------------------------------------------------
/docs/content/en/commands/avc_get.md:
--------------------------------------------------------------------------------
1 | ## avc get
2 |
3 | Download data from a repository
4 |
5 | ```
6 | avc get [-o <output>] <repository>[@<commit>|<tag>] [--] <path>...
7 | ```
8 |
9 | ### Examples
10 |
11 | ```
12 | # Download the latest version. The data goes to the "mydataset" folder.
13 | avc get s3://bucket/mydataset
14 |
15 | # Download the specific version
16 | avc get s3://mybucket/path/to/mydataset@v1.0.0
17 |
18 | # Download to a specific folder
19 | avc get -o /tmp/mydataset s3://bucket/mydataset
20 |
21 | # Download partial files
22 | avc get -o /tmp/mydataset s3://bucket/mydataset -- path/to/file1 path/to/file2 data/
23 | ```
24 |
25 | ### Options
26 |
27 | ```
28 | --delete Delete extra files which are not listed in commit
29 | -h, --help help for get
30 | -o, --output string Output directory
31 | ```
32 |
33 | ### SEE ALSO
34 |
35 | * [avc](/commands/avc/) - ArtiVC is a version control system for large files
36 |
37 | ###### Auto generated by spf13/cobra on 24-Mar-2022
38 |
--------------------------------------------------------------------------------
/docs/content/en/commands/avc_init.md:
--------------------------------------------------------------------------------
1 | ## avc init
2 |
3 | Initiate a workspace
4 |
5 | ```
6 | avc init <repository>
7 | ```
8 |
9 | ### Examples
10 |
11 | ```
12 | # Init a workspace with local repository
13 | avc init /path/to/mydataset
14 |
15 | # Init a workspace with s3 repository
16 | avc init s3://mybucket/path/to/mydataset
17 | ```
18 |
19 | ### Options
20 |
21 | ```
22 | -h, --help help for init
23 | ```
24 |
25 | ### SEE ALSO
26 |
27 | * [avc](/commands/avc/) - ArtiVC is a version control system for large files
28 |
29 | ###### Auto generated by spf13/cobra on 24-Mar-2022
30 |
--------------------------------------------------------------------------------
/docs/content/en/commands/avc_list.md:
--------------------------------------------------------------------------------
1 | ## avc list
2 |
3 | List files of a commit
4 |
5 | ```
6 | avc list [flags]
7 | ```
8 |
9 | ### Examples
10 |
11 | ```
12 | # List files for the latest version
13 | avc list
14 |
15 | # List files for the specific version
16 | avc list v1.0.0
17 | ```
18 |
19 | ### Options
20 |
21 | ```
22 | -h, --help help for list
23 | ```
24 |
25 | ### SEE ALSO
26 |
27 | * [avc](/commands/avc/) - ArtiVC is a version control system for large files
28 |
29 | ###### Auto generated by spf13/cobra on 24-Mar-2022
30 |
--------------------------------------------------------------------------------
/docs/content/en/commands/avc_log.md:
--------------------------------------------------------------------------------
1 | ## avc log
2 |
3 | Log commits
4 |
5 | ```
6 | avc log [<commit>|<tag>]
7 | ```
8 |
9 | ### Examples
10 |
11 | ```
12 | # Log commits from the latest
13 | avc log
14 |
15 | # Log commits from a specific version
16 | avc log v1.0.0
17 | ```
18 |
19 | ### Options
20 |
21 | ```
22 | -h, --help help for log
23 | ```
24 |
25 | ### SEE ALSO
26 |
27 | * [avc](/commands/avc/) - ArtiVC is a version control system for large files
28 |
29 | ###### Auto generated by spf13/cobra on 24-Mar-2022
30 |
--------------------------------------------------------------------------------
/docs/content/en/commands/avc_pull.md:
--------------------------------------------------------------------------------
1 | ## avc pull
2 |
3 | Pull data from the repository
4 |
5 | ```
6 | avc pull [<commit>|<tag>] [flags] -- <path>...
7 | ```
8 |
9 | ### Examples
10 |
11 | ```
12 | # Pull the latest version
13 | avc pull
14 |
15 | # Pull from a specific version
16 | avc pull v1.0.0
17 |
18 | # Pull partial files
19 | avc pull -- path/to/partia
20 | avc pull v0.1.0 -- path/to/partia ...
21 | ```
22 |
23 | ### Options
24 |
25 | ```
26 | --delete Delete extra files which are not listed in commit
27 | --dry-run Dry run
28 | -h, --help help for pull
29 | ```
30 |
31 | ### SEE ALSO
32 |
33 | * [avc](/commands/avc/) - ArtiVC is a version control system for large files
34 |
35 | ###### Auto generated by spf13/cobra on 24-Mar-2022
36 |
--------------------------------------------------------------------------------
/docs/content/en/commands/avc_push.md:
--------------------------------------------------------------------------------
1 | ## avc push
2 |
3 | Push data to the repository
4 |
5 | ### Synopsis
6 |
7 | Push data to the repository. There is no branching implemented yet; every put and push command creates a commit and treats it as the latest commit.
8 |
9 | ```
10 | avc push [-m <message>]
11 | ```
12 |
13 | ### Examples
14 |
15 | ```
16 | # Push to the latest version
17 | avc push -m 'Initial version'
18 |
19 | # Push to the latest version and tag it with a specific version
20 | avc push -m 'Initial version'
21 | avc tag v1.0.0
22 | ```
23 |
24 | ### Options
25 |
26 | ```
27 | --dry-run Dry run
28 | -h, --help help for push
29 | -m, --message string Commit message
30 | ```
31 |
32 | ### SEE ALSO
33 |
34 | * [avc](/commands/avc/) - ArtiVC is a version control system for large files
35 |
36 | ###### Auto generated by spf13/cobra on 24-Mar-2022
37 |
--------------------------------------------------------------------------------
/docs/content/en/commands/avc_put.md:
--------------------------------------------------------------------------------
1 | ## avc put
2 |
3 | Upload data to a repository
4 |
5 | ```
6 | avc put [-m <message>] <dir> <repository>[@<tag>]
7 | ```
8 |
9 | ### Examples
10 |
11 | ```
12 | # Upload the latest version
13 | avc put ./folder/ /path/to/mydataset
14 |
15 | # Upload the specific version
16 | avc put ./folder/ /path/to/mydataset@v1.0.0
17 | ```
18 |
19 | ### Options
20 |
21 | ```
22 | -h, --help help for put
23 | -m, --message string Commit message
24 | ```
25 |
26 | ### SEE ALSO
27 |
28 | * [avc](/commands/avc/) - ArtiVC is a version control system for large files
29 |
30 | ###### Auto generated by spf13/cobra on 24-Mar-2022
31 |
--------------------------------------------------------------------------------
/docs/content/en/commands/avc_status.md:
--------------------------------------------------------------------------------
1 | ## avc status
2 |
3 | Show the status of the workspace
4 |
5 | ```
6 | avc status
7 | ```
8 |
9 | ### Examples
10 |
11 | ```
12 | # check current status
13 | avc status
14 | ```
15 |
16 | ### Options
17 |
18 | ```
19 | -h, --help help for status
20 | ```
21 |
22 | ### SEE ALSO
23 |
24 | * [avc](/commands/avc/) - ArtiVC is a version control system for large files
25 |
26 | ###### Auto generated by spf13/cobra on 24-Mar-2022
27 |
--------------------------------------------------------------------------------
/docs/content/en/commands/avc_tag.md:
--------------------------------------------------------------------------------
1 | ## avc tag
2 |
3 | List or manage tags
4 |
5 | ```
6 | avc tag [--delete <tag>] [<tag>]
7 | ```
8 |
9 | ### Examples
10 |
11 | ```
12 | # List the tags
13 | avc tag
14 |
15 | # Tag the latest commit
16 | avc tag v1.0.0
17 |
18 | # Tag the specific commit
19 | avc tag --ref a1b2c3d4 v1.0.0
20 |
21 | # Delete a tag
22 | avc tag --delete v1.0.0
23 | ```
24 |
25 | ### Options
26 |
27 | ```
28 | -D, --delete Delete a tag
29 | -h, --help help for tag
30 | --ref string The source commit or reference to be tagged (default "latest")
31 | ```
32 |
33 | ### SEE ALSO
34 |
35 | * [avc](/commands/avc/) - ArtiVC is a version control system for large files
36 |
37 | ###### Auto generated by spf13/cobra on 24-Mar-2022
38 |
--------------------------------------------------------------------------------
/docs/content/en/commands/avc_version.md:
--------------------------------------------------------------------------------
1 | ## avc version
2 |
3 | Print the version information
4 |
5 | ### Synopsis
6 |
7 | Print the version information. For example:
8 |
9 | avc version
10 | version.BuildInfo{"Version":"v0.1-dev","GitCommit":"59b5c650fbed4d91c1e54b7cb3c3f6f0c50e5fa4","GitTreeState":"dirty","GoVersion":"go1.17.5"}
11 |
12 |
13 | ```
14 | avc version [flags]
15 | ```
16 |
17 | ### Options
18 |
19 | ```
20 | -h, --help help for version
21 | ```
22 |
23 | ### SEE ALSO
24 |
25 | * [avc](/commands/avc/) - ArtiVC is a version control system for large files
26 |
27 | ###### Auto generated by spf13/cobra on 24-Mar-2022
28 |
--------------------------------------------------------------------------------
/docs/content/en/commands/images/geekdoc-dark.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InfuseAI/ArtiVC/76a47f0105538a7511742f154c0da22ae0372eec/docs/content/en/commands/images/geekdoc-dark.png
--------------------------------------------------------------------------------
/docs/content/en/design/_index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Design
3 | weight: 3
4 | ---
5 |
--------------------------------------------------------------------------------
/docs/content/en/design/alternatives.md:
--------------------------------------------------------------------------------
1 | ---
2 | weight: 4
3 | ---
4 |
5 | - **Object storage or NFS:** Create folders or prefixes to add version semantics. It is straightforward to store different versions of data, but it lacks commit messages, metadata, and history tracking. And we cannot know which one is the latest version.
6 | - **S3 versioning**: [S3 Versioning](https://docs.aws.amazon.com/AmazonS3/latest/userguide/Versioning.html) achieves object-level versioning. We can get the latest version, and it is possible to roll back to a previous one.
7 | - **Git LFS**: [Git LFS](https://git-lfs.github.com/) is an open-source Git extension for versioning large files developed by Github.
8 | - [Github](https://docs.github.com/en/repositories/working-with-files/managing-large-files/about-git-large-file-storage), [Gitlab](https://docs.gitlab.com/ee/topics/git/lfs/index.html), and [Huggingface](https://huggingface.co/docs/transformers/model_sharing#repository-features) provide the Git LFS feature with a limited quota.
9 | - If you want to put data on your own storage, you need to install a [standalone Git LFS server](https://github.com/git-lfs/lfs-test-server)
10 | - **DVC**: [DVC](https://dvc.org/) is built to make ML models shareable and reproducible. It is designed to handle large files, data sets, machine learning models, and metrics as well as code.
11 | - uses the `git` command to version small files or metadata, and the `dvc` command to manage large files.
12 | - you need to know both git and dvc, and the workflow switches back and forth between the two commands. See the [dvc tutorial](https://dvc.org/doc/use-cases/versioning-data-and-model-files/tutorial)
13 | - **LakeFS**: [LakeFS](https://lakefs.io/) provides a multi-server solution that turns S3 into git-like repositories
14 | - the [architecture](https://docs.lakefs.io/understand/architecture.html) is much heavier than ArtiVC because it requires an extra database for metadata storage and S3 configuration (e.g., an S3 gateway)
--------------------------------------------------------------------------------
/docs/content/en/design/benchmark.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Benchmark
3 | weight: 2
4 | ---
5 |
6 | {{< toc >}}
7 |
8 | This document shows the performance benchmark of ArtiVC and alternatives. We chose these three tools to compare:
9 |
10 | - [AWS CLI](https://aws.amazon.com/cli/) (2.4.18): The AWS CLI is the official AWS tool to upload/download and manage S3. We use it as the baseline to compare with the other tools.
11 | - [Rclone](https://rclone.org/) (v1.57.0): Rclone is a command-line program to manage files on cloud storage. It is just like rsync, but for cloud storage.
12 | - [DVC](https://dvc.org/) (v2.9.5): DVC is an open-source tool for data science and machine learning projects. It provides Git-like commands to version large files.
13 |
14 | We upload data to the same bucket and path. The following are the example commands for each tool.
15 |
16 |
17 | {{< tabs "benchmark-tools" >}}
18 |
19 | {{< tab "AWS CLI" >}}
20 | ```bash
21 | # upload
22 | aws s3 cp --recursive . s3://art-vcs/benchmark/
23 |
24 | # download
25 | aws s3 cp --recursive s3://art-vcs/benchmark/ .
26 | ```
27 | {{< /tab >}}
28 |
29 | {{< tab "Rclone" >}}
30 | ```bash
31 | # upload
32 | rclone copy . s3:art-vcs
33 |
34 | # download
35 | rclone copy s3:art-vcs .
36 | ```
37 | {{< /tab >}}
38 |
39 | {{< tab "DVC" >}}
40 | ```bash
41 | # init
42 | git init
43 | dvc init
44 | dvc remote add -d s3 s3://art-vcs/benchmark/
45 |
46 | # upload
47 | dvc add data
48 | dvc push
49 |
50 | # download
51 | dvc pull
52 | ```
53 | {{< /tab >}}
54 |
55 | {{< tab "ArtiVC" >}}
56 | ```bash
57 | # init
58 | avc init s3://art-vcs/benchmark/
59 |
60 | # upload
61 | avc push
62 |
63 | # download
64 | avc pull
65 | ```
66 | {{< /tab >}}
67 |
68 | {{< /tabs >}}
69 |
70 | Testing Environment
71 |
72 | {{< tabs "benchmark-env" >}}
73 |
74 | {{< tab "Hardware" >}}
75 |
76 | ```shell
77 | Hardware Overview:
78 | Model Name: MacBook Pro
79 | Model Identifier: MacBookPro18,3
80 | Chip: Apple M1 Pro
81 | Total Number of Cores: 10 (8 performance and 2 efficiency)
82 | Memory: 32 GB
83 | ```
84 | {{< /tab >}}
85 |
86 | {{< tab "Network" >}}
87 |
88 | ```shell
89 | $ speedtest
90 | Speedtest by Ookla
91 |
92 | Server: Chunghwa Mobile - Hsinchu (id = 18450)
93 | ISP: Chunghwa Telecom
94 | Latency: 5.25 ms (0.59 ms jitter)
95 | Download: 224.54 Mbps (data used: 198.9 MB)
96 | Upload: 79.58 Mbps (data used: 136.3 MB)
97 | Packet Loss: Not available.
98 | Result URL: https://www.speedtest.net/result/c/4dd6b2ec-61d1-4b7b-a179-811951412997
99 | ```
100 | {{< /tab >}}
101 |
102 | {{< tab "S3" >}}
103 | The region is `ap-northeast-1`
104 | {{< /tab >}}
105 |
106 | {{< /tabs >}}
107 |
108 | # Test Plans
109 |
110 | We test these three scenarios
111 |
112 | 1. **[Transfer large size files](#transfer-large-size-files)**
113 | 2. **[Transfer large amount of files](#transfer-large-amount-of-files)**
114 | 3. **[Determining which files to upload and download](#determining-which-files-to-upload)**
115 |
116 | ## Transfer large size files
117 |
118 | In this test case, we create 10 x 100mb files under the `data/` folder. S3 supports [multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/userguide/mpuoverview.html). If the client supports multipart upload and parallel transfers, the results should be similar. We also transfer twice; the second trial tests whether the tool can detect existing content at the same location.
119 |
120 | Prepare Data
121 | ```bash
122 | # create 10 x 100mb files in data/
123 | seq 10 | xargs -I {} dd if=/dev/urandom of=data/100m_{} bs=100m count=1
124 | ```
125 |
126 | Commands
127 | {{< tabs "large-files" >}}
128 | {{< tab "AWS CLI" >}}
129 |
130 | ```bash
131 | # upload
132 | time aws s3 cp --recursive . s3://art-vcs/benchmark/
133 |
134 | # download
135 | time aws s3 cp --recursive s3://art-vcs/benchmark/ .
136 | ```
137 | {{< /tab >}}
138 |
139 | {{< tab "Rclone" >}}
140 |
141 | ```bash
142 | # upload
143 | time rclone copy --progress . s3:art-vcs/benchmark/
144 |
145 | # download
146 | time rclone copy --progress s3:art-vcs/benchmark/ .
147 | ```
148 | {{< /tab >}}
149 |
150 | {{< tab "DVC" >}}
151 |
152 |
153 | ```bash
154 | # init
155 | git init
156 | dvc init
157 | dvc remote add -d s3 s3://art-vcs/benchmark/
158 |
159 | # track data and compute hash
160 | dvc add data
161 |
162 | # upload
163 | time dvc push
164 |
165 | # download
166 | rm -rf .dvc/cache
167 | time dvc pull
168 | ```
169 | {{< /tab >}}
170 |
171 | {{< tab "ArtiVC" >}}
172 |
173 |
174 | ```bash
175 | # init
176 | avc init s3://art-vcs/benchmark/large-files/artivc/
177 |
178 | # upload
179 | time avc push
180 |
181 | # download
182 | time avc pull
183 | ```
184 | {{< /tab >}}
185 | {{< /tabs >}}
186 |
187 | Result
188 |
189 | 
190 |
191 | | | Upload (1st / 2nd)| Download (1st / 2nd) |
192 | | --- | --- | --- |
193 | | AWS CLI | 1m43s / 1m43s | 1m53s / 1m52s |
194 | | Rclone | 1m59s / 2.6s | 2m22s / 2.6s |
195 | | DVC | 1m44s / 1s | 1m27s / <1s |
196 | | ArtiVC | 1m43s / <1s | 1m30s / <1s |
197 |
198 |
199 | For the first trial of download, all four tools have similar results. But for the second trial, all tools except the AWS CLI check the content hashes between local and remote. ArtiVC has the best performance because the file list and hashes are stored in ONE commit object. That is, only one API request is required for this test case.
200 |
201 | ## Transfer large amount of files
202 |
203 | In this test case, we create 1,000 x 1kb files under the `data/` folder. It is quite common for a dataset to contain many small files. Because the file sizes are small, a tool that reduces the number of API requests gets better performance.
204 |
205 | Prepare data
206 |
207 | ```bash
208 | # create 1000 files with 1k size
209 | seq 1000 | xargs -I {} dd if=/dev/urandom of=data/1k_{} bs=1k count=1
210 | ```
211 |
212 |
213 | Commands
214 | {{< tabs "many-files" >}}
215 |
216 | {{< tab "AWS CLI" >}}
217 |
218 | ```bash
219 | # upload
220 | time aws s3 cp --recursive . s3://art-vcs/benchmark/
221 |
222 | # download
223 | time aws s3 cp --recursive s3://art-vcs/benchmark/ .
224 | ```
225 | {{< /tab >}}
226 |
227 | {{< tab "Rclone" >}}
228 |
229 | ```bash
230 | # upload
231 | time rclone copy --progress . s3:art-vcs/benchmark/
232 |
233 | # download
234 | time rclone copy --progress s3:art-vcs/benchmark/ .
235 | ```
236 | {{< /tab >}}
237 |
238 | {{< tab "DVC" >}}
239 |
240 | ```bash
241 | # init
242 | git init
243 | dvc init
244 | dvc remote add -d s3 s3://art-vcs/benchmark/
245 |
246 | # track data and compute hash
247 | dvc add data
248 |
249 | # upload
250 | time dvc push
251 |
252 | # download
253 | rm -rf .dvc/cache
254 | time dvc pull
255 | ```
256 | {{< /tab >}}
257 |
258 | {{< tab "ArtiVC" >}}
259 |
260 | ```bash
261 | # init
262 | avc init s3://art-vcs/benchmark/
263 |
264 | # upload
265 | time avc push
266 |
267 | # download
268 | time avc pull
269 | ```
270 | {{< /tab >}}
271 | {{< /tabs >}}
272 |
273 | Result
274 |
275 | 
276 |
277 | | | Upload (1st / 2nd) | Download (1st / 2nd) |
278 | | --- | --- | --- |
279 | | aws cli | 16s / 16s | 10s / 10s |
280 | | rclone | 51s / 12s | 20s / 12s |
281 | | dvc | 20s / 1s | 18s / <1s |
282 | | artivc | 12s / <1s | 8s / <1s |
283 |
284 |
285 | For the first trial, Rclone gets the worst performance because it only has four concurrent transfers by default. We can use the `--transfers 10` option to increase the transfer speed. ArtiVC has the best performance for the first trial because there is an optimization for the first push: if there is no commit in the remote repository, it uploads all the files without a content check.
286 |
287 | For the second trial, ArtiVC and DVC have an efficient way to know that no transfer is required. Rclone checks all the content hashes one by one.
288 |
289 | ## Determining which files to upload
290 |
291 | Transferring files is costly. If we know in advance which files already exist locally or remotely, we can avoid unnecessary transfers. This test measures how much time the difference check takes. It is inspired by the article [Cloud Data Sync Methods and Benchmark: DVC vs Rclone](https://dvc.org/blog/dvc-vs-rclone).
292 |
293 | To compare the local and remote files, there are two methods:
294 |
295 | 1. Check the content hash with the remote for each local file. This method performs better when there are few local files and many remote files.
296 | 2. List the remote files and compare the remote hashes with the local files. The performance of this method is linearly related to the amount of data on the remote.
297 |
298 | There are three scenarios to test
299 |
300 | 1. 50000 local files and no remote files
301 | 1. 500 local files and 50000 remote files
302 | 1. 1 local file and 50000 remote files
303 |
304 |
305 | Prepare data
306 |
307 | ```bash
308 | # create 50,000 files with 1k size
309 | seq 50000 | xargs -I {} dd if=/dev/urandom of=data/1k_{} bs=1k count=1
310 | ```
311 |
312 | Commands
313 | {{< tabs "diff-check" >}}
314 | {{< tab "Rclone" >}}
315 |
316 |
317 | ```bash
318 | time rclone copy --dry-run s3:art-vcs/benchmark/ .
319 | ```
320 | {{< /tab >}}
321 | {{< tab "Rclone (--no-traverse)" >}}
322 |
323 |
324 | ```bash
325 | time rclone copy --dry-run --no-traverse s3:art-vcs/benchmark/ .
326 | ```
327 | {{< /tab >}}
328 | {{< tab "DVC" >}}
329 |
330 | ```bash
331 | time dvc status -c
332 | ```
333 | {{< /tab >}}
334 | {{< tab "ArtiVC" >}}
335 |
336 | ```bash
337 | time avc push --dry-run
338 | ```
339 | {{< /tab >}}
340 | {{< /tabs >}}
341 |
342 | Result
343 |
344 | 
345 | | | local 50000 / remote 0 | local 500 / remote 50000 | local 1 / remote 50000 |
346 | | --- | --- | --- | --- |
347 | | rclone | 3s | 16s | 11s |
348 | | rclone (--no-traverse) | 7m48s | 6.6s | 2s |
349 | | dvc | 3s | 6.6s | 2.2s |
350 | | artivc | 1.1s | <1s | <1s |
351 |
352 |
353 | Just like the [blog post](https://dvc.org/blog/dvc-vs-rclone) describes, DVC uses an adaptive method to query data from the remote, but ArtiVC still outperforms DVC. Why? The reason is that DVC uses a `data.dvc` file holding the content hash of a version of the folder, while the file list and md5 hashes are stored in a `.dir` file. Even though the file list is stored in the `.dir` file, it still cannot guarantee that all the files are available in the remote. DVC still needs to use one of the two methods above to synchronize the status between local and remote.
354 |
355 | ArtiVC takes another approach. ArtiVC is a centralized version control system, and the commit object is stored in the repository. A push must successfully upload all files to the repository before the commit object is uploaded. So if we can get the commit object from the repo, we know that all the files it lists are available in the repo. There is no need to check their existence one by one.
356 |
357 |
358 | # Conclusions
359 |
360 | From the benchmark, we know ArtiVC performs similarly to the AWS CLI when downloading and uploading data. Using the commit object, we can manipulate the changeset with only one API call, no matter how many objects are in local or remote storage.
--------------------------------------------------------------------------------
/docs/content/en/design/faq.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: FAQ
3 | weight: 2
4 | ---
5 |
6 | ### Q: Why don't we use object storage directly
7 | Yes, we can use object storage to version artifacts using different prefixes. However, it does not support atomic operations for multi-file uploads. That is, we cannot know if a file went missing during an upload, or if any files were added to the folder afterward. In ArtiVC, a commit is an atomic operation that advances one version.
8 |
9 | ### Q: Why don't we use git
10 | Git is not suitable for [versioning big files](https://stackoverflow.com/questions/17888604/git-with-large-files). Whenever we clone a git repository, it downloads ALL files from every branch and commit to the local machine.
11 |
12 | There are good solutions like [Git LFS](https://git-lfs.github.com/) or [DVC](https://dvc.org/) out there to version large files in a git repository. However, if we already depend on storage like S3 or NFS, why not do the versioning directly on that storage?
13 |
14 | ### Q: Can we do the versioning with multiple datasets or models?
15 | Yes. Just create different repositories with different prefixes in the same S3 bucket, or different folders in NFS. See the example at the end of this page.
16 |
17 | ### Q: What storage does ArtiVC support?
18 | Currently, we support local filesystems, NFS (via the local backend), AWS S3, GCS (Google Cloud Storage), and Azure Blob Storage.
19 |
20 | ### Q: Do we support Windows?
21 | Not yet, we will implement and test it soon.
22 |
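23 | As an example for the multi-dataset question above, a sketch with placeholder bucket names and paths:
24 |
25 | ```shell
26 | # One workspace per artifact, each with its own prefix in the same bucket
27 | cd ~/datasets/flowers && avc init s3://mybucket/datasets/flowers
28 | cd ~/models/flowers-classifier && avc init s3://mybucket/models/flowers-classifier
29 | ```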
--------------------------------------------------------------------------------
/docs/content/en/design/how-it-works.md:
--------------------------------------------------------------------------------
1 | ---
2 | weight: 1
3 | ---
4 | There are three major types of data stored in the artifact repository:
5 | - **Blob**: the actual object to be uploaded to object storage. The path is determined by the **SHA1** hash of the content. We use the same path strategy as [git](https://git-scm.com/book/zh-tw/v2/Git-Internals-Git-Objects).
6 | - **Commit**: whenever the client pushes a commit to the repository, it creates a commit object in the store. It contains the timestamp, the message, and the list of blobs. A commit is also stored at the path of its content hash, which makes it impossible to change the content without invalidating the hash. The content of a commit is gzip-compressed JSON.
7 | - **Reference**: references are named tags or time strings that link to a commit. They make versioning possible. A special reference, `latest`, is used by default whenever the client pushes a commit to an artifact repository.
8 |
9 | 
10 |
11 |
12 |
13 | The sample file layout in the artifact repository
14 | ```
15 |
16 | ├── commits
17 | │ ├── 428f898a6792f37cf6805776098387783fdcaa87
18 | │ └── b8d996e998197f8be65f0a0e6ceaa2c7b26bfd11
19 | ├── objects
20 | │ ├── 03
21 | │ │ └── cfd743661f07975fa2f1220c5194cbaff48451
22 | │ ├── 23
23 | │ │ └── 715d393e1b723b249642a586597426885f64d1
24 | │ ├── 2b
25 | │ │ └── 77b0d6a5f3bd047fb2c6e44bbb22822f773f94
26 | │ ├── 32
27 | │ │ └── e2eb1b2f4b757fcc3e112d57ca702c65dad526
28 | │ ├── 6d
29 | │ │ └── 7cb1f44cb598a0db93f912791a1c20e3bd7c6b
30 | │ ├── 84
31 | │ │ └── 44869206da2a25b8ee79a8959ed4b34144f2a8
32 | │ ├── ac
33 | │ │ └── 9bcb803e59cd45610d87f2b683319420e7f76b
34 | │ └── d7
35 | │ └── 153b344c84ae187e2a894688310f1956dc45b7
36 | └── refs
37 | ├── latest
38 | └── tags
39 | ├── v1.0.0
40 | └── v1.0.1
41 | ```
42 |
43 | The sample content of a commit
44 | ```
45 | {
46 | "createdAt": "2022-02-06T00:34:45.406454+08:00",
47 | "messaage": null,
48 | "blobs": [
49 | {
50 | "path": "README.md",
51 | "hash": "32e2eb1b2f4b757fcc3e112d57ca702c65dad526",
52 | "mode": 420,
53 | "size": 6148
54 | },
55 | {
56 | "path": "front.png",
57 | "hash": "ac9bcb803e59cd45610d87f2b683319420e7f76b",
58 | "mode": 420,
59 | "size": 6130505
60 | },
61 | {
62 | "path": "back.png",
63 | "hash": "d7153b344c84ae187e2a894688310f1956dc45b7",
64 | "mode": 420,
65 | "size": 696686
66 | },
67 | {
68 | "path": "tmp",
69 | "hash": "03cfd743661f07975fa2f1220c5194cbaff48451",
70 | "mode": 420,
71 | "size": 4
72 | },
73 | {
74 | "path": "screen.png",
75 | "hash": "6d7cb1f44cb598a0db93f912791a1c20e3bd7c6b",
76 | "mode": 420,
77 | "size": 305686
78 | },
79 | {
80 | "path": "view/1.png",
81 | "hash": "8444869206da2a25b8ee79a8959ed4b34144f2a8",
82 | "mode": 420,
83 | "size": 578961
84 | },
85 | {
86 | "path": "view/2.png",
87 | "hash": "2b77b0d6a5f3bd047fb2c6e44bbb22822f773f94",
88 | "mode": 420,
89 | "size": 214683
90 | }
91 | ]
92 | }
93 | ```
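94 |
95 | As a quick illustration of the path strategy, the blob path can be derived from the SHA1 hash of the file content. A minimal sketch, assuming a plain SHA1 over the file bytes:
96 |
97 | ```shell
98 | # Split the content hash into a two-level path, the same layout as git
99 | hash=$(sha1sum front.png | awk '{print $1}')
100 | echo "objects/$(echo "$hash" | cut -c1-2)/$(echo "$hash" | cut -c3-)"
101 | # e.g. objects/ac/9bcb803e59cd45610d87f2b683319420e7f76b
102 | ```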
--------------------------------------------------------------------------------
/docs/content/en/design/images/artiv-overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InfuseAI/ArtiVC/76a47f0105538a7511742f154c0da22ae0372eec/docs/content/en/design/images/artiv-overview.png
--------------------------------------------------------------------------------
/docs/content/en/design/mlops.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: MLOps
3 | weight: 5
4 | ---
5 |
6 | A machine learning application is a data-driven approach to solving problems. MLOps is a methodology that builds a feedback loop across data, model training, evaluation, model publishing, deployment, and monitoring. There are three core components in an ML application:
7 |
8 | - Code
9 | - Datasets
10 | - Models
11 |
12 | The code can be training code or application code. Mostly, it is versioned by git, and we are familiar with the way to version it. We also use git as the single source of truth to drive the whole DevOps lifecycle.
13 |
14 | However, for datasets and models, there is still no de facto solution to version them. Usually, this data is stored in cloud object storage, on-premise object storage like MinIO, or NFS. There is still a gap between data storage and version metadata storage. This is why we built *ArtiVC*.
15 |
16 | In addition, we are thinking about how to drive automation when an artifact store event is triggered. In git, we can trigger a job whenever a git event occurs. In the artifact store, we lack the fundamentals to trigger such events. *ArtiVC* references the *git* design and provides the *commit* and *reference* primitives, which define the moment a commit or a version is created. This makes it possible to listen to object storage or filesystem events and trigger an automation job accordingly.
--------------------------------------------------------------------------------
/docs/content/en/posts/_index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Blogs
3 | type: posts
4 | weight: 10
5 | geekdocHidden: true
6 | ---
7 |
--------------------------------------------------------------------------------
/docs/content/en/posts/initial-release.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: The first post
3 | type: posts
4 | date: 2022-03-15
5 | ---
6 |
7 | Welcome to the ArtiVC documentation site. The site is built with the excellent [hugo](https://gohugo.io/) project and the beautiful theme by [geekdocs](https://geekdocs.de/).
8 |
--------------------------------------------------------------------------------
/docs/content/en/posts/rename-project.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Renamed to ArtiVC
3 | type: posts
4 | date: 2022-3-31
5 | ---
6 |
7 | Today, we are pleased to announce that our project name has changed from `ArtiV` to `ArtiVC`. The new website is also published at https://artivc.io.
8 |
--------------------------------------------------------------------------------
/docs/content/en/usage/_index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Usage
3 | weight: 1
4 | ---
5 |
--------------------------------------------------------------------------------
/docs/content/en/usage/cheatsheet.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Cheatsheet
3 | weight: 9999
4 | ---
5 |
6 | Please see the [command reference](../../commands/avc) for details.
7 |
8 | 
9 |
--------------------------------------------------------------------------------
/docs/content/en/usage/dryrun.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Dry Run
3 | weight: 11
4 | ---
5 |
6 | Pushing and pulling data is time-consuming and should be double-checked before transferring. Dry run is a feature that lists the changeset before anything is sent.
7 |
8 |
9 | ## Push
10 |
11 | 1. Dry run before pushing
12 | ```shell
13 | avc push --dry-run
14 | ```
15 |
16 | 1. Do the actual push
17 | ```
18 | avc push
19 | ```
20 |
21 | ## Pull
22 |
23 | 1. Dry run before pulling
24 | ```shell
25 | avc pull --dry-run
26 | # or check in delete mode
27 | # avc pull --delete --dry-run
28 | ```
29 |
30 | 1. Do the actual pull
31 |
32 | ```shell
33 | avc pull
34 | # avc pull --delete
35 | ```
36 |
37 |
--------------------------------------------------------------------------------
/docs/content/en/usage/expose.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Expose the data
3 | weight: 20
4 | ---
5 |
6 | An ArtiVC repository can be exposed as an HTTP endpoint. In S3, we can just make the bucket public and give the data consumer the HTTP endpoint of the repository. In this way, data can be downloaded through a CDN or other reverse proxies.
7 |
8 | 1. [Make your S3 bucket public](https://aws.amazon.com/premiumsupport/knowledge-center/read-access-objects-s3-bucket/?nc1=h_ls)
9 | 1. Copy the public URL of your repository. For example
10 | ```shell
11 | https://mybucket.s3.ap-northeast-1.amazonaws.com/datasets/flowers-classification
12 | ```
13 | 1. Download the data
14 | ```shell
15 | avc get -o /tmp/output https://mybucket.s3.ap-northeast-1.amazonaws.com/datasets/flowers-classification
16 | ```
17 |
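18 | Assuming the HTTP endpoint supports the same `@<version>` suffix as the other backends, a tagged version can be fetched the same way (the bucket and tag below are hypothetical):
19 |
20 | ```shell
21 | # Download the version tagged v0.1.0 over plain HTTP
22 | avc get -o /tmp/output-v0.1.0 https://mybucket.s3.ap-northeast-1.amazonaws.com/datasets/flowers-classification@v0.1.0
23 | ```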
--------------------------------------------------------------------------------
/docs/content/en/usage/getting-started.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Getting Started
3 | weight: 1
4 | ---
5 |
6 |
7 |
8 | {{< toc >}}
9 |
10 |
11 | # Install ArtiVC
12 |
13 | ## Install from Release Page
14 |
15 | Download the latest ArtiVC from the [release](https://github.com/InfuseAI/ArtiVC/releases) page
16 |
17 | ## Install via Homebrew
18 |
19 | ```shell
20 | brew tap infuseai/artivc
21 | brew install artivc
22 | ```
23 |
24 | # Configuration
25 | Here we describe how to configure credentials to access the remote backends. The principle of ArtiVC is "use your tool's config". This allows you to move between toolchains painlessly.
26 |
27 | {{< tabs "config" >}}
28 | {{< tab "Local" >}}
29 | No configuration required
30 | {{< /tab >}}
31 |
32 | {{< tab "Remote Filesystem (SSH)" >}}
33 | 1. Configure the `~/.ssh/config`
34 | ```bash
35 | Host myserver
36 | HostName myserver.hosts
37 | User myname
38 | IdentityFile ~/.ssh/id_ed25519
39 | ```
40 | 1. Check if you can access the SSH server successfully
41 | ```
42 | ssh myserver
43 | ```
44 |
45 | For more information, please see the [Remote Filesystem (SSH) backend](../../backends/ssh)
46 | {{< /tab >}}
47 |
48 | {{< tab "AWS S3" >}}
49 | 1. Install the [AWS CLI](https://aws.amazon.com/cli/)
50 | 2. Configure the AWS CLI
51 | ```
52 | aws configure
53 | ```
54 | 3. Check the current config
55 | ```
56 | aws configure list
57 | ```
58 |
59 | For more information, please see the [AWS S3 backend](../../backends/s3)
60 |
61 | {{< /tab >}}
62 |
63 | {{< tab "Google Cloud Storage" >}}
64 | 1. Install the [gcloud CLI](https://cloud.google.com/sdk/gcloud)
65 | 2. Log in with the application default credentials
66 | ```
67 | gcloud auth application-default login
68 | ```
69 | 3. Check that the current credentials are available
70 | ```
71 | gcloud auth application-default print-access-token
72 | ```
73 |
74 | For more information, please see the [Google Cloud Storage backend](../../backends/gcs)
75 | {{< /tab >}}
76 |
77 | {{< tab "Azure Blob Storage" >}}
78 | 1. Install the [Azure CLI](https://docs.microsoft.com/cli/azure/install-azure-cli)
79 | 2. Log in to the Azure CLI
80 | ```
81 | az login
82 | ```
83 | 3. Check the login status
84 | ```
85 | az account show
86 | ```
87 |
88 | For more information, please see the [Azure Blob Storage backend](../../backends/azureblob)
89 | {{< /tab >}}
90 | {{< /tabs >}}
91 |
92 | # Quick Start
93 | ## Push data
94 | 1. Prepare your data. We put data in the folder `/tmp/artivc/workspace`
95 | ```shell
96 | mkdir -p /tmp/artivc/workspace
97 | cd /tmp/artivc/workspace
98 | ```
99 |
100 | put the first version of data
101 | ```shell
102 | echo a > a
103 | echo b > b
104 | echo c > c
105 | ```
106 |
107 | 1. Select a folder as the repository. Here we use `/tmp/artivc/repo`
108 | ```shell
109 | mkdir -p /tmp/artivc/repo
110 | ```
111 |
112 | 1. Init the workspace
113 |
114 | {{< tabs "init" >}}
115 | {{< tab "Local" >}}
116 | ```shell
117 | # in /tmp/artivc/workspace
118 | avc init /tmp/artivc/repo
119 | ```
120 | {{< /tab >}}
121 |
122 | {{< tab "Remote Filesystem (SSH)" >}}
123 | ```shell
124 | # in /tmp/artivc/workspace
125 | avc init <host>:path/to/repo
126 | ```
127 | {{< /tab >}}
128 |
129 | {{< tab "AWS S3" >}}
130 | ```shell
131 | # in /tmp/artivc/workspace
132 | avc init s3://<bucket>/path/to/repo
133 | ```
134 | {{< /tab >}}
135 |
136 | {{< tab "Google Cloud Storage" >}}
137 | ```shell
138 | # in /tmp/artivc/workspace
139 | avc init gs://<bucket>/path/to/repo
140 | ```
141 | {{< /tab >}}
142 |
143 | {{< tab "Azure Blob Storage" >}}
144 | ```shell
145 | # in /tmp/artivc/workspace
146 | avc init https://<storage-account>.blob.core.windows.net/<container>/path/to/repo
147 | ```
148 | {{< /tab >}}
149 |
150 | {{< /tabs >}}
151 |
152 | 1. Push the data
153 | ```shell
154 | avc push
155 | ```
156 |
157 | 1. See the commit log
158 | ```shell
159 | avc log
160 | ```
161 |
162 | 1. Add more data to your repository
163 |
164 | ```shell
165 | echo "hello" > hello
166 | avc status
167 | ```
168 |
169 | push to the remote
170 | ```shell
171 | avc push
172 | ```
173 |
174 | 1. Tag a version
175 |
176 | ```shell
177 | avc tag v0.1.0
178 | ```
179 |
180 | See the log
181 | ```shell
182 | avc log
183 | ```
184 |
185 | ## Clone data from an existing repository
186 | 1. Go to the folder where you want to clone the repository
187 |
188 | {{< tabs "clone" >}}
189 | {{< tab "Local" >}}
190 | ```shell
191 | cd /tmp/artivc/
192 | avc clone /tmp/artivc/repo another-workspace
193 | ```
194 | {{< /tab >}}
195 |
196 | {{< tab "Remote Filesystem (SSH)" >}}
197 | ```shell
198 | cd /tmp/artivc/
199 | avc clone <host>:path/to/repo
200 | ```
201 | {{< /tab >}}
202 |
203 | {{< tab "AWS S3" >}}
204 | ```shell
205 | cd /tmp/artivc/
206 | avc clone s3://<bucket>/path/to/repo
207 | ```
208 | {{< /tab >}}
209 |
210 | {{< tab "Google Cloud Storage" >}}
211 | ```shell
212 | cd /tmp/artivc/
213 | avc clone gs://<bucket>/path/to/repo
214 | ```
215 | {{< /tab >}}
216 |
217 | {{< tab "Azure Blob Storage" >}}
218 | ```shell
219 | cd /tmp/artivc/
220 | avc clone https://<storage-account>.blob.core.windows.net/<container>/path/to/repo
221 | ```
222 | {{< /tab >}}
223 |
224 | {{< /tabs >}}
225 |
226 | Then the workspace is created, and the data is downloaded.
227 |
228 | 1. See the commit log
229 | ```shell
230 | cd another-workspace/
231 | avc log
232 | ```
233 |
234 | ## Download data
235 |
236 | 1. Download the latest version
237 |
238 | {{< tabs "get-latest" >}}
239 | {{< tab "Local" >}}
240 | ```shell
241 | avc get -o /tmp/artivc/dl-latest /tmp/artivc/repo
242 | ```
243 | {{< /tab >}}
244 |
245 | {{< tab "Remote Filesystem (SSH)" >}}
246 | ```shell
247 | avc get -o /tmp/artivc/dl-latest <host>:path/to/repo
248 | ```
249 | {{< /tab >}}
250 |
251 | {{< tab "AWS S3" >}}
252 | ```shell
253 | avc get -o /tmp/artivc/dl-latest s3://<bucket>/path/to/repo
254 | ```
255 | {{< /tab >}}
256 |
257 | {{< tab "Google Cloud Storage" >}}
258 | ```shell
259 | avc get -o /tmp/artivc/dl-latest gs://<bucket>/path/to/repo
260 | ```
261 | {{< /tab >}}
262 |
263 | {{< tab "Azure Blob Storage" >}}
264 | ```shell
265 | avc get -o /tmp/artivc/dl-latest https://<storage-account>.blob.core.windows.net/<container>/path/to/repo
266 | ```
267 | {{< /tab >}}
268 |
269 | {{< /tabs >}}
270 |
271 | check the content
272 | ```shell
273 | ls /tmp/artivc/dl-latest
274 | ```
275 |
276 | 1. Or download a specific version
277 |
278 | {{< tabs "get-version" >}}
279 | {{< tab "Local" >}}
280 | ```shell
281 | avc get -o /tmp/artivc/dl-v0.1.0 /tmp/artivc/repo@v0.1.0
282 | ```
283 | {{< /tab >}}
284 |
285 | {{< tab "Remote Filesystem (SSH)" >}}
286 | ```shell
287 | avc get -o /tmp/artivc/dl-v0.1.0 <host>:path/to/repo@v0.1.0
288 | ```
289 | {{< /tab >}}
290 |
291 | {{< tab "AWS S3" >}}
292 | ```shell
293 | avc get -o /tmp/artivc/dl-v0.1.0 s3://<bucket>/path/to/repo@v0.1.0
294 | ```
295 | {{< /tab >}}
296 |
297 | {{< tab "Google Cloud Storage" >}}
298 | ```shell
299 | avc get -o /tmp/artivc/dl-v0.1.0 gs://<bucket>/path/to/repo@v0.1.0
300 | ```
301 | {{< /tab >}}
302 |
303 | {{< tab "Azure Blob Storage" >}}
304 | ```shell
305 | avc get -o /tmp/artivc/dl-v0.1.0 https://<storage-account>.blob.core.windows.net/<container>/path/to/repo@v0.1.0
306 | ```
307 | {{< /tab >}}
308 |
309 | {{< /tabs >}}
310 |
311 | check the content
312 | ```shell
313 | ls /tmp/artivc/dl-v0.1.0
314 | ```
--------------------------------------------------------------------------------
/docs/content/en/usage/ignore-file.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Ignore File
3 | weight: 12
4 | ---
5 |
6 | Just like git, you can put a `.avcignore` file at the root of the workspace to define the exclusion list. The rules are the same as for `.gitignore`. For more details, please check the [pattern format](https://git-scm.com/docs/gitignore#_pattern_format) in the git documentation.
7 |
8 | Here is an example:
9 |
10 | ```shell
11 | # Ignore files
12 | test
13 | path/to/my/file
14 | .DS_Store
15 |
16 | # Ignore folders. Use a forward slash at the end
17 | build/
18 | path/to/my/folder/
19 | /build/
20 |
21 | # Ignore all file with extension '.py'
22 | *.py
23 | ```
24 |
25 |
26 |
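27 | To check that the rules behave as intended, preview the changeset before transferring anything; ignored paths should no longer appear in the output:
28 |
29 | ```shell
30 | avc status
31 | avc push --dry-run
32 | ```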
--------------------------------------------------------------------------------
/docs/content/en/usage/images/cheatsheet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InfuseAI/ArtiVC/76a47f0105538a7511742f154c0da22ae0372eec/docs/content/en/usage/images/cheatsheet.png
--------------------------------------------------------------------------------
/docs/content/en/usage/partial-download.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Partial Download
3 | weight: 13
4 | ---
5 |
6 | By default, ArtiVC downloads all files of a version. It also supports downloading only a subset of the files in a commit.
7 |
8 | You can download partially:
9 |
10 | - by specifying multiple files.
11 | - by specifying wildcard patterns, like in `.gitignore` (see the extra example in the usage block below).
12 |
13 | For more details, please check the [pattern format](https://git-scm.com/docs/gitignore#_pattern_format) in the git documentation.
14 |
15 | ## Usage
16 | ```shell
17 | # get
18 | avc get -o output repo -- path/to/file1 path/to/file2 data/
19 |
20 | # pull
21 | avc pull -- path/to/partial
22 | avc pull v0.1.0 -- path/to/partial ...
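23 |
24 | # get with a wildcard pattern (a sketch; matching follows the .gitignore
25 | # format, and the pattern is quoted so the shell does not expand it)
26 | avc get -o output repo -- '*.json' data/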
23 | ```
24 |
25 | Note: if you would like to do a partial download, please put the path list after the "--" flag terminator.
26 |
27 |
28 | ## SEE ALSO
29 |
30 | * [avc get](/commands/avc_get/) - Download data from a repository
31 | * [avc pull](/commands/avc_pull/) - Pull data from the repository
32 |
--------------------------------------------------------------------------------
/docs/content/en/usage/windows-supports.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Windows Support
3 | weight: 100
4 | ---
5 |
6 | ArtiVC is supported on Windows platforms by running on WSL.
7 |
8 |
9 | > WSL Prerequisites:
10 | >
11 | > You must be running Windows 10 version 2004 and higher (Build 19041 and higher) or Windows 11.
12 |
13 |
14 | ## What's WSL
15 | WSL is the Windows Subsystem for Linux. It lets users run a Linux environment directly on Windows.
16 |
17 | Check the [official WSL site](https://docs.microsoft.com/en-us/windows/wsl/) for more information.
18 |
19 |
20 | ## How To
21 | You can download the ArtiVC Linux amd64/arm64 binary from our [GitHub release page](https://github.com/InfuseAI/ArtiVC/releases), depending on your platform architecture.
22 |
23 | After extracting the ArtiVC binary from the archive, all you need to do is execute it on the Linux command line as usual.
24 |
25 | - Extract the binary from the archive
26 | ```shell
27 | $ tar zxvf ArtiVC-v0.4.0-linux-arm64.tar.gz
28 | ```
29 |
30 | - Run it
31 | ```shell
32 | $ ./avc version
33 | version.BuildInfo{"Version":"v0.4.0","GitCommit":"febfe3bc579d77130570ba7d12fcf404326b0f7d","GitTreeState":"clean","GoVersion":"go1.17.8"}
34 | ```
35 |
--------------------------------------------------------------------------------
/docs/content/en/use-cases/_index.md:
--------------------------------------------------------------------------------
1 | ---
2 | weight: 3
3 | ---
--------------------------------------------------------------------------------
/docs/content/en/use-cases/backup.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Data Backup/Snapshot
3 | weight: 1
4 | ---
5 |
6 | Data backup is one of the most common requirements across many scenarios. ArtiVC is a very simple tool to back up, or even snapshot, your data in cloud storage.
7 |
8 | ## Snapshot the data
9 |
10 | 1. Init the repository
11 |
12 | ```shell
13 | avc init s3://mybucket/mydocuments
14 | ```
15 | 1. Snapshot
16 |
17 | ```
18 | avc push
19 | ```
20 | 1. Optionally, tag the current snapshot as a version
21 | ```
22 | avc tag '2022-Q1'
23 | ```
24 |
25 | ## Rollback
26 |
27 | 1. See the snapshot timeline
28 |
29 | ```
30 | avc log
31 | ```
32 |
33 | 1. Rollback. Use `--delete` to delete local files that are not listed in the snapshot version.
34 |
35 | ```
36 | avc pull --delete 49175d02
37 | ```
38 |
39 | ## Get a file from a version
40 |
41 | 1. Get a file from a given version
42 |
43 | ```
44 | avc pull 49175d02 -- path/to/my/file
45 | ```
46 |
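47 | ## Automate the snapshot
48 |
49 | To take snapshots periodically, the push can be scheduled. Here is a minimal sketch using cron (the workspace path and schedule are hypothetical):
50 |
51 | ```shell
52 | # Snapshot the workspace every night at 02:00
53 | 0 2 * * * cd /data/mydocuments && avc push -m "nightly snapshot"
54 | ```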
--------------------------------------------------------------------------------
/docs/content/en/use-cases/dataprep.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Dataset Preparation
3 | weight: 2
4 | ---
5 |
6 | Organizing a dataset can be a hassle, especially as the data constantly evolves. ArtiVC is a very suitable tool to organize datasets, with the following benefits:
7 |
8 | - No need to transfer files whose content already exists in the repository. Even if you rename a file or copy it to a different folder, ArtiVC knows it is the same content. It is common to move or keep the same images and videos as a dataset evolves. (See the sketch after this list.)
9 | - Version tagging. When there is a stable version of the dataset, we can tag a commit with a human-readable version.
10 |
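11 | As a minimal sketch of the first point (the file names are hypothetical), reorganizing files only changes metadata, because blobs are addressed by content hash:
12 |
13 | ```shell
14 | mkdir -p flowers && mv daisy.jpg flowers/daisy.jpg
15 | avc status   # the moved file is detected as the same content
16 | avc push     # no blob is re-uploaded for unchanged content
17 | ```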
11 | ## Prepare a dataset
12 |
13 | Here are the common steps to prepare a dataset
14 |
15 | 1. Create a dataset folder and use subfolders as image labels
16 | 1. Initialize the workspace.
17 | ```shell
18 | avc init s3://mybucket/datasets/flowers-classification
19 | ```
20 | 1. Push your first release
21 | ```shell
22 | avc push -m 'first version'
23 | ```
24 | 1. Clean the dataset and move the misclassified data
25 | 1. Push the dataset again
26 | ```shell
27 | # See what data will be pushed
28 | avc status
29 | # Push
30 | avc push -m 'my second version'
31 | ```
32 | 1. If new versions have been pushed by others, sync the dataset with the remote
33 | ```shell
34 | # Check the difference
35 | avc pull --dry-run
36 | # Sync with remote
37 | avc pull
38 | # or use the delete mode
39 | # avc pull --delete --dry-run
40 | # avc pull --delete
41 | ```
42 | 1. Tag the version
43 | ```shell
44 | avc push
45 | avc tag v0.1.0
46 | ```
47 | and see the change
48 | ```shell
49 | avc log
50 | ```
51 |
52 | ## Clone the dataset
53 |
54 | Use the dataset on another machine
55 |
56 | ```shell
57 | avc clone s3://mybucket/datasets/flowers-classification
58 | cd flowers-classification
59 | ```
--------------------------------------------------------------------------------
/docs/content/en/use-cases/experiment.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: ML Experiments
3 | weight: 3
4 | ---
5 | Here we use three repositories:
6 | - A dataset for training
7 | - A base model for transfer learning or fine-tuning
8 | - The experiment output, versioned by the timestamp of an experiment
9 |
10 |
11 | In this use case, we use the `get` and `put` commands, which simplify things for programmatic use cases.
12 |
13 | 1. Clone the training code
14 | ```shell
15 | git clone https://github.com/my-org/my-ml-project.git
16 | cd my-ml-project
17 | ```
18 | 1. Download the dataset and the base model
19 | ```shell
20 | avc get -o dataset/ s3://mybucket/datasets/flowers-classification@v0.1.0
21 | avc get -o base/ s3://mybucket/models/my-base-model@v0.3.0
22 | ```
23 | 1. Train and output your training results (trained model, experiment log, hyperparams, etc.) to the `artifacts/` folder
24 | ```shell
25 | python ./train.py
26 | ```
27 | 1. Upload the artifacts
28 | ```shell
29 | avc put artifacts/ s3://mybucket/experiments/project1@20220303-100504
30 | ```
31 |
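32 | Generating the version from the current time keeps each run distinct. A minimal sketch (the repository path is the hypothetical one above):
33 |
34 | ```shell
35 | TS=$(date +%Y%m%d-%H%M%S)
36 | avc put artifacts/ s3://mybucket/experiments/project1@$TS
37 | ```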
--------------------------------------------------------------------------------
/docs/data/menu/extra.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | header:
3 | - name: GitHub
4 | ref: https://github.com/InfuseAI/ArtiVC
5 | icon: gdoc_github
6 | external: true
7 | - name: Blogs
8 | ref: posts
9 | icon: gdoc_notification
--------------------------------------------------------------------------------
/docs/data/menu/main.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | main:
3 |
--------------------------------------------------------------------------------
/docs/data/menu/more.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | more:
3 | - name: News
4 | ref: "/posts"
5 | icon: "gdoc_notification"
6 | - name: Releases
7 | ref: "https://github.com/infuseai/artivc/releases"
8 | external: true
9 | icon: "gdoc_download"
10 | - name: View Source
11 | ref: "https://github.com/infuseai/artivc"
12 | external: true
13 | icon: "gdoc_github"
14 |
--------------------------------------------------------------------------------
/docs/layouts/shortcodes/sprites.html:
--------------------------------------------------------------------------------
1 |
2 | {{ range $key, $value := .Site.Data.sprites.geekdoc }}
3 |
4 |
5 |
6 |
7 |
8 | #{{ (replace $key "_" "_") | safeHTML }}
9 |
10 |
11 | {{ end }}
12 |
13 |
--------------------------------------------------------------------------------
/docs/static/.htaccess:
--------------------------------------------------------------------------------
1 | ErrorDocument 404 /404.html
2 |
3 | ExpiresActive On
4 | ExpiresDefault "access plus 600 seconds"
5 | ExpiresByType text/css "access plus 1 week"
6 | ExpiresByType text/javascript "access plus 1 month"
7 | ExpiresByType text/html "access plus 1 seconds"
8 | ExpiresByType application/javascript "access plus 1 month"
9 | ExpiresByType application/x-javascript "access plus 1 month"
10 | ExpiresByType image/gif "access plus 1 week"
11 | ExpiresByType image/jpeg "access plus 1 week"
12 | ExpiresByType image/png "access plus 1 week"
13 | ExpiresByType image/x-icon "access plus 1 month"
14 | ExpiresByType image/svg+xml "access plus 1 week"
15 | ExpiresByType application/x-font-woff "access plus 1 week"
16 | ExpiresByType application/font-woff2 "access plus 1 week"
17 |
--------------------------------------------------------------------------------
/docs/static/ArtiVC_workspace.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InfuseAI/ArtiVC/76a47f0105538a7511742f154c0da22ae0372eec/docs/static/ArtiVC_workspace.png
--------------------------------------------------------------------------------
/docs/static/_includes/example.html.part:
--------------------------------------------------------------------------------
1 |
2 | Example HTML include
3 |
4 |
5 | This is heading 4
6 | This is heading 5
7 | This is heading 6
8 |
--------------------------------------------------------------------------------
/docs/static/_includes/example.md.part:
--------------------------------------------------------------------------------
1 | _**Example Markdown include**_
2 |
3 | File including a simple Markdown table.
4 |
5 | | Head 1 | Head 2 | Head 3 |
6 | | ------ | ------ | ------ |
7 | | 1 | 2 | 3 |
8 |
--------------------------------------------------------------------------------
/docs/static/brand.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/docs/static/custom.css:
--------------------------------------------------------------------------------
1 | :root {
2 | --code-max-height: 60rem;
3 | }
4 |
5 | /* Light mode theming */
6 | :root,
7 | :root[color-mode="light"] {
8 | --header-background: #25a476;
9 | --header-font-color: #ffffff;
10 |
11 | --body-background: #ffffff;
12 | --body-font-color: #343a40;
13 |
14 | --mark-color: #ffab00;
15 |
16 | --button-background: #62cb97;
17 | --button-border-color: #4ec58a;
18 |
19 | --link-color: #518169;
20 | --link-color-visited: #c54e8a;
21 |
22 | --code-background: #f5f6f8;
23 | --code-accent-color: #e3e7eb;
24 | --code-accent-color-lite: #eff1f3;
25 |
26 | --code-copy-font-color: #6b7784;
27 | --code-copy-border-color: #adb4bc;
28 | --code-copy-success-color: #00c853;
29 |
30 | --accent-color: #e9ecef;
31 | --accent-color-lite: #f8f9fa;
32 |
33 | --control-icons: #b2bac1;
34 |
35 | --footer-background: #2f333e;
36 | --footer-font-color: #ffffff;
37 | --footer-link-color: #ffcc5c;
38 | --footer-link-color-visited: #ffcc5c;
39 | }
40 | @media (prefers-color-scheme: light) {
41 | :root {
42 | --header-background: #25a476;
43 | --header-font-color: #ffffff;
44 |
45 | --body-background: #ffffff;
46 | --body-font-color: #343a40;
47 |
48 | --mark-color: #ffab00;
49 |
50 | --button-background: #62cb97;
51 | --button-border-color: #4ec58a;
52 |
53 | --link-color: #518169;
54 | --link-color-visited: #c54e8a;
55 |
56 | --code-background: #f5f6f8;
57 | --code-accent-color: #e3e7eb;
58 | --code-accent-color-lite: #eff1f3;
59 |
60 | --code-copy-font-color: #6b7784;
61 | --code-copy-border-color: #adb4bc;
62 | --code-copy-success-color: #00c853;
63 |
64 | --accent-color: #e9ecef;
65 | --accent-color-lite: #f8f9fa;
66 |
67 | --control-icons: #b2bac1;
68 |
69 | --footer-background: #2f333e;
70 | --footer-font-color: #ffffff;
71 | --footer-link-color: #ffcc5c;
72 | --footer-link-color-visited: #ffcc5c;
73 | }
74 | }
75 |
76 | /* Dark mode theming */
77 | :root[color-mode="dark"] {
78 | --header-background: #25a476;
79 | --header-font-color: #ffffff;
80 |
81 | --body-background: #343a40;
82 | --body-font-color: #ced3d8;
83 |
84 | --mark-color: #ffab00;
85 |
86 | --button-background: #62cb97;
87 | --button-border-color: #4ec58a;
88 |
89 | --link-color: #7ac29e;
90 | --link-color-visited: #c27a9e;
91 |
92 | --code-background: #2f353a;
93 | --code-accent-color: #262b2f;
94 | --code-accent-color-lite: #2b3035;
95 |
96 | --code-copy-font-color: #adb4bc;
97 | --code-copy-border-color: #808c98;
98 | --code-copy-success-color: #00c853;
99 |
100 | --accent-color: #2b3035;
101 | --accent-color-lite: #2f353a;
102 |
103 | --control-icons: #b2bac1;
104 |
105 | --footer-background: #2f333e;
106 | --footer-font-color: #ffffff;
107 | --footer-link-color: #ffcc5c;
108 | --footer-link-color-visited: #ffcc5c;
109 | }
110 | @media (prefers-color-scheme: dark) {
111 | :root {
112 | --header-background: #25a476;
113 | --header-font-color: #ffffff;
114 |
115 | --body-background: #343a40;
116 | --body-font-color: #ced3d8;
117 |
118 | --mark-color: #ffab00;
119 |
120 | --button-background: #62cb97;
121 | --button-border-color: #4ec58a;
122 |
123 | --link-color: #7ac29e;
124 | --link-color-visited: #c27a9e;
125 |
126 | --code-background: #2f353a;
127 | --code-accent-color: #262b2f;
128 | --code-accent-color-lite: #2b3035;
129 |
130 | --code-copy-font-color: #adb4bc;
131 | --code-copy-border-color: #808c98;
132 | --code-copy-success-color: #00c853;
133 |
134 | --accent-color: #2b3035;
135 | --accent-color-lite: #2f353a;
136 |
137 | --control-icons: #b2bac1;
138 |
139 | --footer-background: #2f333e;
140 | --footer-font-color: #ffffff;
141 | --footer-link-color: #ffcc5c;
142 | --footer-link-color-visited: #ffcc5c;
143 | }
144 | }
145 |
146 | .icon-grid {
147 | width: 8rem;
148 | height: 8rem;
149 | margin: 0.2em;
150 | text-align: center;
151 | padding: 0.3em;
152 | }
153 |
154 | .icon-grid__line {
155 | height: 4rem;
156 | }
157 |
158 | .icon-grid__line .icon {
159 | font-size: 3em;
160 | }
161 |
162 | .icon-grid__line--text {
163 | font-size: 0.8em;
164 | }
165 |
166 | img {
167 | box-shadow: 0 4px 8px 0 rgb(0 0 0 / 20%), 0 6px 20px 0 rgb(0 0 0 / 19%);
168 | }
169 |
170 | img.gdoc-brand__img {
171 | width: auto;
172 | box-shadow: none;
173 | }
174 |
175 | span.gdoc-brand__title {
176 | visibility: hidden;
177 | }
178 |
179 | span.gdoc-button--entry {
180 | background: var(--header-background);
181 | color: var(--header-font-color)
182 | }
183 |
184 | span.gdoc-button--entry:hover {
185 | background: var(--button-background);
186 | }
187 |
--------------------------------------------------------------------------------
/docs/static/custom.css.example:
--------------------------------------------------------------------------------
1 | /* Global customization */
2 |
3 | :root {
4 | --code-max-height: 60rem;
5 | }
6 |
7 | /* Light mode theming */
8 | :root,
9 | :root[color-mode="light"] {
10 | --header-background: #4ec58a;
11 | --header-font-color: #ffffff;
12 |
13 | --body-background: #ffffff;
14 | --body-font-color: #343a40;
15 |
16 | --mark-color: #ffab00;
17 |
18 | --button-background: #62cb97;
19 | --button-border-color: #4ec58a;
20 |
21 | --link-color: #518169;
22 | --link-color-visited: #c54e8a;
23 |
24 | --code-background: #f5f6f8;
25 | --code-accent-color: #e3e7eb;
26 | --code-accent-color-lite: #eff1f3;
27 |
28 | --code-copy-font-color: #6b7784;
29 | --code-copy-border-color: #adb4bc;
30 | --code-copy-success-color: #00c853;
31 |
32 | --accent-color: #e9ecef;
33 | --accent-color-lite: #f8f9fa;
34 |
35 | --control-icons: #b2bac1;
36 |
37 | --footer-background: #2f333e;
38 | --footer-font-color: #ffffff;
39 | --footer-link-color: #ffcc5c;
40 | --footer-link-color-visited: #ffcc5c;
41 | }
42 | @media (prefers-color-scheme: light) {
43 | :root {
44 | --header-background: #4ec58a;
45 | --header-font-color: #ffffff;
46 |
47 | --body-background: #ffffff;
48 | --body-font-color: #343a40;
49 |
50 | --mark-color: #ffab00;
51 |
52 | --button-background: #62cb97;
53 | --button-border-color: #4ec58a;
54 |
55 | --link-color: #518169;
56 | --link-color-visited: #c54e8a;
57 |
58 | --code-background: #f5f6f8;
59 | --code-accent-color: #e3e7eb;
60 | --code-accent-color-lite: #eff1f3;
61 |
62 | --code-copy-font-color: #6b7784;
63 | --code-copy-border-color: #adb4bc;
64 | --code-copy-success-color: #00c853;
65 |
66 | --accent-color: #e9ecef;
67 | --accent-color-lite: #f8f9fa;
68 |
69 | --control-icons: #b2bac1;
70 |
71 | --footer-background: #2f333e;
72 | --footer-font-color: #ffffff;
73 | --footer-link-color: #ffcc5c;
74 | --footer-link-color-visited: #ffcc5c;
75 | }
76 | }
77 |
78 | /* Dark mode theming */
79 | :root[color-mode="dark"] {
80 | --header-background: #4ec58a;
81 | --header-font-color: #ffffff;
82 |
83 | --body-background: #343a40;
84 | --body-font-color: #ced3d8;
85 |
86 | --mark-color: #ffab00;
87 |
88 | --button-background: #62cb97;
89 | --button-border-color: #4ec58a;
90 |
91 | --link-color: #7ac29e;
92 | --link-color-visited: #c27a9e;
93 |
94 | --code-background: #2f353a;
95 | --code-accent-color: #262b2f;
96 | --code-accent-color-lite: #2b3035;
97 |
98 | --code-copy-font-color: #adb4bc;
99 | --code-copy-border-color: #808c98;
100 | --code-copy-success-color: #00c853;
101 |
102 | --accent-color: #2b3035;
103 | --accent-color-lite: #2f353a;
104 |
105 | --control-icons: #b2bac1;
106 |
107 | --footer-background: #2f333e;
108 | --footer-font-color: #ffffff;
109 | --footer-link-color: #ffcc5c;
110 | --footer-link-color-visited: #ffcc5c;
111 | }
112 | @media (prefers-color-scheme: dark) {
113 | :root {
114 | --header-background: #4ec58a;
115 | --header-font-color: #ffffff;
116 |
117 | --body-background: #343a40;
118 | --body-font-color: #ced3d8;
119 |
120 | --mark-color: #ffab00;
121 |
122 | --button-background: #62cb97;
123 | --button-border-color: #4ec58a;
124 |
125 | --link-color: #7ac29e;
126 | --link-color-visited: #c27a9e;
127 |
128 | --code-background: #2f353a;
129 | --code-accent-color: #262b2f;
130 | --code-accent-color-lite: #2b3035;
131 |
132 | --code-copy-font-color: #adb4bc;
133 | --code-copy-border-color: #808c98;
134 | --code-copy-success-color: #00c853;
135 |
136 | --accent-color: #2b3035;
137 | --accent-color-lite: #2f353a;
138 |
139 | --control-icons: #b2bac1;
140 |
141 | --footer-background: #2f333e;
142 | --footer-font-color: #ffffff;
143 | --footer-link-color: #ffcc5c;
144 | --footer-link-color-visited: #ffcc5c;
145 | }
146 | }
147 |
--------------------------------------------------------------------------------
/docs/static/favicon/android-chrome-192x192.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InfuseAI/ArtiVC/76a47f0105538a7511742f154c0da22ae0372eec/docs/static/favicon/android-chrome-192x192.png
--------------------------------------------------------------------------------
/docs/static/favicon/android-chrome-512x512.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InfuseAI/ArtiVC/76a47f0105538a7511742f154c0da22ae0372eec/docs/static/favicon/android-chrome-512x512.png
--------------------------------------------------------------------------------
/docs/static/favicon/apple-touch-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InfuseAI/ArtiVC/76a47f0105538a7511742f154c0da22ae0372eec/docs/static/favicon/apple-touch-icon.png
--------------------------------------------------------------------------------
/docs/static/favicon/favicon-16x16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InfuseAI/ArtiVC/76a47f0105538a7511742f154c0da22ae0372eec/docs/static/favicon/favicon-16x16.png
--------------------------------------------------------------------------------
/docs/static/favicon/favicon-32x32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InfuseAI/ArtiVC/76a47f0105538a7511742f154c0da22ae0372eec/docs/static/favicon/favicon-32x32.png
--------------------------------------------------------------------------------
/docs/static/favicon/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InfuseAI/ArtiVC/76a47f0105538a7511742f154c0da22ae0372eec/docs/static/favicon/favicon.ico
--------------------------------------------------------------------------------
/docs/static/favicon/favicon.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/docs/static/media/bundle-menu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InfuseAI/ArtiVC/76a47f0105538a7511742f154c0da22ae0372eec/docs/static/media/bundle-menu.png
--------------------------------------------------------------------------------
/docs/static/media/file-tree.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InfuseAI/ArtiVC/76a47f0105538a7511742f154c0da22ae0372eec/docs/static/media/file-tree.png
--------------------------------------------------------------------------------
/docs/static/media/more-menu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InfuseAI/ArtiVC/76a47f0105538a7511742f154c0da22ae0372eec/docs/static/media/more-menu.png
--------------------------------------------------------------------------------
/docs/static/socialartiv.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InfuseAI/ArtiVC/76a47f0105538a7511742f154c0da22ae0372eec/docs/static/socialartiv.png
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/infuseai/artivc
2 |
3 | go 1.17
4 |
5 | require (
6 | cloud.google.com/go/storage v1.21.0
7 | github.com/BurntSushi/toml v1.0.0
8 | github.com/aws/aws-sdk-go-v2/config v1.13.1
9 | github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.9.0
10 | github.com/aws/aws-sdk-go-v2/service/s3 v1.24.1
11 | github.com/fatih/color v1.13.0
12 | github.com/kevinburke/ssh_config v1.2.0
13 | github.com/pkg/sftp v1.13.4
14 | github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06
15 | github.com/spf13/cobra v1.3.0
16 | github.com/stretchr/testify v1.7.0
17 | golang.org/x/crypto v0.0.0-20220331220935-ae2d96664a29
18 | google.golang.org/api v0.69.0
19 | )
20 |
21 | require (
22 | cloud.google.com/go v0.100.2 // indirect
23 | cloud.google.com/go/compute v1.2.0 // indirect
24 | cloud.google.com/go/iam v0.1.1 // indirect
25 | github.com/Azure/azure-sdk-for-go/sdk/azcore v0.21.1 // indirect
26 | github.com/Azure/azure-sdk-for-go/sdk/azidentity v0.13.0 // indirect
27 | github.com/Azure/azure-sdk-for-go/sdk/internal v0.9.2 // indirect
28 | github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v0.3.0 // indirect
29 | github.com/AzureAD/microsoft-authentication-library-for-go v0.4.0 // indirect
30 | github.com/aws/aws-sdk-go-v2 v1.13.0 // indirect
31 | github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.2.0 // indirect
32 | github.com/aws/aws-sdk-go-v2/credentials v1.8.0 // indirect
33 | github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.10.0 // indirect
34 | github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.4 // indirect
35 | github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.2.0 // indirect
36 | github.com/aws/aws-sdk-go-v2/internal/ini v1.3.5 // indirect
37 | github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.7.0 // indirect
38 | github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.7.0 // indirect
39 | github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.11.0 // indirect
40 | github.com/aws/aws-sdk-go-v2/service/sso v1.9.0 // indirect
41 | github.com/aws/aws-sdk-go-v2/service/sts v1.14.0 // indirect
42 | github.com/aws/smithy-go v1.10.0 // indirect
43 | github.com/cpuguy83/go-md2man/v2 v2.0.1 // indirect
44 | github.com/davecgh/go-spew v1.1.1 // indirect
45 | github.com/golang-jwt/jwt v3.2.1+incompatible // indirect
46 | github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
47 | github.com/golang/protobuf v1.5.2 // indirect
48 | github.com/google/go-cmp v0.5.7 // indirect
49 | github.com/google/uuid v1.2.0 // indirect
50 | github.com/googleapis/gax-go/v2 v2.1.1 // indirect
51 | github.com/inconshreveable/mousetrap v1.0.0 // indirect
52 | github.com/jmespath/go-jmespath v0.4.0 // indirect
53 | github.com/kr/fs v0.1.0 // indirect
54 | github.com/kylelemons/godebug v1.1.0 // indirect
55 | github.com/mattn/go-colorable v0.1.12 // indirect
56 | github.com/mattn/go-isatty v0.0.14 // indirect
57 | github.com/pkg/browser v0.0.0-20210115035449-ce105d075bb4 // indirect
58 | github.com/pmezard/go-difflib v1.0.0 // indirect
59 | github.com/russross/blackfriday/v2 v2.1.0 // indirect
60 | github.com/spf13/pflag v1.0.5 // indirect
61 | go.opencensus.io v0.23.0 // indirect
62 | golang.org/x/net v0.0.0-20220407224826-aac1ed45d8e3 // indirect
63 | golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8 // indirect
64 | golang.org/x/sys v0.0.0-20220209214540-3681064d5158 // indirect
65 | golang.org/x/text v0.3.7 // indirect
66 | golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
67 | google.golang.org/appengine v1.6.7 // indirect
68 | google.golang.org/genproto v0.0.0-20220216160803-4663080d8bc8 // indirect
69 | google.golang.org/grpc v1.44.0 // indirect
70 | google.golang.org/protobuf v1.27.1 // indirect
71 | gopkg.in/yaml.v2 v2.4.0 // indirect
72 | gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect
73 | )
74 |
--------------------------------------------------------------------------------
/internal/core/config.go:
--------------------------------------------------------------------------------
1 | package core
2 |
3 | import (
4 | "errors"
5 | "fmt"
6 | "io/ioutil"
7 | "os"
8 | "path"
9 | "path/filepath"
10 | "strings"
11 |
12 | "github.com/BurntSushi/toml"
13 | gitignore "github.com/sabhiram/go-gitignore"
14 | )
15 |
16 | func InitWorkspace(baseDir, repo string) error {
17 | config := map[string]interface{}{
18 | "repo": map[string]interface{}{
19 | "url": repo,
20 | },
21 | }
22 |
23 | configPath := path.Join(baseDir, ".avc/config")
24 | err := mkdirsForFile(configPath)
25 | if err != nil {
26 | return err
27 | }
28 |
29 | f, err := os.Create(configPath)
30 | if err != nil {
31 | return err
32 | }
33 |
34 | if err := toml.NewEncoder(f).Encode(config); err != nil {
35 | return err
36 | }
37 |
38 | if err := f.Close(); err != nil {
39 | return err
40 | }
41 | return nil
42 | }
43 |
44 | type ArtConfig struct {
45 | config map[string]interface{}
46 | MetadataDir string
47 | BaseDir string
48 | }
49 |
50 | func NewConfig(baseDir, metadataDir, repoUrl string) ArtConfig {
51 | config := ArtConfig{
52 | BaseDir: baseDir,
53 | MetadataDir: metadataDir,
54 | }
55 | config.config = make(map[string]interface{})
56 | config.SetRepoUrl(repoUrl)
57 | return config
58 | }
59 |
60 | func LoadConfig(dir string) (ArtConfig, error) {
61 | load := func(dir string) (map[string]interface{}, error) {
62 | config := make(map[string]interface{})
63 | configPath := path.Join(dir, ".avc/config")
64 |
65 | data, err := ioutil.ReadFile(configPath)
66 | if err != nil {
67 | return nil, err
68 | }
69 |
70 | err = toml.Unmarshal(data, &config)
71 | if err != nil {
72 | return nil, err
73 | }
74 |
75 | return config, nil
76 | }
77 |
78 | if dir == "" {
79 | var err2 error
80 | dir, err2 = os.Getwd()
81 | if err2 != nil {
82 | return ArtConfig{}, err2
83 | }
84 | }
85 |
86 | for {
87 | config, err := load(dir)
88 | var e *toml.ParseError
89 | if errors.As(err, &e) {
90 | fmt.Fprintf(os.Stderr, "cannot load the workspace config\n")
91 | return ArtConfig{}, err
92 | }
93 |
94 | if err == nil {
95 | return ArtConfig{config: config, BaseDir: dir, MetadataDir: path.Join(dir, ".avc")}, nil
96 | }
97 |
98 | newDir := filepath.Dir(dir)
99 | if dir == newDir {
100 | break
101 | }
102 | dir = newDir
103 | }
104 |
105 | 	return ArtConfig{}, ErrWorkspaceNotFound
108 | }
109 |
110 | func (config *ArtConfig) Set(path string, value interface{}) {
111 | var obj map[string]interface{} = config.config
112 |
113 | parts := strings.Split(path, ".")
114 | for i, p := range parts {
115 | if i == len(parts)-1 {
116 | obj[p] = value
117 | } else {
118 | if v, ok := obj[p].(map[string]interface{}); ok {
119 | obj = v
120 | } else {
121 | child := make(map[string]interface{})
122 | obj[p] = child
123 | obj = child
124 | }
125 | }
126 | }
127 | }
128 |
129 | func (config *ArtConfig) Get(path string) interface{} {
130 | var obj interface{} = config.config
131 | var val interface{} = nil
132 |
133 | parts := strings.Split(path, ".")
134 | for _, p := range parts {
135 | if v, ok := obj.(map[string]interface{}); ok {
136 | obj = v[p]
137 | val = obj
138 | } else {
139 | return nil
140 | }
141 | }
142 |
143 | return val
144 | }
145 |
146 | func (config *ArtConfig) GetString(path string) string {
147 | var value string
148 |
149 | if config.Get(path) != nil {
150 | value = config.Get(path).(string)
151 | }
152 |
153 | return value
154 | }
155 |
156 | func (config *ArtConfig) RepoUrl() string {
157 | return config.GetString("repo.url")
158 | }
159 |
160 | func (config *ArtConfig) SetRepoUrl(repoUrl string) {
161 | config.Set("repo.url", repoUrl)
162 | }
163 |
164 | func (config *ArtConfig) Print() {
165 | var printChild func(string, interface{})
166 |
167 | printChild = func(path string, obj interface{}) {
168 | if v, ok := obj.(map[string]interface{}); ok {
169 | for key, value := range v {
170 | if path == "" {
171 | printChild(key, value)
172 | } else {
173 | printChild(path+"."+key, value)
174 | }
175 | }
176 | } else {
177 | fmt.Printf("%s=%v\n", path, obj)
178 | }
179 | }
180 |
181 | printChild("", config.config)
182 | }
183 |
184 | func (config *ArtConfig) Save() error {
185 | configPath := path.Join(config.MetadataDir, "config")
186 | f, err := os.OpenFile(configPath, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0o644)
187 | if err != nil {
188 | return err
189 | }
190 |
191 | if err := toml.NewEncoder(f).Encode(config.config); err != nil {
192 | return err
193 | }
194 |
195 | err = f.Close()
196 | if err != nil {
197 | return err
198 | }
199 |
200 | return nil
201 | }
202 |
203 | type AvcIgnore = gitignore.GitIgnore
204 |
205 | func NewAvcIgnore(dir string) (*AvcIgnore, error) {
206 | avcIgnorePath := path.Join(dir, ".avcignore")
207 |
208 | avcIgnore, err := gitignore.CompileIgnoreFile(avcIgnorePath)
209 |
210 | if err != nil {
211 | return nil, err
212 | }
213 |
214 | return avcIgnore, nil
215 | }
216 |
217 | type AvcInclude = gitignore.GitIgnore
218 |
219 | func NewAvcInclude(filePath []string) *AvcInclude {
220 | filter := gitignore.CompileIgnoreLines(filePath...)
221 |
222 | return filter
223 | }
224 |
--------------------------------------------------------------------------------
/internal/core/errors.go:
--------------------------------------------------------------------------------
1 | package core
2 |
3 | import (
4 | "errors"
5 | "fmt"
6 | )
7 |
8 | var (
9 | ErrWorkspaceNotFound = errors.New("not a workspace")
10 | ErrEmptyRepository = errors.New("no commit is found in the repository. please push data to repository first")
11 | )
12 |
13 | type ReferenceNotFoundError struct {
14 | Ref string
15 | Err error
16 | }
17 |
18 | func (err ReferenceNotFoundError) Error() string {
19 | return fmt.Sprintf("reference not found: %s", err.Ref)
20 | }
21 |
--------------------------------------------------------------------------------
/internal/core/hash.go:
--------------------------------------------------------------------------------
1 | package core
2 |
3 | import (
4 | "crypto/sha1"
5 | "fmt"
6 | "io"
7 | "os"
8 | )
9 |
10 | func Sha1Sum(content []byte) string {
11 | hasher := sha1.New()
12 | hasher.Write(content)
13 | return fmt.Sprintf("%x", hasher.Sum(nil))
14 | }
15 |
16 | func Sha1SumFromFile(path string) (string, error) {
17 | hasher := sha1.New()
18 | f, err := os.Open(path)
19 | if err != nil {
20 | return "", err
21 | }
22 | defer f.Close()
23 | if _, err := io.Copy(hasher, f); err != nil {
24 | return "", err
25 | }
26 | sum := hasher.Sum([]byte{})
27 | return fmt.Sprintf("%x", sum), nil
28 | }
29 |
--------------------------------------------------------------------------------
/internal/core/manager_test.go:
--------------------------------------------------------------------------------
1 | package core
2 |
3 | import (
4 | "os"
5 | "path/filepath"
6 | "testing"
7 |
8 | "github.com/stretchr/testify/assert"
9 | )
10 |
11 | func TestPutGet(t *testing.T) {
12 | wp1 := t.TempDir()
13 | meta1 := t.TempDir()
14 | wp2 := t.TempDir()
15 | meta2 := t.TempDir()
16 | repo := t.TempDir()
17 |
18 | path := "test"
19 | content := "test-data"
20 |
21 | assert.NoError(t, writeFile([]byte(content), filepath.Join(wp1, path)))
22 |
23 | config := NewConfig(wp1, meta1, repo)
24 | mngr1, err := NewArtifactManager(config)
25 | assert.Empty(t, err)
26 | err = mngr1.Push(PushOptions{})
27 | assert.Empty(t, err)
28 |
29 | config = NewConfig(wp2, meta2, repo)
30 | mngr2, err := NewArtifactManager(config)
31 | assert.Empty(t, err)
32 | err = mngr2.Pull(PullOptions{})
33 | assert.Empty(t, err)
34 |
35 | data, err := readFile(filepath.Join(wp2, path))
36 | assert.Empty(t, err)
37 | assert.Equal(t, string(data), content)
38 |
39 | _, err = os.Stat(filepath.Join(wp2, ".avc"))
40 | assert.True(t, os.IsNotExist(err))
41 | }
42 |
43 | func TestPushPull(t *testing.T) {
44 | wp1 := t.TempDir()
45 | wp2 := t.TempDir()
46 | repo := t.TempDir()
47 |
48 | path := "test"
49 | content := "test-data"
50 |
51 | assert.NoError(t, writeFile([]byte(content), filepath.Join(wp1, path)))
52 |
53 | assert.NoError(t, InitWorkspace(wp1, repo))
54 | config, _ := LoadConfig(wp1)
55 | mngr1, _ := NewArtifactManager(config)
56 | assert.NoError(t, mngr1.Push(PushOptions{}))
57 |
58 | assert.NoError(t, InitWorkspace(wp2, repo))
59 | config, _ = LoadConfig(wp2)
60 | mngr2, _ := NewArtifactManager(config)
61 | assert.NoError(t, mngr2.Pull(PullOptions{}))
62 |
63 | data, _ := readFile(filepath.Join(wp2, path))
64 | assert.Equal(t, string(data), content)
65 |
66 | _, err := os.Stat(filepath.Join(wp2, ".avc/config"))
67 | assert.False(t, os.IsNotExist(err))
68 | }
69 |
70 | func TestPushWithIgnore(t *testing.T) {
71 | wp1 := t.TempDir()
72 | wp2 := t.TempDir()
73 | repo := t.TempDir()
74 |
75 | assert.NoError(t, writeFile([]byte("a"), filepath.Join(wp1, "a")))
76 | assert.NoError(t, writeFile([]byte("b"), filepath.Join(wp1, "b")))
77 | assert.NoError(t, writeFile([]byte("c"), filepath.Join(wp1, "c")))
78 |
79 | avcIgnore := `
80 | a
81 | e
82 | `
83 |
84 | assert.NoError(t, writeFile([]byte(avcIgnore), filepath.Join(wp1, ".avcignore")))
85 |
86 | assert.NoError(t, InitWorkspace(wp1, repo))
87 | config, _ := LoadConfig(wp1)
88 | mngr1, _ := NewArtifactManager(config)
89 | err := mngr1.Push(PushOptions{})
90 | assert.Empty(t, err)
91 |
92 | assert.NoError(t, InitWorkspace(wp2, repo))
93 | config, _ = LoadConfig(wp2)
94 | mngr2, _ := NewArtifactManager(config)
95 | err = mngr2.Pull(PullOptions{})
96 | assert.Empty(t, err)
97 |
98 | data, _ := readFile(filepath.Join(wp2, "a"))
99 | assert.Equal(t, "", string(data))
100 | data, _ = readFile(filepath.Join(wp2, "b"))
101 | assert.Equal(t, "b", string(data))
102 | data, _ = readFile(filepath.Join(wp2, "c"))
103 | assert.Equal(t, "c", string(data))
104 | }
105 |
106 | func TestPullWithIgnore(t *testing.T) {
107 | wp1 := t.TempDir()
108 | wp2 := t.TempDir()
109 | repo := t.TempDir()
110 |
111 | // push
112 | assert.NoError(t, writeFile([]byte("a"), filepath.Join(wp1, "a")))
113 | assert.NoError(t, writeFile([]byte("b"), filepath.Join(wp1, "b")))
114 | assert.NoError(t, writeFile([]byte("c"), filepath.Join(wp1, "c")))
115 | assert.NoError(t, InitWorkspace(wp1, repo))
116 | config, _ := LoadConfig(wp1)
117 | mngr1, _ := NewArtifactManager(config)
118 | err := mngr1.Push(PushOptions{})
119 | assert.Empty(t, err)
120 |
121 | // pull
122 | avcIgnore := `
123 | a
124 | e
125 | `
126 | assert.NoError(t, writeFile([]byte(avcIgnore), filepath.Join(wp2, ".avcignore")))
127 | assert.NoError(t, writeFile([]byte("abc"), filepath.Join(wp2, "a")))
128 | assert.NoError(t, writeFile([]byte("efg"), filepath.Join(wp2, "e")))
129 | assert.NoError(t, InitWorkspace(wp2, repo))
130 | config, _ = LoadConfig(wp2)
131 | mngr2, _ := NewArtifactManager(config)
132 | err = mngr2.Pull(PullOptions{})
133 | assert.Empty(t, err)
134 |
135 | data, _ := readFile(filepath.Join(wp2, "a"))
136 | assert.Equal(t, "abc", string(data))
137 | data, _ = readFile(filepath.Join(wp2, "c"))
138 | assert.Equal(t, "c", string(data))
139 | data, _ = readFile(filepath.Join(wp2, "e"))
140 | assert.Equal(t, "efg", string(data))
141 | }
142 |
143 | func TestSymlink(t *testing.T) {
144 | var err error
145 | wp1 := t.TempDir()
146 | wp2 := t.TempDir()
147 | repo := t.TempDir()
148 |
149 | 	// First version
150 | // a = "a"
151 | // b -> bb
152 | // c -> cc
153 | // d -> dd
154 | assert.NoError(t, writeFile([]byte("a"), filepath.Join(wp1, "a")))
155 | assert.NoError(t, symlinkFile("bb", filepath.Join(wp1, "b")))
156 | assert.NoError(t, symlinkFile("cc", filepath.Join(wp1, "c")))
157 | assert.NoError(t, symlinkFile("dd", filepath.Join(wp1, "d")))
158 |
159 | assert.NoError(t, InitWorkspace(wp1, repo))
160 | config, _ := LoadConfig(wp1)
161 | mngr1, _ := NewArtifactManager(config)
162 | assert.NoError(t, mngr1.Push(PushOptions{}))
163 |
164 | assert.NoError(t, InitWorkspace(wp2, repo))
165 | config, _ = LoadConfig(wp2)
166 | mngr2, _ := NewArtifactManager(config)
167 | assert.NoError(t, mngr2.Pull(PullOptions{}))
168 |
169 | data, _ := readFile(filepath.Join(wp2, "a"))
170 | assert.Equal(t, "a", string(data))
171 | link, _ := readlinkFile(filepath.Join(wp2, "b"))
172 | assert.Equal(t, "bb", link)
173 |
174 | // Second version
175 | // a = "a" => a -> aa
176 | // b -> bb => (deleted)
177 | // c -> cc => c = "c"
178 | // d -> dd => e -> dd
179 | assert.NoError(t, deleteFile(filepath.Join(wp1, "a")))
180 | assert.NoError(t, symlinkFile("aa", filepath.Join(wp1, "a")))
181 | assert.NoError(t, deleteFile(filepath.Join(wp1, "b")))
182 | assert.NoError(t, deleteFile(filepath.Join(wp1, "c")))
183 | assert.NoError(t, writeFile([]byte("c"), filepath.Join(wp1, "c")))
184 | assert.NoError(t, deleteFile(filepath.Join(wp1, "d")))
185 | assert.NoError(t, symlinkFile("dd", filepath.Join(wp1, "e")))
186 | assert.NoError(t, mngr1.Push(PushOptions{}))
187 | assert.NoError(t, mngr2.Pull(PullOptions{Delete: true}))
188 |
189 | link, _ = readlinkFile(filepath.Join(wp2, "a"))
190 | assert.Equal(t, "aa", link)
191 | _, err = readlinkFile(filepath.Join(wp2, "b"))
192 | assert.Error(t, err)
193 | data, _ = readFile(filepath.Join(wp2, "c"))
194 | assert.Equal(t, "c", string(data))
195 | _, err = readlinkFile(filepath.Join(wp2, "d"))
196 | assert.Error(t, err)
197 | link, _ = readlinkFile(filepath.Join(wp2, "e"))
198 | assert.Equal(t, "dd", link)
199 | }
200 |
201 | func TestPermMode(t *testing.T) {
202 | wp1 := t.TempDir()
203 | wp2 := t.TempDir()
204 | repo := t.TempDir()
205 |
206 | 	// First version
207 | // a = "a" 644
208 | // b = "b" 600
209 | // c = "c" 755
210 | assert.NoError(t, writeFile([]byte("a"), filepath.Join(wp1, "a")))
211 | assert.NoError(t, chmod(filepath.Join(wp1, "a"), 0o644))
212 | assert.NoError(t, writeFile([]byte("b"), filepath.Join(wp1, "b")))
213 | assert.NoError(t, chmod(filepath.Join(wp1, "b"), 0o600))
214 | assert.NoError(t, writeFile([]byte("c"), filepath.Join(wp1, "c")))
215 | assert.NoError(t, chmod(filepath.Join(wp1, "c"), 0o755))
216 |
217 | assert.NoError(t, InitWorkspace(wp1, repo))
218 | config, _ := LoadConfig(wp1)
219 | mngr1, _ := NewArtifactManager(config)
220 | assert.NoError(t, mngr1.Push(PushOptions{}))
221 |
222 | assert.NoError(t, InitWorkspace(wp2, repo))
223 | config, _ = LoadConfig(wp2)
224 | mngr2, _ := NewArtifactManager(config)
225 | assert.NoError(t, mngr2.Pull(PullOptions{}))
226 |
227 | mode, _ := readFileMode(filepath.Join(wp2, "a"))
228 | assert.Equal(t, 0o644, int(mode))
229 | mode, _ = readFileMode(filepath.Join(wp2, "b"))
230 | assert.Equal(t, 0o600, int(mode))
231 | mode, _ = readFileMode(filepath.Join(wp2, "c"))
232 | assert.Equal(t, 0o755, int(mode))
233 |
234 | 	// Second version
235 | // a = "a" 644 => a = "a" 755
236 | // b = "b" 600 => bb = "b" 600
237 | // c = "c" 755 => cc = "c" 700
238 | // (new) d = "d" 755
239 | assert.NoError(t, chmod(filepath.Join(wp1, "a"), 0o755))
240 | assert.NoError(t, renameFile(filepath.Join(wp1, "b"), filepath.Join(wp1, "bb")))
241 | assert.NoError(t, renameFile(filepath.Join(wp1, "c"), filepath.Join(wp1, "cc")))
242 | assert.NoError(t, chmod(filepath.Join(wp1, "cc"), 0o700))
243 | assert.NoError(t, writeFile([]byte("d"), filepath.Join(wp1, "d")))
244 | assert.NoError(t, chmod(filepath.Join(wp1, "d"), 0o755))
245 |
246 | assert.NoError(t, mngr1.Push(PushOptions{}))
247 | assert.NoError(t, mngr2.Pull(PullOptions{Delete: true}))
248 |
249 | mode, _ = readFileMode(filepath.Join(wp2, "a"))
250 | assert.Equal(t, 0o755, int(mode))
251 | mode, _ = readFileMode(filepath.Join(wp2, "bb"))
252 | assert.Equal(t, 0o600, int(mode))
253 | mode, _ = readFileMode(filepath.Join(wp2, "cc"))
254 | assert.Equal(t, 0o700, int(mode))
255 | mode, _ = readFileMode(filepath.Join(wp2, "d"))
256 | assert.Equal(t, 0o755, int(mode))
257 | }
258 |
--------------------------------------------------------------------------------
/internal/core/types.go:
--------------------------------------------------------------------------------
1 | package core
2 |
3 | import (
4 | "encoding/json"
5 | "fmt"
6 | "io/fs"
7 | "os"
8 | "path/filepath"
9 | "time"
10 | )
11 |
12 | const (
13 | RefLocal = "__local__"
14 | RefLatest = "latest"
15 | )
16 |
17 | type BlobMetaData struct {
18 | Path string `json:"path"`
19 | Hash string `json:"hash,omitempty"`
20 | Link string `json:"link,omitempty"`
21 | Mode fs.FileMode `json:"mode"`
22 | Size int64 `json:"size"`
23 | }
24 |
25 | type Commit struct {
26 | CreatedAt time.Time `json:"createdAt"`
27 | Parent string `json:"parent,omitempty"`
28 | Message *string `json:"messaage,omitempty"`
29 | Blobs []BlobMetaData `json:"blobs"`
30 | }
31 |
32 | type PushOptions struct {
33 | DryRun bool
34 | Message *string
35 | Tag *string
36 | }
37 |
38 | type ChangeMode int
39 |
40 | type PullOptions struct {
41 | DryRun bool
42 | NoFetch bool
43 | Delete bool
44 | RefOrCommit *string
45 | FileFilter PathFilter
46 | }
47 |
48 | type PathFilter func(path string) bool
49 |
50 | type DiffOptions struct {
51 | LeftRef string
52 | LeftCommit *Commit
53 | RightRef string
54 | RightCommit *Commit
55 | AddFilter PathFilter
56 | ChangeFilter PathFilter
57 | DeleteFilter PathFilter
58 | IncludeFilter PathFilter
59 | NoDelete bool
60 | }
61 |
62 | type DiffType int
63 |
64 | const (
65 | DiffTypeAdd DiffType = iota
66 | DiffTypeDelete
67 | DiffTypeChange
68 | DiffTypeRename
69 | )
70 |
71 | type DiffRecord struct {
72 | Type DiffType
73 | Hash string
74 | Link string
75 | Path string
76 | Size int64
77 | Mode fs.FileMode
78 | OldPath string
79 | OldLink string
80 | OldHash string
81 | OldSize int64
82 | OldMode fs.FileMode
83 | }
84 |
85 | type DiffResult struct {
86 | Records []DiffRecord
87 | }
88 |
89 | type BlobDownloadResult struct {
90 | // File not changed. Skip the download
91 | Skip bool
92 | }
93 |
94 | type BlobUploadResult struct {
95 | 	// Blob exists in the repo. Skip the upload
96 | Skip bool
97 | }
98 |
99 | func MakeBlobMetadata(baseDir string, path string) (BlobMetaData, error) {
100 | fullPath := filepath.Join(baseDir, path)
101 | info, err := os.Lstat(fullPath)
102 | if err != nil {
103 | return BlobMetaData{}, err
104 | }
105 |
106 | if info.Mode()&fs.ModeSymlink != 0 {
107 | link, err := os.Readlink(fullPath)
108 | if err != nil {
109 | return BlobMetaData{}, err
110 | }
111 |
112 | return BlobMetaData{
113 | Path: path,
114 | Link: link,
115 | Mode: 0,
116 | }, nil
117 | } else if info.Mode().IsRegular() {
118 | hash, _ := Sha1SumFromFile(fullPath)
119 | return BlobMetaData{
120 | Path: path,
121 | Hash: hash,
122 | Mode: info.Mode().Perm(),
123 | Size: info.Size(),
124 | }, nil
125 | } else {
126 | fmt.Printf("%s %s\n", info.Mode(), info.Name())
127 | return BlobMetaData{}, os.ErrInvalid
128 | }
129 | }
130 |
131 | func MakeCommitMetadata(commit *Commit) ([]byte, string) {
132 | jsondata, _ := json.Marshal(commit)
133 | hash := Sha1Sum(jsondata)
134 | return jsondata, hash
135 | }
136 |
--------------------------------------------------------------------------------
/internal/core/utils.go:
--------------------------------------------------------------------------------
1 | package core
2 |
3 | import (
4 | "compress/gzip"
5 | "fmt"
6 | "io"
7 | "io/fs"
8 | "io/ioutil"
9 | "os"
10 | "path/filepath"
11 | )
12 |
13 | func MakeObjectPath(hash string) string {
14 | return fmt.Sprintf("objects/%s/%s", hash[:2], hash[2:])
15 | }
16 |
17 | func MakeCommitPath(hash string) string {
18 | return fmt.Sprintf("commits/%s", hash)
19 | }
20 |
21 | func MakeRefPath(ref string) string {
22 | return fmt.Sprintf("refs/%s", ref)
23 | }
24 |
25 | func MakeTagPath(ref string) string {
26 | return fmt.Sprintf("refs/tags/%s", ref)
27 | }
28 |
29 | func mkdirsForFile(file string) error {
30 | return os.MkdirAll(filepath.Dir(file), fs.ModePerm)
31 | }
32 |
33 | func removeEmptyDirs(dir string, removeSelf bool) (bool, error) {
34 | var hasEntries bool
35 |
36 | 	entries, err := os.ReadDir(dir)
37 | if err != nil {
38 | return false, err
39 | }
40 | 	for _, entry := range entries {
41 | if entry.IsDir() {
42 | subdir := filepath.Join(dir, entry.Name())
43 | removed, err := removeEmptyDirs(subdir, true)
44 | if err != nil {
45 | return false, err
46 | }
47 | if !removed {
48 | hasEntries = true
49 | }
50 | } else {
51 | hasEntries = true
52 | }
53 | }
54 |
55 | if !hasEntries && removeSelf {
56 | err := os.Remove(dir)
57 | if err != nil {
58 | return false, err
59 | }
60 | return true, nil
61 | }
62 |
63 | return false, nil
64 | }
65 |
66 | func writeFile(content []byte, dst string) error {
67 | err := os.MkdirAll(filepath.Dir(dst), fs.ModePerm)
68 | if err != nil {
69 | return err
70 | }
71 |
72 | err = ioutil.WriteFile(dst, content, 0o644)
73 | return err
74 | }
75 |
76 | func readFile(src string) ([]byte, error) {
77 | return ioutil.ReadFile(src)
78 | }
79 |
80 | func readFileMode(src string) (fs.FileMode, error) {
81 | info, err := os.Lstat(src)
82 | if err != nil {
83 | return 0, err
84 | }
85 |
86 | return info.Mode(), err
87 | }
88 |
89 | func writeGzipFile(content []byte, dst string) error {
90 | err := os.MkdirAll(filepath.Dir(dst), fs.ModePerm)
91 | if err != nil {
92 | return err
93 | }
94 |
95 | file, err := os.Create(dst)
96 | if err != nil {
97 | return err
98 | }
99 | 	// Close the underlying file as well: gzip.Writer.Close flushes the
100 | 	// gzip stream but does not close the file it writes to.
101 | 	defer file.Close()
102 | 	gfile := gzip.NewWriter(file)
103 | 	defer gfile.Close()
104 | 	_, err = gfile.Write(content)
105 | 	return err
103 | }
104 |
105 | func readGzipFile(src string) ([]byte, error) {
106 | file, err := os.Open(src)
107 | if err != nil {
108 | return nil, err
109 | }
110 | defer file.Close()
111 | gfile, err := gzip.NewReader(file)
112 | if err != nil {
113 | return nil, err
114 | }
115 | defer gfile.Close()
116 | return io.ReadAll(gfile)
117 | }
118 |
119 | func deleteFile(src string) error {
120 | return os.Remove(src)
121 | }
122 |
123 | func renameFile(src, dst string) error {
124 | err := os.MkdirAll(filepath.Dir(dst), fs.ModePerm)
125 | if err != nil {
126 | return err
127 | }
128 |
129 | return os.Rename(src, dst)
130 | }
131 |
132 | func symlinkFile(target, src string) error {
133 | return os.Symlink(target, src)
134 | }
135 |
136 | func readlinkFile(src string) (string, error) {
137 | return os.Readlink(src)
138 | }
139 |
140 | func chmod(src string, mode fs.FileMode) error {
141 | return os.Chmod(src, mode)
142 | }
143 |
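144 | // Usage sketch: writeGzipFile and readGzipFile form a round trip; the path
145 | // below is hypothetical:
146 | //
147 | //     if err := writeGzipFile([]byte("hello"), "/tmp/repo/commits/abc"); err == nil {
148 | //         data, _ := readGzipFile("/tmp/repo/commits/abc")
149 | //         _ = data // []byte("hello")
150 | //     }
151 | 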
--------------------------------------------------------------------------------
/internal/core/utils_test.go:
--------------------------------------------------------------------------------
1 | package core
2 |
3 | import (
4 | "os"
5 | "testing"
6 |
7 | "github.com/stretchr/testify/assert"
8 | )
9 |
10 | func TestRemoveEmptyDirs(t *testing.T) {
11 | tmpDir := t.TempDir()
12 |
13 | assert.NoError(t, os.Mkdir(tmpDir+"/a", os.ModePerm))
14 | removed, err := removeEmptyDirs(tmpDir+"/a", true)
15 | assert.True(t, removed)
16 | assert.Empty(t, err)
17 |
18 | assert.NoError(t, os.Mkdir(tmpDir+"/b", os.ModePerm))
19 | assert.NoError(t, writeFile([]byte("hello"), tmpDir+"/b/hello"))
20 | removed, err = removeEmptyDirs(tmpDir+"/b", true)
21 | assert.False(t, removed)
22 | assert.Empty(t, err)
23 |
24 | assert.NoError(t, os.MkdirAll(tmpDir+"/c/c/c/c/a", os.ModePerm))
25 | assert.NoError(t, os.MkdirAll(tmpDir+"/c/c/c/c/b", os.ModePerm))
26 | assert.NoError(t, os.MkdirAll(tmpDir+"/c/c/c/c/c", os.ModePerm))
27 | assert.NoError(t, writeFile([]byte("hello"), tmpDir+"/c/c/a"))
28 | removed, err = removeEmptyDirs(tmpDir+"/c", true)
29 | assert.Empty(t, err)
30 | assert.False(t, removed)
31 | stat, err := os.Stat(tmpDir + "/c/c/a")
32 | assert.Equal(t, "a", stat.Name())
33 | assert.Empty(t, err)
34 | _, err = os.Stat(tmpDir + "/c/c/c")
35 | assert.Error(t, err)
36 | }
37 |
--------------------------------------------------------------------------------
/internal/executor/executor.go:
--------------------------------------------------------------------------------
1 | package executor
2 |
3 | import (
4 | "context"
5 | "runtime"
6 | "sync"
7 | )
8 |
9 | type TaskFunc func(ctx context.Context) error
10 |
11 | func ExecuteAll(numCPU int, tasks ...TaskFunc) error {
12 | var err error
13 | var once sync.Once
14 | ctx, cancel := context.WithCancel(context.Background())
15 | defer cancel()
16 | 
17 | if numCPU == 0 {
18 | numCPU = runtime.NumCPU()
19 | }
20 | 
21 | wg := sync.WaitGroup{}
22 | wg.Add(numCPU)
23 | 
24 | queue := make(chan TaskFunc, len(tasks))
25 | // Add tasks to the queue
26 | for _, task := range tasks {
27 | queue <- task
28 | }
29 | close(queue)
30 | 
31 | // Spawn the workers
32 | for i := 0; i < numCPU; i++ {
33 | go func() {
34 | defer wg.Done()
35 | for {
36 | select {
37 | case task, ok := <-queue:
38 | if ctx.Err() != nil || !ok {
39 | return
40 | }
41 | if e := task(ctx); e != nil {
42 | // Record only the first error; unsynchronized writes to
43 | // err from multiple workers would be a data race.
44 | once.Do(func() { err = e })
45 | cancel()
46 | }
47 | case <-ctx.Done():
48 | return
49 | }
50 | }
51 | }()
52 | }
53 | 
54 | // Wait until all workers have drained the queue or been canceled
55 | wg.Wait()
56 | return err
57 | }
58 | 
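59 | // Usage sketch: run tasks on a bounded worker pool; passing numCPU == 0
60 | // falls back to runtime.NumCPU(). The task bodies here are placeholders:
61 | //
62 | //     err := ExecuteAll(0,
63 | //         func(ctx context.Context) error { return nil },
64 | //         func(ctx context.Context) error { return nil },
65 | //     )
66 | 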
--------------------------------------------------------------------------------
/internal/executor/executor_test.go:
--------------------------------------------------------------------------------
1 | package executor
2 |
3 | import (
4 | "context"
5 | "errors"
6 | "math/rand"
7 | "runtime"
8 | "sync/atomic"
9 | "testing"
10 | "time"
11 |
12 | "github.com/stretchr/testify/assert"
13 | )
14 |
15 | func TestHappyPath(t *testing.T) {
16 | var str1, str2 *string
17 |
18 | task1 := func(ctx context.Context) error {
19 | str := "foo"
20 | str1 = &str
21 | return nil
22 | }
23 |
24 | task2 := func(ctx context.Context) error {
25 | str := "bar"
26 | str2 = &str
27 | return nil
28 | }
29 |
30 | err := ExecuteAll(runtime.NumCPU(), task1, task2)
31 | assert.Empty(t, err)
32 | assert.Equal(t, "foo", *str1)
33 | assert.Equal(t, "bar", *str2)
34 | }
35 |
36 | func TestFailedPath(t *testing.T) {
37 | ErrFoo := errors.New("foo")
38 |
39 | taskOk := func(ctx context.Context) error {
40 | return nil
41 | }
42 |
43 | taskErr := func(ctx context.Context) error {
44 | return ErrFoo
45 | }
46 |
47 | err := ExecuteAll(runtime.NumCPU(), taskOk, taskErr)
48 | assert.Equal(t, ErrFoo, err)
49 |
50 | err = ExecuteAll(runtime.NumCPU(), taskErr, taskOk)
51 | assert.Equal(t, ErrFoo, err)
52 | }
53 |
54 | func TestConcurrent(t *testing.T) {
55 | tasks := []TaskFunc{}
56 | var counter int32
57 |
58 | for i := 0; i < 100; i++ {
59 | f := func(ctx context.Context) error {
60 | time.Sleep(time.Duration(rand.Intn(50)) * time.Millisecond)
61 | atomic.AddInt32(&counter, 1)
62 | return nil
63 | }
64 | tasks = append(tasks, f)
65 |
66 | }
67 |
68 | err := ExecuteAll(50, tasks...)
69 | assert.Empty(t, err)
70 | assert.Equal(t, int32(100), counter)
71 | }
72 |
73 | func TestContext(t *testing.T) {
74 | ErrFoo := errors.New("foo")
75 |
76 | taskForever := func(ctx context.Context) error {
77 | <-ctx.Done()
78 | return nil
79 | }
80 |
81 | taskErr := func(ctx context.Context) error {
82 | return ErrFoo
83 | }
84 |
85 | err := ExecuteAll(3, taskForever, taskErr)
86 | assert.Equal(t, ErrFoo, err)
87 | }
88 |
--------------------------------------------------------------------------------
/internal/log/log.go:
--------------------------------------------------------------------------------
1 | package log
2 |
3 | import (
4 | "log"
5 | "os"
6 | )
7 |
8 | var logger *log.Logger
9 |
10 | func SetDebug(debug bool) {
11 | if debug {
12 | logger = log.New(os.Stderr, "[DBG] ", log.Ldate|log.Lmicroseconds)
13 | } else {
14 | logger = nil
15 | }
16 | }
17 |
18 | func Debug(v ...interface{}) {
19 | if logger == nil {
20 | return
21 | }
22 |
23 | logger.Print(v...)
24 | }
25 |
26 | func Debugf(format string, v ...interface{}) {
27 | if logger == nil {
28 | return
29 | }
30 |
31 | logger.Printf(format, v...)
32 | }
33 |
34 | func Debugln(v ...interface{}) {
35 | if logger == nil {
36 | return
37 | }
38 |
39 | logger.Println(v...)
40 | }
41 |
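42 | // Usage sketch, from a caller's point of view (the package is imported as
43 | // "github.com/infuseai/artivc/internal/log"): debug output is discarded
44 | // until SetDebug(true) is called, after which it goes to stderr with a
45 | // "[DBG] " prefix.
46 | //
47 | //     log.SetDebug(true)
48 | //     log.Debugf("fetched %d objects", 3)
49 | 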
--------------------------------------------------------------------------------
/internal/repository/azureblob.go:
--------------------------------------------------------------------------------
1 | package repository
2 |
3 | import (
4 | "context"
5 | "errors"
6 | "fmt"
7 | neturl "net/url"
8 | "os"
9 | "path/filepath"
10 | "strings"
11 |
12 | "github.com/Azure/azure-sdk-for-go/sdk/azidentity"
13 | "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob"
14 | "github.com/infuseai/artivc/internal/log"
15 | )
16 |
17 | type AzureBlobRepository struct {
18 | Client *azblob.ContainerClient
19 | Prefix string
20 | BasePath string
21 | }
22 |
23 | func IsAzureStorageUrl(repoUrl string) bool {
24 | url, err := neturl.Parse(repoUrl)
25 | if err != nil {
26 | return false
27 | }
28 |
29 | return strings.HasSuffix(url.Host, ".blob.core.windows.net")
30 | }
31 |
32 | func ParseAzureBlobUrl(urlString string) (storageAccount, container, prefix string, err error) {
33 | url, err := neturl.Parse(urlString)
34 | if err != nil {
35 | return
36 | }
37 |
38 | storageAccount = url.Host[:len(url.Host)-len(".blob.core.windows.net")]
39 | comps := strings.Split(url.Path, "/")
40 | if len(comps) < 2 {
41 | err = fmt.Errorf("invalid azure blob url: %s", urlString)
42 | return
43 | }
44 |
45 | container = comps[1]
46 | prefix = strings.Join(comps[2:], "/")
47 |
48 | return
49 | }
50 |
51 | func makeAzureServiceAccountUrl(accountName string) string {
52 | return fmt.Sprintf("https://%s.blob.core.windows.net/", accountName)
53 | }
54 |
55 | func NewAzureBlobRepository(repo string) (*AzureBlobRepository, error) {
56 | accountName, container, prefix, err := ParseAzureBlobUrl(repo)
57 | if err != nil {
58 | return nil, err
59 | }
60 |
61 | serviceUrl := makeAzureServiceAccountUrl(accountName)
62 | var serviceClient azblob.ServiceClient
63 |
64 | var accountKey string
65 | if value := os.Getenv("AZURE_STORAGE_ACCOUNT_KEY"); value != "" {
66 | accountKey = value
67 | }
68 |
69 | if accountKey != "" {
70 | credential, err := azblob.NewSharedKeyCredential(accountName, accountKey)
71 | if err != nil {
72 | return nil, err
73 | }
74 |
75 | serviceClient, err = azblob.NewServiceClientWithSharedKey(serviceUrl, credential, nil)
76 | if err != nil {
77 | return nil, err
78 | }
79 | } else {
80 | credential, err := azidentity.NewDefaultAzureCredential(nil)
81 | if err != nil {
82 | return nil, err
83 | }
84 |
85 | serviceClient, err = azblob.NewServiceClient(serviceUrl, credential, nil)
86 | if err != nil {
87 | return nil, err
88 | }
89 | }
90 |
91 | containerClient := serviceClient.NewContainerClient(container)
92 |
93 | r := &AzureBlobRepository{
94 | Client: &containerClient,
95 | BasePath: repo,
96 | Prefix: prefix,
97 | }
98 |
99 | // check if the client has enough permission
100 | dir, err := os.MkdirTemp("", "artivc-azblob-*")
101 | if err != nil {
102 | return nil, err
103 | }
104 | defer os.RemoveAll(dir) // clean up
105 |
106 | err = r.Download("refs/latest", filepath.Join(dir, "latest"), nil)
107 | if err != nil {
108 | var internalError *azblob.InternalError
109 | if !errors.As(err, &internalError) {
110 | return nil, err
111 | }
112 |
113 | var errStorage *azblob.StorageError
114 | if !internalError.As(&errStorage) {
115 | return nil, internalError
116 | }
117 |
118 | if errStorage.ErrorCode == azblob.StorageErrorCodeBlobNotFound {
119 | // blob not found. but authentication/authorization check is ok. PASS
120 | } else if errStorage.ErrorCode == azblob.StorageErrorCodeAuthorizationPermissionMismatch {
121 | // authorization permission mismatch
122 | log.Debugln(errStorage.Error())
123 | fmt.Fprintf(os.Stderr, "Authorization permission mismatch. Please assign 'Storage Blob Data Contributor' role to the logged-in account in the storage account '%s'\n", accountName)
124 | fmt.Fprintln(os.Stderr, "Please see https://docs.microsoft.com/azure/storage/blobs/assign-azure-role-data-access")
125 | fmt.Fprintln(os.Stderr, "")
126 | return nil, fmt.Errorf("authorization permission mismatch")
127 | } else {
128 | // other error
129 | return nil, errStorage
130 | }
131 | }
132 |
133 | return r, nil
134 | }
135 |
136 | func (repo *AzureBlobRepository) Upload(localPath, repoPath string, m *Meter) error {
137 | ctx := context.Background()
138 |
139 | // file
140 | src, err := os.Open(localPath)
141 | if err != nil {
142 | return err
143 | }
144 | defer src.Close()
145 |
146 | // upload
147 | blobPath := filepath.Join(repo.Prefix, repoPath)
148 | blobClient := repo.Client.NewBlockBlobClient(blobPath)
149 |
150 | _, err = blobClient.UploadFileToBlockBlob(
151 | ctx,
152 | src,
153 | azblob.HighLevelUploadToBlockBlobOption{
154 | Progress: func(bytesTransferred int64) {
155 | if m != nil {
156 | m.SetBytes(bytesTransferred)
157 | }
158 | },
159 | Parallelism: 10,
160 | },
161 | )
162 |
163 | return err
164 | }
165 |
166 | func (repo *AzureBlobRepository) Download(repoPath, localPath string, m *Meter) error {
167 | ctx := context.Background()
168 |
169 | // file
170 | dest, err := os.Create(localPath)
171 | if err != nil {
172 | return err
173 | }
174 | defer dest.Close()
175 |
176 | // download
177 | blobPath := filepath.Join(repo.Prefix, repoPath)
178 | blobClient := repo.Client.NewBlockBlobClient(blobPath)
179 | err = blobClient.DownloadBlobToFile(ctx, 0, 0, dest, azblob.HighLevelDownloadFromBlobOptions{
180 | Progress: func(bytesTransferred int64) {
181 | if m != nil {
182 | m.SetBytes(bytesTransferred)
183 | }
184 | },
185 | Parallelism: 10,
186 | })
187 | if err != nil {
188 | return err
189 | }
190 |
191 | return nil
192 | }
193 |
194 | func (repo *AzureBlobRepository) Delete(repoPath string) error {
195 | ctx := context.Background()
196 |
197 | blobPath := filepath.Join(repo.Prefix, repoPath)
198 | blobClient := repo.Client.NewBlockBlobClient(blobPath)
199 | _, err := blobClient.Delete(ctx, nil)
200 | if err != nil {
201 | return err
202 | }
203 |
204 | return nil
205 | }
206 |
207 | func (repo *AzureBlobRepository) Stat(repoPath string) (FileInfo, error) {
208 | ctx := context.Background()
209 |
210 | blobPath := filepath.Join(repo.Prefix, repoPath)
211 | blobClient := repo.Client.NewBlockBlobClient(blobPath)
212 | _, err := blobClient.GetProperties(ctx, nil)
213 | if err != nil {
214 | return nil, err
215 | }
216 |
217 | return &SimpleFileInfo{
218 | name: filepath.Base(repoPath),
219 | }, nil
220 | }
221 |
222 | func (repo *AzureBlobRepository) List(repoPath string) ([]FileInfo, error) {
223 | ctx := context.Background()
224 | entries := make([]FileInfo, 0)
225 | prefix := filepath.Join(repo.Prefix, repoPath) + "/"
226 | pager := repo.Client.ListBlobsHierarchy("/", &azblob.ContainerListBlobHierarchySegmentOptions{Prefix: &prefix})
227 | for pager.NextPage(ctx) {
228 | resp := pager.PageResponse()
229 |
230 | for _, blobInfo := range resp.Segment.BlobItems {
231 | n := *blobInfo.Name
232 | name := n[len(prefix):]
233 | entries = append(entries, &SimpleFileInfo{
234 | name: name,
235 | isDir: false,
236 | })
237 | }
238 |
239 | for _, blobPrefix := range resp.Segment.BlobPrefixes {
240 | p := *blobPrefix.Name
241 | name := p[len(prefix) : len(p)-1]
242 | entries = append(entries, &SimpleFileInfo{
243 | name: name,
244 | isDir: true,
245 | })
246 | }
247 | }
248 |
249 | return entries, nil
250 | }
251 |
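252 | // Note on authentication: if AZURE_STORAGE_ACCOUNT_KEY is set, a shared-key
253 | // credential is used; otherwise the default Azure credential chain
254 | // (environment variables, managed identity, az login) is tried. A repository
255 | // URL has the form (account and container below are hypothetical):
256 | //
257 | //     https://myaccount.blob.core.windows.net/mycontainer/prefix
258 | 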
--------------------------------------------------------------------------------
/internal/repository/azureblob_test.go:
--------------------------------------------------------------------------------
1 | package repository
2 |
3 | import (
4 | "testing"
5 |
6 | "github.com/stretchr/testify/assert"
7 | )
8 |
9 | func Test_ParseAzureBlobUrl(t *testing.T) {
10 | testCases := []struct {
11 | repo string
12 | storageAccount string
13 | container string
14 | prefix string
15 | }{
16 | {
17 | repo: "https://artivc.blob.core.windows.net/avc",
18 | storageAccount: "artivc",
19 | container: "avc",
20 | prefix: "",
21 | },
22 | {
23 | repo: "https://artivc.blob.core.windows.net/avc/",
24 | storageAccount: "artivc",
25 | container: "avc",
26 | prefix: "",
27 | },
28 | {
29 | repo: "https://artivc.blob.core.windows.net/avc/abc",
30 | storageAccount: "artivc",
31 | container: "avc",
32 | prefix: "abc",
33 | },
34 | {
35 | repo: "https://artivc.blob.core.windows.net/avc/abc/",
36 | storageAccount: "artivc",
37 | container: "avc",
38 | prefix: "abc/",
39 | },
40 | }
41 | for _, tC := range testCases {
42 | t.Run(tC.repo, func(t *testing.T) {
43 | storageAccount, container, prefix, err := ParseAzureBlobUrl(tC.repo)
44 | if err != nil {
45 | t.Error(err)
46 | return
47 | }
48 |
49 | assert.Equal(t, tC.storageAccount, storageAccount)
50 | assert.Equal(t, tC.container, container)
51 | assert.Equal(t, tC.prefix, prefix)
52 |
53 | })
54 | }
55 | }
56 |
--------------------------------------------------------------------------------
/internal/repository/errors.go:
--------------------------------------------------------------------------------
1 | package repository
2 |
3 | import "errors"
4 |
5 | var ErrUnsupportedRepository = errors.New("unsupported repository")
6 |
7 | type UnsupportedRepositoryError struct {
8 | Message string
9 | }
10 |
11 | func (err UnsupportedRepositoryError) Error() string {
12 | return err.Message
13 | }
14 |
--------------------------------------------------------------------------------
/internal/repository/gcs.go:
--------------------------------------------------------------------------------
1 | package repository
2 |
3 | import (
4 | "context"
5 | "os"
6 | "path/filepath"
7 | "strings"
8 |
9 | "cloud.google.com/go/storage"
10 | "google.golang.org/api/iterator"
11 | )
12 |
13 | // Google Cloud Storage
14 | type GCSRepository struct {
15 | Bucket string
16 | BasePath string
17 | Client *storage.Client
18 | }
19 |
20 | func NewGCSRepository(bucket, basePath string) (*GCSRepository, error) {
21 | ctx := context.Background()
22 | basePath = strings.TrimPrefix(basePath, "/")
23 | client, err := storage.NewClient(ctx)
24 | if err != nil {
25 | return nil, err
26 | }
27 |
28 | return &GCSRepository{
29 | Bucket: bucket,
30 | BasePath: basePath,
31 | Client: client,
32 | }, nil
33 | }
34 |
35 | func (repo *GCSRepository) Upload(localPath, repoPath string, m *Meter) error {
36 | ctx := context.Background()
37 |
38 | // client, bucket, obj
39 | client := repo.Client
40 | bkt := client.Bucket(repo.Bucket)
41 | obj := bkt.Object(filepath.Join(repo.BasePath, repoPath))
42 |
43 | // src
44 | src, err := os.Open(localPath)
45 | if err != nil {
46 | return err
47 | }
48 | defer src.Close()
49 |
50 | // dest
51 | dest := obj.NewWriter(ctx)
52 | 
53 | // copy
54 | _, err = CopyWithMeter(dest, src, m)
55 | if err != nil {
56 | dest.Close()
57 | return err
58 | }
59 | 
60 | // the upload is committed when the writer is closed, so surface its error
61 | return dest.Close()
62 | }
63 |
64 | func (repo *GCSRepository) Download(repoPath, localPath string, m *Meter) error {
65 | ctx := context.Background()
66 |
67 | // client, bucket, obj
68 | client := repo.Client
69 | bkt := client.Bucket(repo.Bucket)
70 | obj := bkt.Object(filepath.Join(repo.BasePath, repoPath))
71 |
72 | // src
73 | src, err := obj.NewReader(ctx)
74 | if err != nil {
75 | return err
76 | }
77 | defer src.Close()
78 |
79 | // dest
80 | dest, err := os.Create(localPath)
81 | if err != nil {
82 | return err
83 | }
84 | defer dest.Close()
85 |
86 | // copy
87 | _, err = CopyWithMeter(dest, src, m)
88 | if err != nil {
89 | return err
90 | }
91 |
92 | return nil
93 | }
94 |
95 | func (repo *GCSRepository) Delete(repoPath string) error {
96 | ctx := context.Background()
97 |
98 | // client, bucket, obj
99 | client := repo.Client
100 | bkt := client.Bucket(repo.Bucket)
101 | obj := bkt.Object(filepath.Join(repo.BasePath, repoPath))
102 |
103 | // delete
104 | err := obj.Delete(ctx)
105 | if err != nil {
106 | return err
107 | }
108 |
109 | return nil
110 | }
111 |
112 | func (repo *GCSRepository) Stat(repoPath string) (FileInfo, error) {
113 | ctx := context.Background()
114 |
115 | // client, bucket, obj
116 | client := repo.Client
117 | bkt := client.Bucket(repo.Bucket)
118 | obj := bkt.Object(filepath.Join(repo.BasePath, repoPath))
119 |
120 | // get object stat
121 | _, err := obj.Attrs(ctx)
122 | if err != nil {
123 | return nil, err
124 | }
125 |
126 | return &GCSFileInfo{
127 | name: filepath.Base(repoPath),
128 | isDir: false,
129 | }, nil
130 | }
131 |
132 | func (repo *GCSRepository) List(repoPath string) ([]FileInfo, error) {
133 | ctx := context.Background()
134 | records := []FileInfo{}
135 |
136 | // client, bucket, obj
137 | client := repo.Client
138 | bkt := client.Bucket(repo.Bucket)
139 | prefix := filepath.Join(repo.BasePath, repoPath) + "/"
140 | query := &storage.Query{Prefix: prefix, Delimiter: "/"}
141 |
142 | it := bkt.Objects(ctx, query)
143 | for {
144 | attrs, err := it.Next()
145 | if err == iterator.Done {
146 | break
147 | }
148 | if err != nil {
149 | return records, err
150 | }
151 |
152 | fileinfo := GCSFileInfo{}
153 |
154 | if attrs.Name != "" {
155 | fileinfo.name = attrs.Name[len(prefix):]
156 | fileinfo.isDir = false
157 | } else {
158 | fileinfo.name = attrs.Prefix[len(prefix) : len(attrs.Prefix)-1]
159 | fileinfo.isDir = true
160 | }
161 | records = append(records, &fileinfo)
162 | }
163 |
164 | return records, nil
165 | }
166 |
167 | type GCSFileInfo struct {
168 | name string
169 | isDir bool
170 | }
171 |
172 | func (fi *GCSFileInfo) Name() string {
173 | return fi.name
174 | }
175 |
176 | func (fi *GCSFileInfo) IsDir() bool {
177 | return fi.isDir
178 | }
179 |
--------------------------------------------------------------------------------
/internal/repository/http.go:
--------------------------------------------------------------------------------
1 | package repository
2 |
3 | import (
4 | "errors"
5 | "fmt"
6 | "net/http"
7 | "net/url"
8 | "os"
9 | "strings"
10 | "time"
11 | )
12 |
13 | type HttpRepository struct {
14 | RepoUrl string
15 | }
16 |
17 | func NewHttpRepository(repo string) (*HttpRepository, error) {
18 | if !strings.HasSuffix(repo, "/") {
19 | repo += "/"
20 | }
21 |
22 | return &HttpRepository{
23 | RepoUrl: repo,
24 | }, nil
25 | }
26 |
27 | func (repo *HttpRepository) Upload(localPath, repoPath string, meter *Meter) error {
28 | return errors.New("Upload is not supported in Http repository")
29 | }
30 |
31 | func (repo *HttpRepository) Download(repoPath, localPath string, m *Meter) error {
32 | filePath, err := getFilePath(repo.RepoUrl, repoPath)
33 | if err != nil {
34 | return err
35 | }
36 |
37 | res, err := http.Get(filePath)
38 | if err != nil {
39 | retry := 0
40 | 
41 | // retry transient connection resets with a linearly increasing backoff
42 | for err != nil && strings.HasSuffix(err.Error(), "connection reset by peer") && retry < 10 {
43 | retry++
44 | time.Sleep(time.Millisecond * 50 * time.Duration(retry))
45 | res, err = http.Get(filePath)
46 | }
47 |
48 | if err != nil {
49 | return err
50 | }
51 | }
52 | defer res.Body.Close()
53 |
54 | if res.StatusCode != 200 {
55 | return fmt.Errorf("status code: %d", res.StatusCode)
56 | }
57 |
58 | outputFile, err := os.Create(localPath)
59 | if err != nil {
60 | return err
61 | }
62 | defer outputFile.Close()
63 |
64 | _, err = CopyWithMeter(outputFile, res.Body, m)
65 | return err
66 | }
67 |
68 | func (repo *HttpRepository) Delete(repoPath string) error {
69 | return errors.New("Delete is not supported in Http repository")
70 | }
71 |
72 | func (repo *HttpRepository) Stat(repoPath string) (FileInfo, error) {
73 | filePath, err := getFilePath(repo.RepoUrl, repoPath)
74 | if err != nil {
75 | return nil, err
76 | }
77 |
78 | res, err := http.Head(filePath)
79 | if err != nil {
80 | return nil, err
81 | }
82 | defer res.Body.Close()
83 |
84 | if res.StatusCode != 200 {
85 | return nil, fmt.Errorf("status code: %d", res.StatusCode)
86 | }
87 |
88 | info := &HttpFileInfo{
89 | name: repoPath,
90 | }
91 |
92 | return info, nil
93 | }
94 |
95 | func (repo *HttpRepository) List(repoPath string) ([]FileInfo, error) {
96 | return nil, errors.New("List is not supported in Http repository")
97 | }
98 |
99 | func getFilePath(repoPath, filePath string) (string, error) {
100 | base, err := url.Parse(repoPath)
101 | if err != nil {
102 | return "", err
103 | }
104 | path, err := url.Parse(filePath)
105 | if err != nil {
106 | return "", err
107 | }
108 | return base.ResolveReference(path).String(), nil
109 | }
110 |
111 | type HttpFileInfo struct {
112 | name string
113 | }
114 |
115 | func (info *HttpFileInfo) Name() string {
116 | return info.name
117 | }
118 |
119 | func (info *HttpFileInfo) IsDir() bool {
120 | return false
121 | }
122 |
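123 | // Note: this backend is read-only; Upload, Delete, and List return errors,
124 | // and only Download and Stat are served over plain HTTP(S). getFilePath
125 | // resolves a repo-relative path against the base URL (example.com is a
126 | // placeholder):
127 | //
128 | //     getFilePath("https://example.com/repo/", "refs/latest")
129 | //     // => "https://example.com/repo/refs/latest", nil
130 | 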
--------------------------------------------------------------------------------
/internal/repository/local.go:
--------------------------------------------------------------------------------
1 | package repository
2 |
3 | import (
4 | "errors"
5 | "fmt"
6 | "io/fs"
7 | "os"
8 | "path"
9 | "path/filepath"
10 | )
11 |
12 | // Local Filesystem
13 | type LocalFileSystemRepository struct {
14 | RepoDir string
15 | }
16 |
17 | func NewLocalFileSystemRepository(repoDir string) (*LocalFileSystemRepository, error) {
18 | stat, err := os.Stat(repoDir)
19 | if err != nil {
20 | if os.IsNotExist(err) {
21 | err = os.Mkdir(repoDir, fs.ModePerm)
22 | if err != nil {
23 | return nil, errors.New("cannot make directory: " + repoDir)
24 | }
25 | } else {
26 | return nil, err
27 | }
28 | } else {
29 | if !stat.IsDir() {
30 | return nil, errors.New(repoDir + " is not a directory")
31 | }
32 | }
33 |
34 | return &LocalFileSystemRepository{
35 | RepoDir: repoDir,
36 | }, nil
37 | }
38 |
39 | func (repo *LocalFileSystemRepository) Upload(localPath, repoPath string, m *Meter) error {
40 | sourceFileStat, err := os.Stat(localPath)
41 | if err != nil {
42 | return err
43 | }
44 |
45 | if !sourceFileStat.Mode().IsRegular() {
46 | return fmt.Errorf("%s is not a regular file", localPath)
47 | }
48 |
49 | source, err := os.Open(localPath)
50 | if err != nil {
51 | return err
52 | }
53 | defer source.Close()
54 |
55 | // Copy from source to tmp
56 | tmpDir := path.Join(repo.RepoDir, "tmp")
57 | err = os.MkdirAll(tmpDir, fs.ModePerm)
58 | if err != nil {
59 | return err
60 | }
61 |
62 | tmp, err := os.CreateTemp(tmpDir, "*")
63 | if err != nil {
64 | return err
65 | }
66 | tmpPath := tmp.Name()
67 | defer os.Remove(tmpPath)
68 | _, err = CopyWithMeter(tmp, source, m)
69 | if err != nil {
70 | return err
71 | }
72 | err = tmp.Close()
73 | if err != nil {
74 | return err
75 | }
76 |
77 | // Move from tmp to dest
78 | destPath := path.Join(repo.RepoDir, repoPath)
79 | err = os.MkdirAll(filepath.Dir(destPath), fs.ModePerm)
80 | if err != nil {
81 | return err
82 | }
83 | err = os.Remove(destPath)
84 | if err != nil && !os.IsNotExist(err) {
85 | return err
86 | }
87 |
88 | err = os.Rename(tmpPath, destPath)
89 | if err != nil {
90 | return err
91 | }
92 |
93 | return nil
94 | }
95 |
96 | func (repo *LocalFileSystemRepository) Download(repoPath, localPath string, m *Meter) error {
97 | srcPath := path.Join(repo.RepoDir, repoPath)
98 | src, err := os.Open(srcPath)
99 | if err != nil {
100 | return err
101 | }
102 | defer src.Close()
103 |
104 | dest, err := os.Create(localPath)
105 | if err != nil {
106 | return err
107 | }
108 | defer dest.Close()
109 | written, err := CopyWithMeter(dest, src, m)
110 | if err != nil {
111 | return err
112 | }
113 |
114 | if written == 0 {
115 | err = os.Truncate(localPath, 0)
116 | }
117 |
118 | return err
119 | }
120 |
121 | func (repo *LocalFileSystemRepository) Delete(repoPath string) error {
122 | filePath := path.Join(repo.RepoDir, repoPath)
123 | return os.Remove(filePath)
124 | }
125 |
126 | func (repo *LocalFileSystemRepository) Stat(repoPath string) (FileInfo, error) {
127 | filePath := path.Join(repo.RepoDir, repoPath)
128 | return os.Stat(filePath)
129 | }
130 |
131 | func (repo *LocalFileSystemRepository) List(repoPath string) ([]FileInfo, error) {
132 | dir := path.Join(repo.RepoDir, repoPath)
133 | entries, err := os.ReadDir(dir)
134 | if err != nil {
135 | return []FileInfo{}, nil
136 | }
137 | result := []FileInfo{}
138 | 
139 | for _, entry := range entries {
140 | info, ok := entry.(FileInfo)
141 | if ok {
142 | result = append(result, info)
143 | }
144 | }
145 | return result, nil
146 | }
147 |
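148 | // Note: Upload copies into <repo>/tmp first and then renames the temp file
149 | // into place, so a reader never observes a partially written object; the
150 | // rename is atomic on POSIX filesystems because the temp directory lives on
151 | // the same volume as the repository.
152 | 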
--------------------------------------------------------------------------------
/internal/repository/local_test.go:
--------------------------------------------------------------------------------
1 | package repository
2 |
3 | import (
4 | "os"
5 | "testing"
6 |
7 | "github.com/stretchr/testify/assert"
8 | )
9 |
10 | func TestLocalUpload(t *testing.T) {
11 | testCases := []struct {
12 | desc string
13 | data string
14 | }{
15 | {
16 | desc: "empty file", data: "",
17 | },
18 | {
19 | desc: "non empty file", data: "hello",
20 | },
21 | }
22 | for _, tC := range testCases {
23 | t.Run(tC.desc, func(t *testing.T) {
24 | repoDir := t.TempDir()
25 | tmpDir := t.TempDir()
26 |
27 | repo, err := NewLocalFileSystemRepository(repoDir)
28 | if err != nil {
29 | t.Error(err)
30 | }
31 |
32 | err = os.WriteFile(tmpDir+"/test", []byte(tC.data), 0644)
33 | if err != nil {
34 | t.Error(err)
35 | }
36 |
37 | err = repo.Upload(tmpDir+"/test", "path/to/the/test", nil)
38 | if err != nil {
39 | t.Error(err)
40 | }
41 | data, err := os.ReadFile(repoDir + "/path/to/the/test")
42 | if err != nil {
43 | t.Error(err)
44 | }
45 | assert.Equal(t, []byte(tC.data), []byte(data))
46 | })
47 | }
48 | }
49 |
50 | func TestLocalDownload(t *testing.T) {
51 | testCases := []struct {
52 | desc string
53 | data string
54 | }{
55 | {
56 | desc: "empty file", data: "",
57 | },
58 | {
59 | desc: "non empty file", data: "hello",
60 | },
61 | }
62 | for _, tC := range testCases {
63 | t.Run(tC.desc, func(t *testing.T) {
64 | repoDir := t.TempDir()
65 | tmpDir := t.TempDir()
66 |
67 | repo, err := NewLocalFileSystemRepository(repoDir)
68 | if err != nil {
69 | t.Error(err)
70 | }
71 |
72 | err = os.MkdirAll(repoDir+"/path/to/the", os.ModePerm)
73 | if err != nil {
74 | t.Error(err)
75 | }
76 |
77 | err = os.WriteFile(repoDir+"/path/to/the/test", []byte(tC.data), 0644)
78 | if err != nil {
79 | t.Error(err)
80 | }
81 |
82 | err = repo.Download("path/to/the/test", tmpDir+"/test", nil)
83 | if err != nil {
84 | t.Error(err)
85 | }
86 | data, err := os.ReadFile(tmpDir + "/test")
87 | if err != nil {
88 | t.Error(err)
89 | }
90 | assert.Equal(t, []byte(tC.data), []byte(data))
91 | })
92 | }
93 | }
94 |
--------------------------------------------------------------------------------
/internal/repository/meter.go:
--------------------------------------------------------------------------------
1 | package repository
2 |
3 | import (
4 | "fmt"
5 | "io"
6 | "sync/atomic"
7 | "time"
8 | )
9 |
10 | type ByteSize float64
11 |
12 | const (
13 | _ = iota // ignore first value by assigning to blank identifier
14 | KB ByteSize = 1 << (10 * iota)
15 | MB
16 | GB
17 | TB
18 | PB
19 | EB
20 | ZB
21 | YB
22 | )
23 |
24 | func (b ByteSize) String() string {
25 | switch {
26 | case b >= YB:
27 | return fmt.Sprintf("%.2fYB", b/YB)
28 | case b >= ZB:
29 | return fmt.Sprintf("%.2fZB", b/ZB)
30 | case b >= EB:
31 | return fmt.Sprintf("%.2fEB", b/EB)
32 | case b >= PB:
33 | return fmt.Sprintf("%.2fPB", b/PB)
34 | case b >= TB:
35 | return fmt.Sprintf("%.2fTB", b/TB)
36 | case b >= GB:
37 | return fmt.Sprintf("%.2fGB", b/GB)
38 | case b >= MB:
39 | return fmt.Sprintf("%.2fMB", b/MB)
40 | case b >= KB:
41 | return fmt.Sprintf("%.2fKB", b/KB)
42 | }
43 | return fmt.Sprintf("%.2fB", b)
44 | }
45 |
46 | type Session struct {
47 | startedAt time.Time
48 | meters []*Meter
49 | }
50 |
51 | func NewSession() *Session {
52 | return &Session{
53 | startedAt: time.Now(),
54 | meters: []*Meter{},
55 | }
56 | }
57 |
58 | func (s *Session) NewMeter() *Meter {
59 | meter := &Meter{
60 | total: 0,
61 | }
62 | s.meters = append(s.meters, meter)
63 | return meter
64 | }
65 |
66 | func (s *Session) CalculateSpeed() ByteSize {
67 | totalDiff := time.Since(s.startedAt).Seconds()
68 | var total int64
69 | for _, meter := range s.meters {
70 | total = total + meter.total
71 | }
72 |
73 | speed := float64(total) / totalDiff
74 | return ByteSize(speed)
75 | }
76 |
77 | type Meter struct {
78 | total int64
79 | }
80 |
81 | func (m *Meter) Write(p []byte) (n int, err error) {
82 | written := len(p)
83 | m.AddBytes(written)
84 | return written, nil
85 | }
86 |
87 | func (m *Meter) AddBytes(bytes int) {
88 | atomic.AddInt64(&m.total, int64(bytes))
89 | }
90 |
91 | func (m *Meter) SetBytes(bytes int64) {
92 | atomic.StoreInt64(&m.total, bytes)
93 | }
94 |
95 | func CopyWithMeter(dest io.Writer, src io.Reader, meter *Meter) (int64, error) {
96 | buf := make([]byte, 1024*1024)
97 |
98 | if meter != nil {
99 | return io.CopyBuffer(dest, io.TeeReader(src, meter), buf)
100 | }
101 |
102 | return io.CopyBuffer(dest, src, buf)
103 | }
104 |
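105 | // Usage sketch: meters accumulate transferred bytes (safely across
106 | // goroutines) and a session averages them over elapsed wall time. The byte
107 | // counts below are arbitrary:
108 | //
109 | //     session := NewSession()
110 | //     meter := session.NewMeter()
111 | //     meter.AddBytes(1024)
112 | //     speed := session.CalculateSpeed() // bytes per second, e.g. "1.00KB"
113 | //     _ = speed
114 | 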
--------------------------------------------------------------------------------
/internal/repository/rclone.go:
--------------------------------------------------------------------------------
1 | package repository
2 |
3 | import (
4 | "bytes"
5 | "encoding/json"
6 | "os"
7 | "os/exec"
8 | "path/filepath"
9 | )
10 |
11 | // Rclone-backed repository
12 | type RcloneRepository struct {
13 | Remote string
14 | BaseDir string
15 | }
16 |
17 | func NewRcloneRepository(remote, basePath string) (*RcloneRepository, error) {
18 | cmd := exec.Command("rclone", "version")
19 | err := cmd.Run()
20 | if err != nil {
21 | return nil, err
22 | }
23 |
24 | return &RcloneRepository{
25 | Remote: remote,
26 | BaseDir: basePath,
27 | }, nil
28 | }
29 |
30 | func (repo *RcloneRepository) Upload(localPath, repoPath string, m *Meter) error {
31 | cmd := exec.Command("rclone", "copyto", "--no-check-dest", localPath, repo.remotePath(repoPath))
32 | err := cmd.Run()
33 | if err != nil {
34 | return err
35 | }
36 |
37 | return nil
38 | }
39 |
40 | func (repo *RcloneRepository) Download(repoPath, localPath string, m *Meter) error {
41 | cmd := exec.Command("rclone", "copyto", "--no-check-dest", repo.remotePath(repoPath), localPath)
42 | err := cmd.Run()
43 | if err != nil {
44 | return err
45 | }
46 |
47 | return nil
48 | }
49 |
50 | func (repo *RcloneRepository) Delete(repoPath string) error {
51 | cmd := exec.Command("rclone", "deletefile", repo.remotePath(repoPath))
52 | err := cmd.Run()
53 | if err != nil {
54 | return err
55 | }
56 |
57 | return nil
58 | }
59 |
60 | func (repo *RcloneRepository) Stat(repoPath string) (FileInfo, error) {
61 | var out bytes.Buffer
62 | cmd := exec.Command("rclone", "size", "--json", repo.remotePath(repoPath))
63 | cmd.Stdout = &out
64 | if err := cmd.Run(); err != nil {
65 | return nil, err
66 | }
67 |
68 | type RcloneSize struct {
69 | Count int `json:"count"`
70 | }
71 |
72 | var size RcloneSize
73 | if err := json.Unmarshal(out.Bytes(), &size); err != nil {
74 | return nil, err
75 | }
76 |
77 | if size.Count == 0 {
78 | return nil, os.ErrNotExist
79 | }
80 |
81 | return &RcloneFileInfo{
82 | Name_: filepath.Base(repoPath),
83 | IsDir_: false,
84 | }, nil
85 | }
86 |
87 | func (repo *RcloneRepository) List(repoPath string) ([]FileInfo, error) {
88 | var out bytes.Buffer
89 | cmd := exec.Command("rclone", "lsjson", repo.remotePath(repoPath))
90 | cmd.Stdout = &out
91 | err := cmd.Run()
92 | if err != nil {
93 | return nil, err
94 | }
95 |
96 | var rcloneEntries []RcloneFileInfo
97 | err = json.Unmarshal(out.Bytes(), &rcloneEntries)
98 | if err != nil {
99 | return nil, err
100 | }
101 |
102 | entries := make([]FileInfo, 0)
103 | for i := range rcloneEntries {
104 | entries = append(entries, &rcloneEntries[i])
105 | }
106 | return entries, nil
107 | }
108 |
109 | func (repo *RcloneRepository) remotePath(repoPath string) string {
110 | path := filepath.Join(repo.BaseDir, repoPath)
111 | return repo.Remote + ":" + path
112 | }
113 |
114 | type RcloneFileInfo struct {
115 | Name_ string `json:"Name"`
116 | IsDir_ bool `json:"IsDir"`
117 | }
118 |
119 | func (e *RcloneFileInfo) Name() string {
120 | return e.Name_
121 | }
122 |
123 | func (e *RcloneFileInfo) IsDir() bool {
124 | return e.IsDir_
125 | }
126 |
--------------------------------------------------------------------------------
/internal/repository/repo_integration_test.go:
--------------------------------------------------------------------------------
1 | // Run integration tests against any repository
2 | //
3 | // TEST_REPOSITORY=s3://bucket/myrepo go test -v ./internal/repository
4 | package repository
5 |
6 | import (
7 | cryptorand "crypto/rand"
8 | "crypto/sha1"
9 | "fmt"
10 | "io"
11 | "math/rand"
12 | "os"
13 | "path/filepath"
14 | "testing"
15 | "time"
16 |
17 | "github.com/infuseai/artivc/internal/log"
18 | "github.com/stretchr/testify/assert"
19 | )
20 |
21 | func getRepo() (Repository, error) {
22 | repoStr := os.Getenv("TEST_REPOSITORY")
23 | if repoStr == "" {
24 | return nil, nil
25 | }
26 | result, err := ParseRepo(repoStr)
27 | if err != nil {
28 | return nil, err
29 | }
30 | return NewRepository(result)
31 | }
32 |
33 | func sha1sum(path string) string {
34 | hasher := sha1.New()
35 | f, err := os.Open(path)
36 | if err != nil {
37 | panic(err)
38 | }
39 | defer f.Close()
40 | if _, err := io.Copy(hasher, f); err != nil {
41 | panic(err)
42 | }
43 | sum := hasher.Sum([]byte{})
44 | return fmt.Sprintf("%x", sum)
45 | }
46 |
47 | func generateRandomFile(path string, size int64) error {
48 | f, err := os.Create(path)
49 | if err != nil {
50 | return err
51 | }
52 | defer f.Close()
53 |
54 | _, err = io.CopyN(f, cryptorand.Reader, size)
55 | if err != nil {
56 | return err
57 | }
58 |
59 | return nil
60 | }
61 |
62 | func Test_Transfer(t *testing.T) {
63 | repo, err := getRepo()
64 | if err != nil {
65 | t.Fatal(err)
66 | }
67 | 
68 | if repo == nil {
69 | return
70 | }
71 |
72 | testCases := []struct {
73 | desc string
74 | size int64
75 | repoPath string
76 | }{
77 | {desc: "small file", size: 1024, repoPath: "bin"},
78 | {desc: "small file with subpath", size: 1024, repoPath: "this/is/my/bin"},
79 | {desc: "large file", size: 10 * 1024 * 1024, repoPath: "bin"},
80 | {desc: "empty file", size: 0, repoPath: "bin"},
81 | }
82 | for _, tC := range testCases {
83 | t.Run(tC.desc, func(t *testing.T) {
84 | tmpDir := t.TempDir()
85 | path := tmpDir + "/in"
86 | assert.NoError(t, generateRandomFile(path, tC.size))
87 |
88 | if err := repo.Upload(path, tC.repoPath, nil); err != nil {
89 | t.Error(err)
90 | }
91 |
92 | if err := repo.Download(tC.repoPath, tmpDir+"/out", nil); err != nil {
93 | t.Error(err)
94 | }
95 |
96 | assert.Equal(t, sha1sum(tmpDir+"/in"), sha1sum(tmpDir+"/out"))
97 |
98 | if err := repo.Delete(tC.repoPath); err != nil {
99 | t.Error(err)
100 | }
101 | })
102 | }
103 | }
104 |
105 | func Test_Stat(t *testing.T) {
106 | repo, err := getRepo()
107 | if err != nil {
108 | t.Fatal(err)
109 | }
110 | 
111 | if repo == nil {
112 | return
113 | }
114 |
115 | rand.Seed(time.Now().UnixNano())
116 | tmpDir := t.TempDir()
117 | path := tmpDir + "/bin"
118 | repoPath := fmt.Sprintf("stat/%d", rand.Int())
119 |
120 | // stat a non-existent file
121 | _, err = repo.Stat(repoPath)
122 | assert.Error(t, err, "Stat() should return error if the file does not exist")
123 |
124 | // upload & stat
125 | assert.NoError(t, generateRandomFile(path, 1024))
126 | err = repo.Upload(path, repoPath, nil)
127 | if err != nil {
128 | t.Error(err)
129 | }
130 |
131 | info, err := repo.Stat(repoPath)
132 | if err != nil {
133 | t.Error(err)
134 | }
135 | assert.Equal(t, filepath.Base(repoPath), info.Name(), "name of Stat() should be the last component of path")
136 | assert.Equal(t, false, info.IsDir(), "result of Stat() should not be a directory")
137 |
138 | // delete
139 | err = repo.Delete(repoPath)
140 | if err != nil {
141 | t.Error(err)
142 | }
143 |
144 | _, err = repo.Stat(repoPath)
145 | assert.Error(t, err, "Stat() should return error after the file is deleted")
146 | }
147 |
148 | func Test_List(t *testing.T) {
149 | repo, err := getRepo()
150 | if err != nil {
151 | t.Fatal(err)
152 | }
153 | 
154 | if repo == nil {
155 | return
156 | }
157 |
158 | rand.Seed(time.Now().UnixNano())
159 | tmpDir := t.TempDir()
160 | path := tmpDir + "/bin"
161 | assert.NoError(t, generateRandomFile(path, 1024))
162 |
163 | // Create files
164 | //
165 | // dir
166 | // ├── 0
167 | // ├── 1
168 | // ├── 2
169 | // └── 3
170 | // ├── 0
171 | // ├── 1
172 | // └── 2
173 | for i := 0; i < 3; i++ {
174 | rpath := fmt.Sprintf("dir/%d", i)
175 | err = repo.Upload(path, rpath, nil)
176 | if err != nil {
177 | t.Error(err)
178 | }
179 |
180 | defer func() {
181 | if err := repo.Delete(rpath); err != nil {
182 | log.Debugln("can't delete repo: " + err.Error())
183 | }
184 | }()
185 | }
186 | for i := 0; i < 3; i++ {
187 | rpath := fmt.Sprintf("dir/3/%d", i)
188 |
189 | err = repo.Upload(path, rpath, nil)
190 | if err != nil {
191 | t.Error(err)
192 | }
193 |
194 | defer func() {
195 | if err := repo.Delete(rpath); err != nil {
196 | log.Debugln("can't delete repo: " + err.Error())
197 | }
198 | }()
199 | }
200 |
201 | // test
202 | // ls dir
203 | list, err := repo.List("dir")
204 | assert.NoError(t, err)
205 | assert.Equal(t, 4, len(list))
206 | for _, info := range list {
207 | switch info.Name() {
208 | case "0":
209 | assert.False(t, info.IsDir())
210 | case "1":
211 | assert.False(t, info.IsDir())
212 | case "2":
213 | assert.False(t, info.IsDir())
214 | case "3":
215 | assert.True(t, info.IsDir())
216 | default:
217 | assert.Fail(t, "wrong list item")
218 | }
219 | }
220 |
221 | // ls dir/3
222 | list, err = repo.List("dir/3")
223 | if err != nil {
224 | t.Error(err)
225 | }
226 | assert.Equal(t, 3, len(list))
227 | for _, info := range list {
228 | switch info.Name() {
229 | case "0":
230 | assert.False(t, info.IsDir())
231 | case "1":
232 | assert.False(t, info.IsDir())
233 | case "2":
234 | assert.False(t, info.IsDir())
235 | default:
236 | assert.Fail(t, "wrong list item")
237 | }
238 | }
239 |
240 | // ls non-existing folder
241 | list, err = repo.List("dir-12345")
242 | if err != nil {
243 | t.Error(err)
244 | }
245 | assert.Equal(t, 0, len(list))
246 | }
247 |
--------------------------------------------------------------------------------
/internal/repository/repository.go:
--------------------------------------------------------------------------------
1 | package repository
2 |
3 | import (
4 | neturl "net/url"
5 | "os"
6 | "path/filepath"
7 | "strings"
8 | )
9 |
10 | type FileInfo interface {
11 | Name() string
12 | IsDir() bool
13 | }
14 |
15 | type SimpleFileInfo struct {
16 | name string
17 | isDir bool
18 | }
19 |
20 | func (fi *SimpleFileInfo) Name() string {
21 | return fi.name
22 | }
23 |
24 | func (fi *SimpleFileInfo) IsDir() bool {
25 | return fi.isDir
26 | }
27 |
28 | type Repository interface {
29 | Upload(localPath, repoPath string, meter *Meter) error
30 | Download(repoPath, localPath string, meter *Meter) error
31 | Delete(repoPath string) error
32 | Stat(repoPath string) (FileInfo, error)
33 | List(repoPath string) ([]FileInfo, error)
34 | }
35 |
36 | type RepoParseResult struct {
37 | Repo string
38 | scheme string
39 | host string
40 | path string
41 | }
42 |
43 | func ParseRepo(repo string) (RepoParseResult, error) {
44 | var result RepoParseResult
45 |
46 | if strings.Contains(repo, "://") {
47 | url, err := neturl.Parse(repo)
48 | if err != nil {
49 | return result, err
50 | }
51 |
52 | if url.Scheme == "" {
53 | return result, UnsupportedRepositoryError{
54 | Message: "unsupported repository. Relative path is not allowed as a repository path",
55 | }
56 | }
57 |
58 | result.Repo = repo
59 | result.scheme = url.Scheme
60 | result.host = url.Host
61 | result.path = url.Path
62 | } else {
63 | i := strings.Index(repo, ":")
64 | if i > 0 {
65 | result.Repo = repo
66 | result.scheme = "ssh"
67 | result.host = repo[0:i]
68 | result.path = repo[i+1:]
69 | } else {
70 | cwd, err := os.Getwd()
71 | if err != nil {
72 | return result, err
73 | }
74 | if !strings.HasPrefix(repo, "/") {
75 | repo, err = filepath.Abs(filepath.Join(cwd, repo))
76 | if err != nil {
77 | return result, err
78 | }
79 | }
80 |
81 | result.Repo = repo
82 | result.scheme = "file"
83 | result.host = ""
84 | result.path = repo
85 | }
86 | }
87 |
88 | return result, nil
89 | }
90 |
91 | func ParseRepoName(result RepoParseResult) (string, error) {
92 | if result.scheme == "ssh" {
93 | name := filepath.Base(result.path)
94 | if name == "/" {
95 | return result.host, nil
96 | }
97 | return name, nil
98 | } else {
99 | url, err := neturl.Parse(result.Repo)
100 | if err != nil {
101 | return "", err
102 | }
103 |
104 | if url.Path == "" {
105 | return url.Hostname(), nil
106 | }
107 |
108 | name := filepath.Base(url.Path)
109 | if name == "/" {
110 | return url.Hostname(), nil
111 | }
112 |
113 | return name, nil
114 | }
115 | }
116 |
117 | func NewRepository(result RepoParseResult) (Repository, error) {
118 | repo := result.Repo
119 | host := result.host
120 | path := result.path
121 |
122 | switch result.scheme {
123 | case "file":
124 | return NewLocalFileSystemRepository(path)
125 | case "s3":
126 | return NewS3Repository(host, path)
127 | case "gs":
128 | return NewGCSRepository(host, path)
129 | case "rclone":
130 | return NewRcloneRepository(host, path)
131 | case "ssh":
132 | return NewSSHRepository(host, path)
133 | case "http":
134 | return NewHttpRepository(repo)
135 | case "https":
136 | if IsAzureStorageUrl(repo) {
137 | return NewAzureBlobRepository(repo)
138 | } else {
139 | return NewHttpRepository(repo)
140 | }
141 | default:
142 | return nil, UnsupportedRepositoryError{
143 | Message: "unsupported repository",
144 | }
145 | }
146 | }
147 |
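148 | // Usage sketch: parse a repository URL and construct the matching backend;
149 | // the bucket name below is hypothetical:
150 | //
151 | //     result, err := ParseRepo("s3://my-bucket/datasets/mnist")
152 | //     if err == nil {
153 | //         repo, err := NewRepository(result) // a *S3Repository here
154 | //         _, _ = repo, err
155 | //     }
156 | 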
--------------------------------------------------------------------------------
/internal/repository/repository_test.go:
--------------------------------------------------------------------------------
1 | package repository
2 |
3 | import (
4 | "os"
5 | "path/filepath"
6 | "strings"
7 | "testing"
8 |
9 | "github.com/stretchr/testify/assert"
10 | )
11 |
12 | func getAbsFilePath(path string) string {
13 | cwd, err := os.Getwd()
14 | if err != nil {
15 | return ""
16 | }
17 | if !strings.HasPrefix(path, "/") {
18 | path, err = filepath.Abs(filepath.Join(cwd, path))
19 | if err != nil {
20 | return ""
21 | }
22 | }
23 | return path
24 | }
25 |
26 | func Test(t *testing.T) {
27 | testCases := []struct {
28 | desc string
29 | repo string
30 | scheme string
31 | host string
32 | path string
33 | name string
34 | }{
35 | {repo: "/tmp", scheme: "file", host: "", path: getAbsFilePath("/tmp"), name: "tmp"},
36 | {repo: "tmp", scheme: "file", host: "", path: getAbsFilePath("tmp"), name: "tmp"},
37 | {repo: "../tmp", scheme: "file", host: "", path: getAbsFilePath("../tmp"), name: "tmp"},
38 | {repo: "file:///tmp", scheme: "file", host: "", path: "/tmp", name: "tmp"},
39 | {repo: "host:/tmp", scheme: "ssh", host: "host", path: "/tmp", name: "tmp"},
40 | {repo: "host:tmp", scheme: "ssh", host: "host", path: "tmp", name: "tmp"},
41 | {repo: "host:../tmp", scheme: "ssh", host: "host", path: "../tmp", name: "tmp"},
42 | {repo: "ssh://host/tmp", scheme: "ssh", host: "host", path: "/tmp", name: "tmp"},
43 | {repo: "xyz://host/tmp", scheme: "xyz", host: "host", path: "/tmp", name: "tmp"},
44 | {repo: "xyz://host", scheme: "xyz", host: "host", path: "", name: "host"},
45 | }
46 |
47 | for _, tC := range testCases {
48 | t.Run("pares repo "+tC.repo, func(t *testing.T) {
49 | result, err := ParseRepo(tC.repo)
50 | if err != nil {
51 | t.Error(err)
52 | return
53 | }
54 |
55 | assert.Equal(t, tC.scheme, result.scheme)
56 | assert.Equal(t, tC.host, result.host)
57 | assert.Equal(t, tC.path, result.path)
58 |
59 | repoName, err := ParseRepoName(result)
60 | if err != nil {
61 | t.Error(err)
62 | return
63 | }
64 |
65 | assert.Equal(t, tC.name, repoName)
66 | })
67 | }
68 | }
69 |
--------------------------------------------------------------------------------
/internal/repository/s3.go:
--------------------------------------------------------------------------------
1 | package repository
2 |
3 | import (
4 | "context"
5 | "fmt"
6 | "io"
7 | "os"
8 | "path/filepath"
9 | "strings"
10 |
11 | "github.com/aws/aws-sdk-go-v2/config"
12 | "github.com/aws/aws-sdk-go-v2/feature/s3/manager"
13 | "github.com/aws/aws-sdk-go-v2/service/s3"
14 | )
15 |
16 | type S3Repository struct {
17 | Bucket string
18 | BasePath string
19 | client *s3.Client
20 | }
21 |
22 | func NewS3Repository(bucket, basePath string) (*S3Repository, error) {
23 | basePath = strings.TrimPrefix(basePath, "/")
24 |
25 | cfg, err := config.LoadDefaultConfig(context.TODO())
26 | if err != nil {
27 | return nil, err
28 | }
29 | client := s3.NewFromConfig(cfg)
30 |
31 | return &S3Repository{
32 | Bucket: bucket,
33 | BasePath: basePath,
34 | client: client,
35 | }, nil
36 | }
37 |
38 | func (repo *S3Repository) Upload(localPath, repoPath string, m *Meter) error {
39 | // Reference the code to show the progress when uploading
40 | // https://github.com/aws/aws-sdk-go/blob/main/example/service/s3/putObjectWithProcess/putObjWithProcess.go
41 | sourceFileStat, err := os.Stat(localPath)
42 | if err != nil {
43 | return err
44 | }
45 |
46 | if !sourceFileStat.Mode().IsRegular() {
47 | return fmt.Errorf("%s is not a regular file", localPath)
48 | }
49 |
50 | source, err := os.Open(localPath)
51 | if err != nil {
52 | return err
53 | }
54 | defer source.Close()
55 |
56 | fileInfo, err := source.Stat()
57 | if err != nil {
58 | return err
59 | }
60 |
61 | reader := &progressReader{
62 | fp: source,
63 | size: fileInfo.Size(),
64 | meter: m,
65 | }
66 |
67 | key := filepath.Join(repo.BasePath, repoPath)
68 | input := &s3.PutObjectInput{
69 | Bucket: &repo.Bucket,
70 | Key: &key,
71 | Body: reader,
72 | }
73 |
74 | if sourceFileStat.Size() < manager.DefaultUploadPartSize {
75 | _, err = repo.client.PutObject(context.TODO(), input)
76 | } else {
77 | uploader := manager.NewUploader(repo.client)
78 | _, err = uploader.Upload(context.TODO(), input)
79 | }
80 | return err
81 | }
82 |
83 | func (repo *S3Repository) Download(repoPath, localPath string, m *Meter) error {
84 | // Reference the code to show the progress when downloading
85 | // https://github.com/aws/aws-sdk-go/tree/main/example/service/s3/getObjectWithProgress
86 | key := filepath.Join(repo.BasePath, repoPath)
87 | input := &s3.GetObjectInput{
88 | Bucket: &repo.Bucket,
89 | Key: &key,
90 | }
91 |
92 | downloader := manager.NewDownloader(repo.client)
93 |
94 | dest, err := os.Create(localPath)
95 | if err != nil {
96 | fmt.Fprintln(os.Stderr, err)
97 | return err
98 | }
99 | defer dest.Close()
100 |
101 | writer := &progressWriter{writer: dest, meter: m}
102 | _, err = downloader.Download(context.TODO(), writer, input)
103 | return err
104 | }
105 |
106 | func (repo *S3Repository) Delete(repoPath string) error {
107 | key := filepath.Join(repo.BasePath, repoPath)
108 | input := &s3.DeleteObjectInput{
109 | Bucket: &repo.Bucket,
110 | Key: &key,
111 | }
112 |
113 | _, err := repo.client.DeleteObject(context.TODO(), input)
114 | return err
115 | }
116 |
117 | func (repo *S3Repository) Stat(repoPath string) (FileInfo, error) {
118 | key := filepath.Join(repo.BasePath, repoPath)
119 | input := &s3.HeadObjectInput{
120 | Bucket: &repo.Bucket,
121 | Key: &key,
122 | }
123 | _, err := repo.client.HeadObject(context.TODO(), input)
124 | if err != nil {
125 | return nil, err
126 | }
127 |
128 | return &S3FileInfo{
129 | name: filepath.Base(repoPath),
130 | }, nil
131 | }
132 |
133 | func (repo *S3Repository) List(repoPath string) ([]FileInfo, error) {
134 | fullRepoPath := filepath.Join(repo.BasePath, repoPath)
135 | fullRepoPath = fullRepoPath + "/"
136 | delimiter := "/"
137 | input := &s3.ListObjectsV2Input{
138 | Bucket: &repo.Bucket,
139 | Prefix: &fullRepoPath,
140 | Delimiter: &delimiter,
141 | }
142 | output, err := repo.client.ListObjectsV2(context.TODO(), input)
143 | if err != nil {
144 | return nil, err
145 | }
146 |
147 | entries := make([]FileInfo, 0)
148 | for _, prefix := range output.CommonPrefixes {
149 | fullname := *prefix.Prefix
150 | name := fullname[len(fullRepoPath) : len(fullname)-1]
151 | entry := S3FileInfo{name: name, isDir: true}
152 | entries = append(entries, &entry)
153 | }
154 |
155 | for _, obj := range output.Contents {
156 | fullname := *obj.Key
157 | entry := S3FileInfo{name: fullname[len(fullRepoPath):]}
158 | entries = append(entries, &entry)
159 | }
160 | return entries, err
161 | }
162 |
163 | type S3FileInfo struct {
164 | name string
165 | isDir bool
166 | }
167 |
168 | func (fi *S3FileInfo) Name() string {
169 | return fi.name
170 | }
171 |
172 | func (fi *S3FileInfo) IsDir() bool {
173 | return fi.isDir
174 | }
175 |
176 | type progressReader struct {
177 | fp *os.File
178 | size int64
179 | meter *Meter
180 | }
181 |
182 | func (r *progressReader) Read(p []byte) (int, error) {
183 | read, err := r.fp.Read(p)
184 | if r.meter != nil {
185 | r.meter.AddBytes(read)
186 | }
187 | return read, err
188 | }
189 |
190 | func (r *progressReader) ReadAt(p []byte, off int64) (int, error) {
191 | n, err := r.fp.ReadAt(p, off)
192 | if err != nil {
193 | return n, err
194 | }
195 |
196 | if r.meter != nil {
197 | r.meter.AddBytes(n)
198 | }
199 |
200 | return n, err
201 | }
202 |
203 | func (r *progressReader) Seek(offset int64, whence int) (int64, error) {
204 | return r.fp.Seek(offset, whence)
205 | }
206 |
207 | type progressWriter struct {
208 | writer io.WriterAt
209 | meter *Meter
210 | }
211 |
212 | func (w *progressWriter) WriteAt(p []byte, off int64) (int, error) {
213 | n, err := w.writer.WriteAt(p, off)
214 | if err != nil {
215 | return n, err
216 | }
217 |
218 | if w.meter != nil {
219 | w.meter.AddBytes(n)
220 | }
221 |
222 | return n, err
223 | }
224 |
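225 | // Note: Upload picks a strategy by size. Files smaller than
226 | // manager.DefaultUploadPartSize (5 MiB) go through a single PutObject call;
227 | // larger files go through the multipart uploader. Credentials and region are
228 | // resolved by the standard AWS config chain (environment, shared config
229 | // files, IAM role).
230 | 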
--------------------------------------------------------------------------------
/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "github.com/infuseai/artivc/cmd"
5 | )
6 |
7 | func main() {
8 | cmd.Execute()
9 | }
10 |
--------------------------------------------------------------------------------