├── .bazelrc
├── .env.example
├── .github
└── workflows
│ ├── docs.yml
│ ├── lint.yaml
│ └── pr.yml
├── .gitignore
├── BUILD
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── MODULE.bazel
├── Manatee_technical_charter_final_9-26-2024.pdf
├── README.md
├── app
├── api
│ ├── .gitignore
│ ├── .hz
│ ├── BUILD.bazel
│ ├── biz
│ │ ├── dal
│ │ │ ├── BUILD.bazel
│ │ │ ├── db
│ │ │ │ ├── BUILD.bazel
│ │ │ │ ├── init.go
│ │ │ │ └── job.go
│ │ │ └── init.go
│ │ ├── handler
│ │ │ ├── BUILD.bazel
│ │ │ ├── health.go
│ │ │ └── job
│ │ │ │ ├── BUILD.bazel
│ │ │ │ └── job_handler.go
│ │ ├── model
│ │ │ └── job
│ │ │ │ ├── BUILD.bazel
│ │ │ │ └── job.go
│ │ ├── pkg
│ │ │ ├── errno
│ │ │ │ ├── BUILD.bazel
│ │ │ │ └── errno.go
│ │ │ ├── storage
│ │ │ │ ├── BUILD.bazel
│ │ │ │ ├── gcs.go
│ │ │ │ ├── minio.go
│ │ │ │ ├── mock.go
│ │ │ │ └── storage.go
│ │ │ └── utils
│ │ │ │ ├── BUILD.bazel
│ │ │ │ └── resp.go
│ │ ├── router
│ │ │ ├── BUILD.bazel
│ │ │ ├── job
│ │ │ │ ├── BUILD.bazel
│ │ │ │ ├── job.go
│ │ │ │ └── middleware.go
│ │ │ └── register.go
│ │ └── service
│ │ │ ├── BUILD.bazel
│ │ │ ├── job_service.go
│ │ │ └── job_service_test.go
│ ├── idl
│ │ └── job.thrift
│ ├── main.go
│ ├── router.go
│ ├── router_gen.go
│ └── script
│ │ └── bootstrap.sh
├── executor
│ ├── .gitignore
│ ├── BUILD.bazel
│ └── attestation
│ │ ├── BUILD.bazel
│ │ └── main.go
├── jupyterlab_manatee
│ ├── .dockerignore
│ ├── .gitignore
│ ├── .yarnrc.yml
│ ├── 20custom-hook.sh
│ ├── BUILD.bazel
│ ├── LICENSE
│ ├── README.md
│ ├── RELEASE.md
│ ├── babel.config.js
│ ├── install.json
│ ├── jest.config.js
│ ├── jupyter-config
│ │ └── jupyter_server_config.d
│ │ │ └── jupyterlab_manatee.json
│ ├── jupyterlab_manatee
│ │ ├── __init__.py
│ │ └── handlers.py
│ ├── noble.lock.json
│ ├── noble.yaml
│ ├── package.json
│ ├── pyproject.toml
│ ├── requirements.in
│ ├── requirements.txt
│ ├── requirements_linux.txt
│ ├── setup.py
│ ├── src
│ │ ├── __tests__
│ │ │ └── jupyterlab_manatee.spec.ts
│ │ ├── index.ts
│ │ ├── jobs.tsx
│ │ ├── sidebar.ts
│ │ └── sources.ts
│ ├── style
│ │ ├── base.css
│ │ ├── index.css
│ │ └── index.js
│ ├── tsconfig.json
│ ├── tsconfig.test.json
│ ├── ui-tests
│ │ ├── README.md
│ │ ├── jupyter_server_test_config.py
│ │ ├── package.json
│ │ ├── playwright.config.js
│ │ ├── tests
│ │ │ └── jupyterlab_manatee.spec.ts
│ │ └── yarn.lock
│ └── yarn.lock
└── reconciler
│ ├── BUILD.bazel
│ ├── imagebuilder
│ ├── BUILD.bazel
│ ├── kaniko.go
│ └── kaniko_test.go
│ ├── main.go
│ ├── reconciler.go
│ ├── reconciler_test.go
│ ├── registry
│ ├── BUILD.bazel
│ └── registry.go
│ └── tee_backend
│ ├── BUILD.bazel
│ ├── confidential_space.go
│ └── mock_teebackend.go
├── deployment
├── deploy.sh
├── jupyterhub
│ ├── config.yaml
│ └── deploy.sh
├── manatee
│ ├── .helmignore
│ ├── Chart.yaml
│ ├── config.yaml
│ ├── deploy.sh
│ ├── templates
│ │ ├── NOTES.txt
│ │ ├── _helpers.tpl
│ │ ├── configmap.yaml
│ │ ├── deployment.yaml
│ │ ├── ingress.yaml
│ │ ├── reconciler.yaml
│ │ ├── service.yaml
│ │ └── tests
│ │ │ └── test-connection.yaml
│ └── values.yaml
└── minikube
│ ├── deploy.sh
│ ├── minio-dev.yaml
│ ├── mysql-deployment.yaml
│ └── mysql-service.yaml
├── docs
├── assets
│ └── img
│ │ ├── arch.png
│ │ ├── jobs.png
│ │ ├── logo.png
│ │ ├── manatee-architecture.png
│ │ ├── manatee-white.png
│ │ ├── manatee.png
│ │ ├── plugin.png
│ │ ├── stage-1.png
│ │ ├── two-stage.png
│ │ └── unzip.png
├── blog
│ ├── index.md
│ └── posts
│ │ └── 2025-01-community-release.md
├── developer
│ └── architecture.md
├── getting-started
│ ├── building.md
│ ├── deployment.md
│ ├── llm-model-evaluation.md
│ ├── minikube.md
│ └── tutorials.md
├── index.md
├── project-status.md
└── stylesheets
│ └── extra.css
├── go.mod
├── go.sum
├── mkdocs.yml
├── resources
├── .gitignore
├── deployment
│ ├── apply.sh
│ ├── backend.tf
│ ├── cluster_rolebinding.tf
│ ├── db_account.tf
│ ├── namespace.tf
│ ├── providers.tf
│ ├── repositories.tf
│ ├── role.tf
│ ├── secret.tf
│ ├── service_accounts.tf
│ └── variables.tf
├── global
│ ├── apply.sh
│ ├── backend.tf
│ ├── buckets.tf
│ ├── cluster.tf
│ ├── database.tf
│ ├── iam.tf
│ ├── network.tf
│ ├── providers.tf
│ ├── repositories.tf
│ ├── service_accounts.tf
│ └── variables.tf
└── minikube
│ ├── apply.sh
│ ├── namespace.tf
│ ├── providers.tf
│ ├── role.tf
│ ├── secret.tf
│ ├── service_accounts.tf
│ └── variables.tf
└── tutorials
├── code
├── insurance.ipynb
├── regression.ipynb
└── sdk
│ ├── .gitignore
│ ├── __init__.py
│ ├── __version__.py
│ └── data.py
├── data
├── stage1
│ └── insurance.csv
└── stage2
│ └── insurance.csv
└── tutorial.sh
/.bazelrc:
--------------------------------------------------------------------------------
1 | common --enable_bzlmod
2 | build --host_macos_minimum_os=13.3
3 | build --macos_minimum_os=13.3
4 | build --apple_platform_type=macos
5 |
--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------
1 | env=""
2 | project_id=""
3 | region=""
4 | zone=""
5 |
--------------------------------------------------------------------------------
/.github/workflows/docs.yml:
--------------------------------------------------------------------------------
1 | name: docs
2 | on:
3 | push:
4 | branches: [main]
5 | permissions:
6 | contents: write
7 | jobs:
8 | deploy:
9 | runs-on: ubuntu-latest
10 | steps:
11 | - uses: actions/checkout@v4
12 | - name: Configure Git Credentials
13 | run: |
14 | git config user.name github-actions[bot]
15 | git config user.email 41898282+github-actions[bot]@users.noreply.github.com
16 | - uses: actions/setup-python@v5
17 | with:
18 | python-version: 3.x
19 | - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV
20 | - uses: actions/cache@v4
21 | with:
22 | key: mkdocs-material-${{ env.cache_id }}
23 | path: .cache
24 | restore-keys: |
25 | mkdocs-material-
26 | - run: pip install mkdocs-material
27 | - run: mkdocs gh-deploy --force
28 |
--------------------------------------------------------------------------------
/.github/workflows/lint.yaml:
--------------------------------------------------------------------------------
1 | on:
2 | pull_request:
3 | branches: [main]
4 | jobs:
5 | format:
6 | name: Enforce Code Format
7 | runs-on: ubuntu-latest
8 | steps:
9 | - name: Check out code
10 | uses: actions/checkout@v4
11 | - name: Set up Bazel
12 | uses: bazel-contrib/setup-bazel@0.9.0
13 | with:
14 | bazelisk-cache: true
15 | disk-cache: ${{ github.workflow }}
16 | repository-cache: true
17 | - name: Check code format
18 | run: |
19 | bazelisk run @go_sdk//:bin/gofmt -- -l . > gofmt_output.txt || true
20 | if [ -s gofmt_output.txt ]; then
21 | echo "Following files are not properly formatted:"
22 | cat gofmt_output.txt
23 | echo "Please run: bazelisk run @go_sdk//:bin/gofmt -- -w ."
24 | exit 1
25 | else
26 | echo "All files are properly formatted!"
27 | fi
28 |
29 |
--------------------------------------------------------------------------------
/.github/workflows/pr.yml:
--------------------------------------------------------------------------------
1 | on:
2 | pull_request:
3 | branches: [main]
4 | jobs:
5 | build:
6 | name: Build & Test Everything
7 | runs-on: ubuntu-latest
8 | steps:
9 | - uses: actions/checkout@v4
10 | - uses: bazel-contrib/setup-bazel@0.9.0
11 | with:
12 | # Avoid downloading Bazel every time.
13 | bazelisk-cache: true
14 | # Store build cache per workflow.
15 | disk-cache: ${{ github.workflow }}
16 | # Share repository cache between workflows.
17 | repository-cache: true
18 | - run: cp .env.example env.bzl
19 | - run: bazel build //...
20 | - run: bazel test //...
21 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .terraform.lock.hcl
2 | site/*
3 | *.bazel.lock
4 | .gitconfig
5 | bazel-*
6 | env.bzl
7 | .DS_Store
8 |
9 |
--------------------------------------------------------------------------------
/BUILD:
--------------------------------------------------------------------------------
1 | load("@gazelle//:def.bzl", "gazelle")
2 | load("@rules_multirun//:defs.bzl", "multirun")
3 | load("@rules_oci//oci:defs.bzl", "oci_push")
4 | load("//:env.bzl", "env", "project_id", "region", "zone")
5 |
6 | # gazelle:prefix github.com/manatee-project/manatee
7 | gazelle(name = "gazelle")
8 |
9 | REPOS = {
10 | "api": "us-docker.pkg.dev/{}/dcr-{}-$$namespace-images/manatee-api".format(project_id, env),
11 | "reconciler": "us-docker.pkg.dev/{}/dcr-{}-$$namespace-images/manatee-reconciler".format(project_id, env),
12 | "jupyterlab_manatee": "us-docker.pkg.dev/{}/dcr-{}-$$namespace-images/manatee-jupyterlab-singleuser".format(project_id, env),
13 | "executor": "us-docker.pkg.dev/{}/dcr-{}-user-images/manatee-executor-base".format(project_id, env),
14 | }
15 |
16 | [
17 | genrule(
18 | name = "{}_repo".format(k),
19 | outs = ["{}_repo.txt".format(k)],
20 | cmd = "echo '{}' | envsubst > $@".format(v),
21 | )
22 | for (k, v) in REPOS.items()
23 | ]
24 |
25 | [
26 | oci_push(
27 | name = "push_{}_image".format(k),
28 | image = "//app/{}:image".format(k),
29 | remote_tags = ["latest"],
30 | repository_file = ":{}_repo".format(k),
31 | )
32 | for k in REPOS.keys()
33 | ]
34 |
35 | multirun(
36 | name = "push_all_images",
37 | commands = [
38 | "push_{}_image".format(k)
39 | for k in REPOS.keys()
40 | ],
41 | jobs = 0,
42 | )
43 |
44 | multirun(
45 | name = "load_all_images",
46 | commands = [
47 | "//app/{}:load_image".format(k)
48 | for k in REPOS.keys()
49 | ],
50 | )
51 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to ManaTEE
2 |
3 | We happily welcome contributions to ManaTEE. We use [GitHub Issues](https://github.com/manatee-project/manatee/issues) to track community-reported issues and [GitHub Pull Requests](https://github.com/manatee-project/manatee/pulls) to accept changes.
4 |
5 |
6 | Read our [Code of Conduct](./CODE_OF_CONDUCT.md) to keep our community approachable and respectful.
7 |
8 | This guide details how to use issues and pull requests to improve the project.
9 |
10 | ## General Guidelines
11 |
12 | ### Pull Requests
13 |
14 | Make sure to keep Pull Requests small and functional to make them easier to review, understand, and look up in commit history.
15 |
16 | Adding the appropriate documentation, unit tests and e2e tests as part of a feature is the responsibility of the feature owner, whether it is done in the same Pull Request or not.
17 |
18 | Pull Requests should follow the "Title: Description" format, where the Description describes what part of the code is being modified.
19 |
20 | ### Design Docs
21 |
22 | A contributor proposes a design with a PR on the repository to allow for revisions and discussions. If a design needs to be discussed before formulating a document for it, use a Google Doc and a [GitHub issue](https://github.com/manatee-project/manatee/issues) to involve the community in the discussion.
23 |
24 | ### GitHub Issues
25 |
26 | GitHub Issues are used to file bugs, work items, and feature requests with actionable items/issues (Please refer to the "Reporting Bugs/Feature Requests" section below for more information).
27 |
28 | ### Reporting Bugs/Feature Requests
29 |
30 | We welcome you to use the GitHub issue tracker to report bugs or suggest features that have actionable items/issues (as opposed to introducing a feature request on GitHub Discussions).
31 |
32 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already reported the issue. Please try to include as much information as you can. Details like these are incredibly useful:
33 |
34 | - A reproducible test case or series of steps
35 | - The version of the code being used
36 | - Any modifications you've made relevant to the bug
37 | - Anything unusual about your environment or deployment
38 |
39 | ## Contributing via Pull Requests
40 |
41 | ### Find an interesting issue
42 |
43 | If you spot a problem, [search if an issue already exists](https://github.com/manatee-project/manatee/issues). If a related issue doesn't exist, you can open a new one by clicking [New issue](https://github.com/manatee-project/manatee/issues/new).
44 |
45 |
46 | ### Open a pull request
47 |
48 | When you're done making the changes, open a [Pull Request](https://github.com/manatee-project/manatee/pulls) and fill in the PR template so we can better review your PR. The [template](https://github.com/manatee-project/manatee/issues/new) helps reviewers understand your changes and the purpose of your pull request.
49 |
50 | Don't forget to link the PR to the issue if you are solving one.
51 |
52 | If you run into any merge issues, check out this [GitHub tutorial](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/addressing-merge-conflicts) to help you resolve merge conflicts and other issues.
53 |
54 |
55 | ## Finding contributions to work on
56 |
57 | Looking at the existing issues is a great way to find something to contribute to. Because our projects use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), issues labeled 'help wanted' or 'good first issue' are a great place to start.
58 |
--------------------------------------------------------------------------------
/MODULE.bazel:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | # Bazel now uses Bzlmod by default to manage external dependencies.
3 | # Please consider migrating your external dependencies from WORKSPACE to MODULE.bazel.
4 | #
5 | # For more details, please check https://github.com/bazelbuild/bazel/issues/18958
6 | ###############################################################################
7 |
8 | # rules_proto
9 | bazel_dep(name = "rules_proto", version = "7.1.0")
10 |
11 | # rules_python
12 | bazel_dep(name = "rules_python", version = "0.40.0")
13 |
14 | pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip")
15 | pip.parse(
16 | hub_name = "pydeps",
17 | python_version = "3.11",
18 | requirements_linux = "//app/jupyterlab_manatee:requirements_linux.txt",
19 | requirements_lock = "//app/jupyterlab_manatee:requirements.txt",
20 | )
21 | use_repo(pip, "pydeps")
22 |
23 | # rules nodejs
24 | bazel_dep(name = "aspect_rules_js", version = "2.0.2")
25 | bazel_dep(name = "rules_nodejs", version = "6.2.0")
26 |
27 | node = use_extension("@rules_nodejs//nodejs:extensions.bzl", "node", dev_dependency = True)
28 | node.toolchain(
29 | name = "nodejs",
30 | node_version = "18.17.1",
31 | )
32 | use_repo(node, "nodejs", "nodejs_toolchains")
33 |
34 | # rules_oci
35 | bazel_dep(name = "rules_oci", version = "2.0.0")
36 |
37 | oci = use_extension("@rules_oci//oci:extensions.bzl", "oci")
38 |
39 | # pull base image for scipy-notebook
40 | oci.pull(
41 | name = "scipy-notebook",
42 | digest = "sha256:dc0f8efb6f288d5fc67a94715963282f8066cb3b93324131585edaa0a7a46780",
43 | image = "quay.io/jupyter/scipy-notebook",
44 | platforms = [
45 | "linux/amd64",
46 | ],
47 | )
48 | oci.pull(
49 | name = "distroless_base",
50 | digest = "sha256:ccaef5ee2f1850270d453fdf700a5392534f8d1a8ca2acda391fbb6a06b81c86",
51 | image = "gcr.io/distroless/base",
52 | platforms = [
53 | "linux/amd64",
54 | "linux/arm64",
55 | ],
56 | )
57 | use_repo(
58 | oci,
59 | "distroless_base",
60 | "distroless_base_linux_amd64",
61 | "distroless_base_linux_arm64",
62 | "scipy-notebook",
63 | "scipy-notebook_linux_amd64",
64 | )
65 |
66 | # A multi-arch base image with variants, note that it won't work with just "linux/arm64"
67 |
68 | # rules_pkg
69 | bazel_dep(name = "rules_pkg", version = "1.0.1")
70 | bazel_dep(name = "rules_go", version = "0.50.1")
71 | bazel_dep(name = "gazelle", version = "0.43.0")
72 |
73 | go_sdk = use_extension("@rules_go//go:extensions.bzl", "go_sdk")
74 | go_sdk.download(
75 | name = "go_sdk",
76 | version = "1.23.8",
77 | )
78 | use_repo(go_sdk, "go_sdk")
79 |
80 | register_toolchains("@go_sdk//:all")
81 |
82 | # gazelle:proto disable_global
83 | go_deps = use_extension("@gazelle//:extensions.bzl", "go_deps")
84 | go_deps.from_file(go_mod = "//:go.mod")
85 | go_deps.gazelle_default_attributes(
86 | build_extra_args = [
87 | "-go_naming_convention_external=go_default_library",
88 | ],
89 | build_file_generation = "on",
90 | directives = [
91 | "gazelle:proto disable",
92 | ],
93 | )
94 | go_deps.gazelle_override(
95 | build_file_generation = "clean",
96 | directives = [
97 | "gazelle:build_file_name BUILD.bazel",
98 | ],
99 | path = "github.com/envoyproxy/protoc-gen-validate",
100 | )
101 | use_repo(
102 | go_deps,
103 | "com_github_apache_thrift",
104 | "com_github_cloudwego_hertz",
105 | "com_github_gin_gonic_gin",
106 | "com_github_google_uuid",
107 | "com_github_minio_minio_go_v7",
108 | "com_github_pkg_errors",
109 | "com_google_cloud_go_compute",
110 | "com_google_cloud_go_iam",
111 | "com_google_cloud_go_storage",
112 | "io_gorm_driver_mysql",
113 | "io_gorm_gorm",
114 | "io_k8s_api",
115 | "io_k8s_apimachinery",
116 | "io_k8s_client_go",
117 | "org_golang_google_protobuf",
118 | )
119 |
120 | bazel_dep(name = "rules_multirun", version = "0.10.0")
121 | bazel_dep(name = "rules_distroless", version = "0.5.1")
122 |
123 | apt = use_extension(
124 | "@rules_distroless//apt:extensions.bzl",
125 | "apt",
126 | dev_dependency = True,
127 | )
128 | apt.install(
129 | name = "noble",
130 | lock = "//app/jupyterlab_manatee:noble.lock.json",
131 | manifest = "//app/jupyterlab_manatee:noble.yaml",
132 | )
133 | use_repo(apt, "noble")
134 |
--------------------------------------------------------------------------------
/Manatee_technical_charter_final_9-26-2024.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manatee-project/manatee/07d4c7d5f6fb4f1d5e91252d30cc1f133467abdc/Manatee_technical_charter_final_9-26-2024.pdf
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # ManaTEE Project
4 |
5 | ManaTEE is an open-source project for easily building and deploying a data collaboration framework to the cloud using trusted execution environments (TEEs).
6 | It allows users to easily collaborate on private datasets without compromising the privacy of individual data.
7 | ManaTEE achieves this by combining different privacy-enhancing technologies (PETs) in different stages.
8 |
9 | # What does it offer?
10 |
11 | ManaTEE allows organizations to quickly customize and deploy a data collaboration framework in the cloud.
12 | Organizations can provide a programming environment to external data scientists to conduct research, while protecting data privacy with a custom policy.
13 |
14 | > Note: ManaTEE is under active development, and it is not production-ready. We are looking forward to your feedback and contributions.
15 |
16 | # Quick Start
17 |
18 | Install Bazel with [Bazelisk](https://github.com/bazelbuild/bazelisk):
19 | ```sh
20 | brew install bazelisk # on MacOS
21 | choco install bazelisk # on Windows
22 | ```
23 | On Ubuntu, download the latest Bazelisk binary via [Releases](https://github.com/bazelbuild/bazelisk/releases)
24 |
25 | Build all images
26 | ```
27 | bazelisk build //...
28 | ```
29 |
30 | Run all tests
31 | ```
32 | bazelisk test //...
33 | ```
34 |
35 | See the [documentation](https://manatee-project.github.io/manatee) for more details, including cloud deployment.
36 | # License
37 |
38 | ManaTEE is licensed under the Apache License 2.0.
39 | See [LICENSE](LICENSE) for details.
--------------------------------------------------------------------------------
/app/api/.gitignore:
--------------------------------------------------------------------------------
1 | *.o
2 | *.a
3 | *.so
4 | _obj
5 | _test
6 | *.[568vq]
7 | [568vq].out
8 | *.cgo1.go
9 | *.cgo2.c
10 | _cgo_defun.c
11 | _cgo_gotypes.go
12 | _cgo_export.*
13 | _testmain.go
14 | *.exe
15 | *.exe~
16 | *.test
17 | *.prof
18 | *.rar
19 | *.zip
20 | *.gz
21 | *.psd
22 | *.bmd
23 | *.cfg
24 | *.pptx
25 | *.log
26 | *nohup.out
27 | *settings.pyc
28 | *.sublime-project
29 | *.sublime-workspace
30 | !.gitkeep
31 | .DS_Store
32 | /.idea
33 | /.vscode
34 | /output
35 | *.local.yml
36 | dumped_hertz_remote_config.json
37 | conf
38 | github.com
39 | api
40 |
--------------------------------------------------------------------------------
/app/api/.hz:
--------------------------------------------------------------------------------
1 | // Code generated by hz. DO NOT EDIT.
2 |
3 | hz version: v0.9.1
4 | handlerDir: ""
5 | modelDir: biz/model
6 | routerDir: ""
7 |
--------------------------------------------------------------------------------
/app/api/BUILD.bazel:
--------------------------------------------------------------------------------
1 | load("@rules_go//go:def.bzl", "go_binary", "go_library")
2 | load("@rules_oci//oci:defs.bzl", "oci_image", "oci_load")
3 | load("@rules_pkg//pkg:tar.bzl", "pkg_tar")
4 |
5 | go_binary(
6 | name = "api",
7 | embed = [":api_lib"],
8 | goarch = "amd64",
9 | goos = "linux",
10 | visibility = ["//visibility:public"],
11 | )
12 |
13 | go_library(
14 | name = "api_lib",
15 | srcs = [
16 | "main.go",
17 | "router.go",
18 | "router_gen.go",
19 | ],
20 | importpath = "github.com/manatee-project/manatee/app/api",
21 | visibility = ["//visibility:private"],
22 | deps = [
23 | "//app/api/biz/dal",
24 | "//app/api/biz/handler",
25 | "//app/api/biz/router",
26 | "@com_github_cloudwego_hertz//pkg/app/server",
27 | ],
28 | )
29 |
30 | pkg_tar(
31 | name = "tar",
32 | srcs = [":api"],
33 | )
34 |
35 | oci_image(
36 | name = "image",
37 | base = "@distroless_base_linux_amd64",
38 | entrypoint = ["/api"],
39 | tars = [
40 | ":tar",
41 | ],
42 | visibility = ["//visibility:public"],
43 | )
44 |
45 | oci_load(
46 | name = "load_image",
47 | image = ":image",
48 | repo_tags = ["api:latest"],
49 | visibility = ["//visibility:public"],
50 | )
51 |
--------------------------------------------------------------------------------
/app/api/biz/dal/BUILD.bazel:
--------------------------------------------------------------------------------
1 | load("@rules_go//go:def.bzl", "go_library")
2 |
3 | go_library(
4 | name = "dal",
5 | srcs = ["init.go"],
6 | importpath = "github.com/manatee-project/manatee/app/api/biz/dal",
7 | visibility = ["//visibility:public"],
8 | deps = ["//app/api/biz/dal/db"],
9 | )
10 |
--------------------------------------------------------------------------------
/app/api/biz/dal/db/BUILD.bazel:
--------------------------------------------------------------------------------
1 | load("@rules_go//go:def.bzl", "go_library")
2 |
3 | go_library(
4 | name = "db",
5 | srcs = [
6 | "init.go",
7 | "job.go",
8 | ],
9 | importpath = "github.com/manatee-project/manatee/app/api/biz/dal/db",
10 | visibility = ["//visibility:public"],
11 | deps = [
12 | "@com_github_pkg_errors//:errors",
13 | "@io_gorm_driver_mysql//:mysql",
14 | "@io_gorm_gorm//:gorm",
15 | "@io_gorm_gorm//logger",
16 | ],
17 | )
18 |
--------------------------------------------------------------------------------
/app/api/biz/dal/db/init.go:
--------------------------------------------------------------------------------
1 | // Copyright 2024 TikTok Pte. Ltd.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package db
16 |
17 | import (
18 | "fmt"
19 | "os"
20 |
21 | "gorm.io/driver/mysql"
22 | "gorm.io/gorm"
23 | "gorm.io/gorm/logger"
24 | )
25 |
26 | var DB *gorm.DB
27 |
28 | func Init() {
29 | var err error
30 | mysqlDsn := fmt.Sprintf("%s:%s@tcp(%s:%s)/%s?charset=utf8&parseTime=True&loc=Local", os.Getenv("MYSQL_USERNAME"), os.Getenv("MYSQL_PASSWORD"), os.Getenv("MYSQL_HOST"), os.Getenv("MYSQL_PORT"), os.Getenv("MYSQL_DATABASE"))
31 | DB, err = gorm.Open(mysql.Open(mysqlDsn), &gorm.Config{
32 | SkipDefaultTransaction: true,
33 | PrepareStmt: true,
34 | Logger: logger.Default.LogMode(logger.Info),
35 | })
36 | if err != nil {
37 | panic(err)
38 | }
39 |
40 | // Auto database schema migration
41 | // This has caveat: see https://gorm.io/docs/migration.html
42 | err = DB.AutoMigrate(&Job{})
43 | if err != nil {
44 | panic(err)
45 | }
46 | }
47 |
--------------------------------------------------------------------------------
/app/api/biz/dal/init.go:
--------------------------------------------------------------------------------
1 | // Copyright 2024 TikTok Pte. Ltd.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package dal
16 |
17 | import "github.com/manatee-project/manatee/app/api/biz/dal/db"
18 |
19 | func Init() {
20 | db.Init()
21 | }
22 |
--------------------------------------------------------------------------------
/app/api/biz/handler/BUILD.bazel:
--------------------------------------------------------------------------------
1 | load("@rules_go//go:def.bzl", "go_library")
2 |
3 | go_library(
4 | name = "handler",
5 | srcs = ["health.go"],
6 | importpath = "github.com/manatee-project/manatee/app/api/biz/handler",
7 | visibility = ["//visibility:public"],
8 | deps = [
9 | "@com_github_cloudwego_hertz//pkg/app",
10 | "@com_github_cloudwego_hertz//pkg/common/utils",
11 | "@com_github_cloudwego_hertz//pkg/protocol/consts",
12 | ],
13 | )
14 |
--------------------------------------------------------------------------------
/app/api/biz/handler/health.go:
--------------------------------------------------------------------------------
1 | // Copyright 2024 TikTok Pte. Ltd.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | // Code generated by hertz generator.
16 |
17 | package handler
18 |
19 | import (
20 | "context"
21 |
22 | "github.com/cloudwego/hertz/pkg/app"
23 | "github.com/cloudwego/hertz/pkg/common/utils"
24 | "github.com/cloudwego/hertz/pkg/protocol/consts"
25 | )
26 |
27 | // Health .
28 | func Health(ctx context.Context, c *app.RequestContext) {
29 | c.JSON(consts.StatusOK, utils.H{
30 | "message": "pong",
31 | })
32 | }
33 |
--------------------------------------------------------------------------------
/app/api/biz/handler/job/BUILD.bazel:
--------------------------------------------------------------------------------
1 | load("@rules_go//go:def.bzl", "go_library")
2 |
3 | go_library(
4 | name = "job",
5 | srcs = ["job_handler.go"],
6 | importpath = "github.com/manatee-project/manatee/app/api/biz/handler/job",
7 | visibility = ["//visibility:public"],
8 | deps = [
9 | "//app/api/biz/model/job",
10 | "//app/api/biz/pkg/errno",
11 | "//app/api/biz/pkg/utils",
12 | "//app/api/biz/service",
13 | "@com_github_cloudwego_hertz//pkg/app",
14 | "@com_github_cloudwego_hertz//pkg/common/hlog",
15 | "@com_github_cloudwego_hertz//pkg/protocol/consts",
16 | ],
17 | )
18 |
--------------------------------------------------------------------------------
/app/api/biz/model/job/BUILD.bazel:
--------------------------------------------------------------------------------
1 | load("@rules_go//go:def.bzl", "go_library")
2 |
3 | go_library(
4 | name = "job",
5 | srcs = ["job.go"],
6 | importpath = "github.com/manatee-project/manatee/app/api/biz/model/job",
7 | visibility = ["//visibility:public"],
8 | deps = ["@com_github_apache_thrift//lib/go/thrift"],
9 | )
10 |
--------------------------------------------------------------------------------
/app/api/biz/pkg/errno/BUILD.bazel:
--------------------------------------------------------------------------------
1 | load("@rules_go//go:def.bzl", "go_library")
2 |
3 | go_library(
4 | name = "errno",
5 | srcs = ["errno.go"],
6 | importpath = "github.com/manatee-project/manatee/app/api/biz/pkg/errno",
7 | visibility = ["//visibility:public"],
8 | )
9 |
--------------------------------------------------------------------------------
/app/api/biz/pkg/errno/errno.go:
--------------------------------------------------------------------------------
1 | // Copyright 2024 TikTok Pte. Ltd.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package errno
16 |
17 | import (
18 | "fmt"
19 | )
20 |
// Numeric error codes carried in ErrNo.ErrCode.
//
// The values are written out explicitly. They are the same values the former
// `iota + 10000` expression produced: iota is 1 on the second line of a const
// block, so the first error code is 10001, not 10000.
const (
	SuccessCode          = 0
	ServiceErrCode       = 10001
	ReachJobLimitErrCode = 10002
)

// Default messages paired with the codes above.
const (
	SuccessMsg          = "Success"
	ServiceErrMsg       = "Service internal error"
	ReachJobLimitErrMsg = "The number of in progress jobs has reached the limit"
)
32 |
// ErrNo is an error value that pairs a numeric code with a message.
type ErrNo struct {
	ErrCode int32
	ErrMsg  string
}

// Error implements the error interface, rendering both the code and the
// message.
func (e ErrNo) Error() string {
	code, msg := e.ErrCode, e.ErrMsg
	return fmt.Sprintf("err_code=%d, err_msg=%s", code, msg)
}

// NewErrNo builds an ErrNo from the given code and message.
func NewErrNo(code int32, msg string) ErrNo {
	return ErrNo{ErrCode: code, ErrMsg: msg}
}

// WithMessage returns a copy of e that keeps the code but carries msg.
// The receiver is passed by value, so the original is left untouched.
func (e ErrNo) WithMessage(msg string) ErrNo {
	return ErrNo{ErrCode: e.ErrCode, ErrMsg: msg}
}
50 |
51 | var (
52 | Success = NewErrNo(SuccessCode, SuccessMsg)
53 | ServiceErr = NewErrNo(ServiceErrCode, ServiceErrMsg)
54 | ReachJobLimitErr = NewErrNo(ReachJobLimitErrCode, ReachJobLimitErrMsg)
55 | )
56 |
--------------------------------------------------------------------------------
/app/api/biz/pkg/storage/BUILD.bazel:
--------------------------------------------------------------------------------
1 | load("@rules_go//go:def.bzl", "go_library")
2 |
3 | go_library(
4 | name = "storage",
5 | srcs = [
6 | "gcs.go",
7 | "minio.go",
8 | "mock.go",
9 | "storage.go",
10 | ],
11 | importpath = "github.com/manatee-project/manatee/app/api/biz/pkg/storage",
12 | visibility = ["//visibility:public"],
13 | deps = [
14 | "@com_github_minio_minio_go_v7//:minio-go",
15 | "@com_github_minio_minio_go_v7//pkg/credentials",
16 | "@com_github_pkg_errors//:errors",
17 | "@com_google_cloud_go_iam//credentials/apiv1",
18 | "@com_google_cloud_go_iam//credentials/apiv1/credentialspb",
19 | "@com_google_cloud_go_storage//:storage",
20 | ],
21 | )
22 |
--------------------------------------------------------------------------------
/app/api/biz/pkg/storage/gcs.go:
--------------------------------------------------------------------------------
1 | package storage
2 |
3 | import (
4 | "compress/gzip"
5 | "context"
6 | "fmt"
7 | "io"
8 | "net/http"
9 | "time"
10 |
11 | "cloud.google.com/go/storage"
12 | "github.com/pkg/errors"
13 |
14 | credentials "cloud.google.com/go/iam/credentials/apiv1"
15 | credentialspb "cloud.google.com/go/iam/credentials/apiv1/credentialspb"
16 | )
17 |
18 | type GoogleCloudStorage struct {
19 | ctx context.Context
20 | bucket string
21 | client *storage.Client
22 | iamClient *credentials.IamCredentialsClient
23 | googleAccessId string
24 | }
25 |
26 | func NewGoogleCloudStorage(ctx context.Context, bucket string) (*GoogleCloudStorage, error) {
27 | client, err := storage.NewClient(ctx)
28 | if err != nil {
29 | return nil, errors.Wrap(err, "failed to create storage client")
30 | }
31 | serviceAccount, err := getGoogleServiceAccount()
32 | if err != nil {
33 | return nil, errors.Wrap(err, "failed to get google service account")
34 | }
35 | iamClient, err := credentials.NewIamCredentialsClient(ctx)
36 | if err != nil {
37 | return nil, errors.Wrap(err, "failed to create iam client")
38 | }
39 | return &GoogleCloudStorage{
40 | ctx: ctx,
41 | bucket: bucket,
42 | iamClient: iamClient,
43 | client: client,
44 | googleAccessId: serviceAccount,
45 | }, nil
46 | }
47 |
48 | func (g *GoogleCloudStorage) Close() {
49 | g.client.Close()
50 | }
51 |
52 | func (g *GoogleCloudStorage) BucketPath() string {
53 | return fmt.Sprintf("gs://%s", g.bucket)
54 | }
55 |
56 | func (g *GoogleCloudStorage) UploadFile(reader io.Reader, remotePath string, compress bool) error {
57 | writer := g.client.Bucket(g.bucket).Object(remotePath).NewWriter(g.ctx)
58 | defer writer.Close()
59 | if compress {
60 | gzipWriter := gzip.NewWriter(writer)
61 | if _, err := io.Copy(gzipWriter, reader); err != nil {
62 | return errors.Wrap(err, "failed to copy content to gzip writer")
63 | }
64 | defer gzipWriter.Close()
65 | } else {
66 | if _, err := io.Copy(writer, reader); err != nil {
67 | return errors.Wrap(err, "failed to copy content to writer")
68 | }
69 | }
70 | return nil
71 | }
72 |
73 | func (g *GoogleCloudStorage) IssueSignedUrl(remotePath string, method string, expires time.Duration) (string, error) {
74 | if method != "GET" && method != "PUT" {
75 | return "", errors.Wrap(fmt.Errorf("unkown method for signed url, supported are GET and PUT"), "")
76 | }
77 |
78 | opts := &storage.SignedURLOptions{
79 | Scheme: storage.SigningSchemeV4,
80 | Method: method,
81 | Expires: time.Now().Add(expires),
82 | GoogleAccessID: g.googleAccessId,
83 | SignBytes: func(b []byte) ([]byte, error) {
84 | req := &credentialspb.SignBlobRequest{
85 | Payload: b,
86 | Name: g.googleAccessId,
87 | }
88 | resp, err := g.iamClient.SignBlob(g.ctx, req)
89 | if err != nil {
90 | return nil, errors.Wrap(err, "failed to sign blocb")
91 | }
92 | return resp.SignedBlob, err
93 | },
94 | }
95 | url, err := storage.SignedURL(g.bucket, remotePath, opts)
96 | if err != nil {
97 | return "", errors.Wrap(err, "failed to sign url")
98 | }
99 | return url, nil
100 | }
101 |
102 | func getGoogleServiceAccount() (string, error) {
103 | url := "http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/email"
104 | req, err := http.NewRequest("GET", url, nil)
105 | if err != nil {
106 | return "", errors.Wrap(err, "failed to create http client")
107 | }
108 | req.Header.Add("Metadata-Flavor", "Google")
109 | client := &http.Client{}
110 | resp, err := client.Do(req)
111 | if err != nil {
112 | return "", errors.Wrap(err, "failed to request google meta service account")
113 | }
114 | defer resp.Body.Close()
115 | account, err := io.ReadAll(resp.Body)
116 | if err != nil {
117 | return "", errors.Wrap(err, "failed to request google meta service account")
118 | }
119 | return string(account), nil
120 | }
121 |
--------------------------------------------------------------------------------
/app/api/biz/pkg/storage/minio.go:
--------------------------------------------------------------------------------
1 | package storage
2 |
3 | import (
4 | "context"
5 | "fmt"
6 | "io"
7 | "net/url"
8 | "os"
9 | "time"
10 |
11 | "github.com/minio/minio-go/v7"
12 | "github.com/minio/minio-go/v7/pkg/credentials"
13 | "github.com/pkg/errors"
14 | )
15 |
16 | type MinioStorage struct {
17 | ctx context.Context
18 | bucket string
19 | minioClient minio.Client
20 | }
21 |
22 | func NewMinioStorage(ctx context.Context, bucket string) (*MinioStorage, error) {
23 | accessKeyID := os.Getenv("AWS_ACCESS_KEY_ID")
24 | if accessKeyID == "" {
25 | return nil, fmt.Errorf("AWS_ACCESS_KEY_ID environment variable is not present")
26 | }
27 | secretAccessKey := os.Getenv("AWS_SECRET_ACCESS_KEY")
28 | if secretAccessKey == "" {
29 | return nil, fmt.Errorf("AWS_SECRET_ACCESS_KEY environment variable is not present")
30 | }
31 | endpoint := os.Getenv("S3_ENDPOINT")
32 | if endpoint == "" {
33 | return nil, fmt.Errorf("S3_ENDPOINT environment variable is not present")
34 | }
35 | minioClient, err := minio.New(endpoint, &minio.Options{
36 | Creds: credentials.NewStaticV4(accessKeyID, secretAccessKey, ""),
37 | Secure: false,
38 | })
39 | if err != nil {
40 | return nil, err
41 | }
42 |
43 | exist, err := minioClient.BucketExists(ctx, bucket)
44 | if err != nil {
45 | return nil, err
46 | }
47 |
48 | if !exist {
49 | err = minioClient.MakeBucket(ctx, bucket, minio.MakeBucketOptions{Region: "us"})
50 | if err != nil {
51 | return nil, err
52 | }
53 | }
54 |
55 | return &MinioStorage{
56 | ctx: ctx,
57 | bucket: bucket,
58 | minioClient: *minioClient,
59 | }, nil
60 | }
61 |
62 | func (m *MinioStorage) Close() {
63 | }
64 |
65 | func (m *MinioStorage) BucketPath() string {
66 | return fmt.Sprintf("s3://%s", m.bucket)
67 | }
68 |
69 | // compress parameter hasn't been implemented for minio client
70 | func (m *MinioStorage) UploadFile(reader io.Reader, remotePath string, compress bool) error {
71 | _, err := m.minioClient.PutObject(m.ctx, m.bucket, remotePath, reader, -1, minio.PutObjectOptions{ContentType: "application/octet-stream"})
72 | if err != nil {
73 | return errors.Wrap(err, "failed to upload to minio")
74 | }
75 | return nil
76 | }
77 |
78 | func (m *MinioStorage) IssueSignedUrl(remotePath string, method string, expires time.Duration) (string, error) {
79 | reqParams := make(url.Values)
80 | var url *url.URL
81 | var err error
82 | if method == "GET" {
83 | url, err = m.minioClient.PresignedGetObject(m.ctx, m.bucket, remotePath, expires, reqParams)
84 | if err != nil {
85 | return "", err
86 | }
87 | } else if method == "PUT" {
88 | url, err = m.minioClient.PresignedPutObject(m.ctx, m.bucket, remotePath, expires)
89 | if err != nil {
90 | return "", err
91 | }
92 | } else {
93 | return "", errors.Wrap(fmt.Errorf("unkown method for signed url, supported are GET and PUT"), "")
94 | }
95 | return url.String(), nil
96 | }
97 |
--------------------------------------------------------------------------------
/app/api/biz/pkg/storage/mock.go:
--------------------------------------------------------------------------------
1 | package storage
2 |
3 | import (
4 | "context"
5 | "io"
6 | "time"
7 | )
8 |
9 | // A mock storage only used for job service testing
10 | type MockStorage struct {
11 | ctx context.Context
12 | }
13 |
14 | func NewMockStorage(ctx context.Context) *MockStorage {
15 | return &MockStorage{
16 | ctx: ctx,
17 | }
18 | }
19 |
20 | func (m *MockStorage) Close() {
21 | }
22 |
23 | func (m *MockStorage) BucketPath() string {
24 | return ""
25 | }
26 |
27 | func (m *MockStorage) UploadFile(reader io.Reader, remotePath string, compress bool) error {
28 | return nil
29 | }
30 |
31 | func (m *MockStorage) IssueSignedUrl(remotePath string, method string, expires time.Duration) (string, error) {
32 | return "", nil
33 | }
34 |
--------------------------------------------------------------------------------
/app/api/biz/pkg/storage/storage.go:
--------------------------------------------------------------------------------
1 | package storage
2 |
3 | import (
4 | "context"
5 | "fmt"
6 | "io"
7 | "os"
8 | "time"
9 |
10 | "github.com/pkg/errors"
11 | )
12 |
13 | type Storage interface {
14 | BucketPath() string
15 | UploadFile(reader io.Reader, remotePath string, compress bool) error
16 | IssueSignedUrl(remotePath string, method string, expiry time.Duration) (string, error)
17 | Close()
18 | }
19 |
20 | func getBucket() (string, error) {
21 | env := os.Getenv("ENV")
22 | if env == "" {
23 | return "", errors.Wrap(fmt.Errorf("ENV environment variable is not present"), "")
24 | }
25 | return fmt.Sprintf("dcr-%s-hub", env), nil
26 | }
27 |
28 | func GetStorage(ctx context.Context) (Storage, error) {
29 | storageType := os.Getenv("STORAGE_TYPE")
30 | if storageType == "" {
31 | storageType = "MOCK"
32 | }
33 | var storage Storage
34 | bucket, err := getBucket()
35 | if err != nil {
36 | return storage, err
37 | }
38 | if storageType == "GCP" {
39 | storage, err = NewGoogleCloudStorage(ctx, bucket)
40 | } else if storageType == "MINIO" {
41 | storage, err = NewMinioStorage(ctx, bucket)
42 | } else if storageType == "MOCK" {
43 | storage = NewMockStorage(ctx)
44 | }
45 | return storage, err
46 | }
47 |
--------------------------------------------------------------------------------
/app/api/biz/pkg/utils/BUILD.bazel:
--------------------------------------------------------------------------------
1 | load("@rules_go//go:def.bzl", "go_library")
2 |
3 | go_library(
4 | name = "utils",
5 | srcs = ["resp.go"],
6 | importpath = "github.com/manatee-project/manatee/app/api/biz/pkg/utils",
7 | visibility = ["//visibility:public"],
8 | deps = [
9 | "//app/api/biz/pkg/errno",
10 | "@com_github_cloudwego_hertz//pkg/app",
11 | "@com_github_gin_gonic_gin//:gin",
12 | ],
13 | )
14 |
--------------------------------------------------------------------------------
/app/api/biz/pkg/utils/resp.go:
--------------------------------------------------------------------------------
1 | // Copyright 2024 TikTok Pte. Ltd.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package utils
16 |
17 | import (
18 | "errors"
19 | "net/http"
20 |
21 | "github.com/cloudwego/hertz/pkg/app"
22 | "github.com/gin-gonic/gin"
23 |
24 | "github.com/manatee-project/manatee/app/api/biz/pkg/errno"
25 | )
26 |
27 | type BaseResp struct {
28 | StatusCode int32
29 | StatusMsg string
30 | }
31 |
32 | // BuildBaseResp convert error and build BaseResp
33 | func BuildBaseResp(err error) *BaseResp {
34 | if err == nil {
35 | return baseResp(errno.Success)
36 | }
37 |
38 | e := errno.ErrNo{}
39 | if errors.As(err, &e) {
40 | return baseResp(e)
41 | }
42 |
43 | s := errno.ServiceErr.WithMessage(err.Error())
44 | return baseResp(s)
45 | }
46 |
47 | // baseResp build BaseResp from error
48 | func baseResp(err errno.ErrNo) *BaseResp {
49 | return &BaseResp{
50 | StatusCode: err.ErrCode,
51 | StatusMsg: err.ErrMsg,
52 | }
53 | }
54 |
55 | func ReturnsJSONError(c *app.RequestContext, err error) {
56 | resp := BuildBaseResp(err)
57 | c.JSON(http.StatusOK, gin.H{"code": resp.StatusCode, "msg": resp.StatusMsg})
58 | c.Abort()
59 | }
60 |
--------------------------------------------------------------------------------
/app/api/biz/router/BUILD.bazel:
--------------------------------------------------------------------------------
1 | load("@rules_go//go:def.bzl", "go_library")
2 |
3 | go_library(
4 | name = "router",
5 | srcs = ["register.go"],
6 | importpath = "github.com/manatee-project/manatee/app/api/biz/router",
7 | visibility = ["//visibility:public"],
8 | deps = [
9 | "//app/api/biz/router/job",
10 | "@com_github_cloudwego_hertz//pkg/app/server",
11 | ],
12 | )
13 |
--------------------------------------------------------------------------------
/app/api/biz/router/job/BUILD.bazel:
--------------------------------------------------------------------------------
1 | load("@rules_go//go:def.bzl", "go_library")
2 |
3 | go_library(
4 | name = "job",
5 | srcs = [
6 | "job.go",
7 | "middleware.go",
8 | ],
9 | importpath = "github.com/manatee-project/manatee/app/api/biz/router/job",
10 | visibility = ["//visibility:public"],
11 | deps = [
12 | "//app/api/biz/handler/job",
13 | "@com_github_cloudwego_hertz//pkg/app",
14 | "@com_github_cloudwego_hertz//pkg/app/server",
15 | ],
16 | )
17 |
--------------------------------------------------------------------------------
/app/api/biz/router/job/job.go:
--------------------------------------------------------------------------------
1 | // Code generated by hertz generator. DO NOT EDIT.
2 |
3 | package job
4 |
5 | import (
6 | "github.com/cloudwego/hertz/pkg/app/server"
7 | job "github.com/manatee-project/manatee/app/api/biz/handler/job"
8 | )
9 |
10 | /*
11 | This file will register all the routes of the services in the master idl.
12 | And it will update automatically when you use the "update" command for the idl.
13 | So don't modify the contents of the file, or your code will be deleted when it is updated.
14 | */
15 |
16 | // Register register routes based on the IDL 'api.${HTTP Method}' annotation.
17 | func Register(r *server.Hertz) {
18 |
19 | root := r.Group("/", rootMw()...)
20 | {
21 | _v1 := root.Group("/v1", _v1Mw()...)
22 | {
23 | _job := _v1.Group("/job", _jobMw()...)
24 | {
25 | _attestation := _job.Group("/attestation", _attestationMw()...)
26 | _attestation.POST("/", append(_queryjobattestationreportMw(), job.QueryJobAttestationReport)...)
27 | }
28 | {
29 | _delete := _job.Group("/delete", _deleteMw()...)
30 | _delete.POST("/", append(_deletejobMw(), job.DeleteJob)...)
31 | }
32 | {
33 | _output := _job.Group("/output", _outputMw()...)
34 | {
35 | _download := _output.Group("/download", _downloadMw()...)
36 | _download.POST("/", append(_downloadjoboutputMw(), job.DownloadJobOutput)...)
37 | }
38 | }
39 | {
40 | _query := _job.Group("/query", _queryMw()...)
41 | _query.POST("/", append(_queryjobMw(), job.QueryJob)...)
42 | }
43 | {
44 | _submit := _job.Group("/submit", _submitMw()...)
45 | _submit.POST("/", append(_submitjobMw(), job.SubmitJob)...)
46 | }
47 | }
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/app/api/biz/router/job/middleware.go:
--------------------------------------------------------------------------------
1 | // Code generated by hertz generator.
2 |
3 | package job
4 |
5 | import (
6 | "github.com/cloudwego/hertz/pkg/app"
7 | )
8 |
9 | func rootMw() []app.HandlerFunc {
10 | // your code...
11 | return nil
12 | }
13 |
14 | func _v1Mw() []app.HandlerFunc {
15 | // your code...
16 | return nil
17 | }
18 |
19 | func _jobMw() []app.HandlerFunc {
20 | // your code...
21 | return nil
22 | }
23 |
24 | func _deleteMw() []app.HandlerFunc {
25 | // your code...
26 | return nil
27 | }
28 |
29 | func _deletejobMw() []app.HandlerFunc {
30 | // your code...
31 | return nil
32 | }
33 |
34 | func _queryMw() []app.HandlerFunc {
35 | // your code...
36 | return nil
37 | }
38 |
39 | func _queryjobMw() []app.HandlerFunc {
40 | // your code...
41 | return nil
42 | }
43 |
44 | func _submitMw() []app.HandlerFunc {
45 | // your code...
46 | return nil
47 | }
48 |
49 | func _createjobMw() []app.HandlerFunc {
50 | // your code...
51 | return nil
52 | }
53 |
54 | func _updateMw() []app.HandlerFunc {
55 | // your code...
56 | return nil
57 | }
58 |
59 | func _fileMw() []app.HandlerFunc {
60 | // your code...
61 | return nil
62 | }
63 |
64 | func _attrsMw() []app.HandlerFunc {
65 | // your code...
66 | return nil
67 | }
68 |
69 | func _queryjoboutputattrMw() []app.HandlerFunc {
70 | // your code...
71 | return nil
72 | }
73 |
74 | func _downloadMw() []app.HandlerFunc {
75 | // your code...
76 | return nil
77 | }
78 |
79 | func _downloadjoboutputMw() []app.HandlerFunc {
80 | // your code...
81 | return nil
82 | }
83 |
84 | func _attestationMw() []app.HandlerFunc {
85 | // your code...
86 | return nil
87 | }
88 |
89 | func _queryjobattestationreportMw() []app.HandlerFunc {
90 | // your code...
91 | return nil
92 | }
93 |
94 | func _outputMw() []app.HandlerFunc {
95 | // your code...
96 | return nil
97 | }
98 |
99 | func _submitjobMw() []app.HandlerFunc {
100 | // your code...
101 | return nil
102 | }
103 |
--------------------------------------------------------------------------------
/app/api/biz/router/register.go:
--------------------------------------------------------------------------------
1 | // Code generated by hertz generator. DO NOT EDIT.
2 |
3 | package router
4 |
5 | import (
6 | "github.com/cloudwego/hertz/pkg/app/server"
7 | job "github.com/manatee-project/manatee/app/api/biz/router/job"
8 | )
9 |
10 | // GeneratedRegister registers routers generated by IDL.
11 | func GeneratedRegister(r *server.Hertz) {
12 | //INSERT_POINT: DO NOT DELETE THIS LINE!
13 | job.Register(r)
14 | }
15 |
--------------------------------------------------------------------------------
/app/api/biz/service/BUILD.bazel:
--------------------------------------------------------------------------------
1 | load("@rules_go//go:def.bzl", "go_library", "go_test")
2 |
3 | go_library(
4 | name = "service",
5 | srcs = ["job_service.go"],
6 | importpath = "github.com/manatee-project/manatee/app/api/biz/service",
7 | visibility = ["//visibility:public"],
8 | deps = [
9 | "//app/api/biz/dal/db",
10 | "//app/api/biz/model/job",
11 | "//app/api/biz/pkg/errno",
12 | "//app/api/biz/pkg/storage",
13 | "@com_github_cloudwego_hertz//pkg/common/hlog",
14 | "@com_github_google_uuid//:uuid",
15 | "@com_github_pkg_errors//:errors",
16 | ],
17 | )
18 |
19 | go_test(
20 | name = "service_test",
21 | srcs = ["job_service_test.go"],
22 | embed = [":service"],
23 | )
24 |
--------------------------------------------------------------------------------
/app/api/biz/service/job_service_test.go:
--------------------------------------------------------------------------------
1 | package service
2 |
3 | import (
4 | "context"
5 | "os"
6 | "strings"
7 | "testing"
8 | )
9 |
10 | var expectedDockerfile1 string = `ARG BASE_IMAGE
11 | FROM $BASE_IMAGE
12 | ARG OUTPUTPATH
13 | ARG JUPYTER_FILENAME
14 | ARG USER_WORKSPACE
15 | ARG CUSTOMTOKEN_CLOUDSTORAGE_PATH
16 |
17 | ENV OUTPUTPATH=$OUTPUTPATH
18 | ENV JUPYTER_FILENAME=$JUPYTER_FILENAME
19 | ENV CUSTOMTOKEN_CLOUDSTORAGE_PATH=$CUSTOMTOKEN_CLOUDSTORAGE_PATH
20 |
21 | WORKDIR /home/jovyan
22 | COPY $USER_WORKSPACE/* ./
23 |
24 |
25 | ENTRYPOINT jupyter nbconvert --execute --to notebook --inplace $JUPYTER_FILENAME --ExecutePreprocessor.timeout=-1 --allow-errors \
26 | && hash=$(md5sum $JUPYTER_FILENAME | awk '{ print $1 }') \
27 | && ./gscp $JUPYTER_FILENAME $OUTPUTPATH \
28 | && ./gen_custom_token --nonce $hash \
29 | && ./gscp custom_token $CUSTOMTOKEN_CLOUDSTORAGE_PATH
30 | `
31 |
32 | func TestGenerateDockerfile(t *testing.T) {
33 | os.Setenv("STORAGE_TYPE", "MOCK")
34 | os.Setenv("ENV", "minikube")
35 | js := NewJobService(context.Background())
36 |
37 | content := js.generateDockerfile([]string{})
38 | if strings.Contains(content, `LABEL "tee.launch_policy.allow_env_override"`) {
39 | t.Errorf("Dockerfile contains wrong allow_env_override policy")
40 | }
41 | content = js.generateDockerfile([]string{"USER_TOKEN"})
42 | if !strings.Contains(content, `LABEL "tee.launch_policy.allow_env_override"="USER_TOKEN"`) {
43 | t.Errorf("Dockerfile does not contain correct allow_env_override policy")
44 | }
45 | content = js.generateDockerfile([]string{"USER_TOKEN", "CUSTOM_ENV_VAR", "BREAKPOINT"})
46 | if !strings.Contains(content, `LABEL "tee.launch_policy.allow_env_override"="USER_TOKEN,CUSTOM_ENV_VAR,BREAKPOINT"`) {
47 | t.Errorf("Dockerfile does not contain correct allow_env_override policy")
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/app/api/idl/job.thrift:
--------------------------------------------------------------------------------
1 | namespace go job
2 |
3 | enum JobStatus {
4 | Created = 0
5 | ImageBuilding = 1
6 | ImageBuildingFailed = 2
7 | VMWaiting = 3
8 | VMRunning = 4
9 | VMFinished = 5
10 | VMKilled = 6
11 | VMFailed = 7
12 | VMOther = 8
13 | VMLaunchFailed = 9
14 | }
15 |
16 | struct Job {
17 | 1: i64 id
18 | 2: string uuid
19 | 3: string creator
20 | 4: JobStatus job_status
21 | 5: string jupyter_file_name
22 | 6: string created_at
23 | 7: string updated_at
24 | }
25 |
26 | struct Env {
27 | 1: string key
28 | 2: string value
29 | }
30 |
// Request body of SubmitJob. The access token is taken from the
// Authorization header.
struct SubmitJobRequest{
    1: string jupyter_file_name (api.body="filename", api.vd="len($) > 0 && len($) < 128 && regexp('^.*\\.ipynb$') && !regexp('.*\\.\\..*')")
    2: string creator (api.body="creator", api.vd="len($) > 0 && len($) < 32 && !regexp('.*\\.\\..*')")
    // A Thrift list needs an element type; the elements are the Env key/value pairs.
    3: list<Env> envs (api.body="envs", api.json="envs")
    255: required string access_token (api.header="Authorization")
}
37 |
38 | struct SubmitJobResponse{
39 | 1: i32 code
40 | 2: string msg
41 | 3: string uuid
42 | }
43 |
// Request of QueryJob. page must be positive and page_size must be within
// 1..100. The access token is taken from the Authorization header.
struct QueryJobRequest {
    1: i64 page (api.body="page", api.query="page",api.vd="$>0")
    // "&&" (not "||"): with "||" the expression is true for every value, so
    // the upper bound of 100 was never enforced.
    2: i64 page_size (api.body="page_size", api.query="page_size", api.vd="$ > 0 && $ <= 100")
    3: string creator (api.body="creator", api.vd="len($) > 0 && len($) < 32 && !regexp('.*\\.\\..*')")
    255: required string access_token (api.header="Authorization")
}
50 |
// Response of QueryJob: status code and message, the returned jobs, and a
// total count.
struct QueryJobResponse {
    1: i32 code
    2: string msg
    // A Thrift list needs an element type; the elements are Job records.
    3: list<Job> jobs
    4: i64 total
}
57 |
58 | struct DeleteJobRequest {
59 | 1: string uuid (api.body="uuid", api.query="uuid")
60 | 2: string creator (api.body="creator", api.vd="len($) > 0 && len($) < 32 && !regexp('.*\\.\\..*')")
61 | 255: required string access_token (api.header="Authorization")
62 | }
63 |
64 | struct DeleteJobResponse {
65 | 1: i32 code
66 | 2: string msg
67 | }
68 |
69 | struct DownloadJobOutputRequest {
70 | 1: i64 id (api.body="id", api.query="id", api.vd="$>0")
71 | 2: string creator (api.body="creator", api.vd="len($) > 0 && len($) < 32 && !regexp('.*\\.\\..*')")
72 | 255: required string access_token (api.header="Authorization")
73 | }
74 |
75 | struct DownloadJobOutputResponse {
76 | 1: i32 code
77 | 2: string msg
78 | 3: string signed_url
79 | 4: string filename
80 | }
81 |
82 | struct QueryJobAttestationRequest {
83 | 1: i64 id (api.body="id", api.query="id", api.vd="$>0")
84 | 2: string creator (api.body="creator", api.vd="len($) > 0 && len($) < 32 && !regexp('.*\\.\\..*')")
85 | }
86 |
87 | struct QueryJobAttestationResponse {
88 | 1: i32 code
89 | 2: string msg
90 | 3: string signed_url
91 | }
92 |
93 | service JobHandler {
94 | SubmitJobResponse SubmitJob(1:SubmitJobRequest req)(api.post="/v1/job/submit/")
95 | QueryJobResponse QueryJob(1:QueryJobRequest req)(api.post="/v1/job/query/")
96 | DeleteJobResponse DeleteJob(1:DeleteJobRequest req)(api.post="/v1/job/delete/")
97 | DownloadJobOutputResponse DownloadJobOutput(1:DownloadJobOutputRequest req) (api.post="/v1/job/output/download/")
98 | QueryJobAttestationResponse QueryJobAttestationReport(1:QueryJobAttestationRequest req) (api.post="/v1/job/attestation/")
99 | }
--------------------------------------------------------------------------------
/app/api/main.go:
--------------------------------------------------------------------------------
1 | // Code generated by hertz generator.
2 |
3 | package main
4 |
5 | import (
6 | "github.com/cloudwego/hertz/pkg/app/server"
7 |
8 | "github.com/manatee-project/manatee/app/api/biz/dal"
9 | )
10 |
11 | func Init() {
12 | dal.Init()
13 | }
14 |
15 | func main() {
16 | Init()
17 | h := server.Default(server.WithHostPorts(":8080"))
18 |
19 | register(h)
20 | h.Spin()
21 | }
22 |
--------------------------------------------------------------------------------
/app/api/router.go:
--------------------------------------------------------------------------------
1 | // Code generated by hertz generator.
2 |
3 | package main
4 |
5 | import (
6 | "github.com/cloudwego/hertz/pkg/app/server"
7 | handler "github.com/manatee-project/manatee/app/api/biz/handler"
8 | )
9 |
10 | // customizeRegister registers customize routers.
11 | func customizedRegister(r *server.Hertz) {
12 | r.GET("/health", handler.Health)
13 |
14 | // your code ...
15 | }
16 |
--------------------------------------------------------------------------------
/app/api/router_gen.go:
--------------------------------------------------------------------------------
1 | // Code generated by hertz generator. DO NOT EDIT.
2 |
3 | package main
4 |
5 | import (
6 | "github.com/cloudwego/hertz/pkg/app/server"
7 | router "github.com/manatee-project/manatee/app/api/biz/router"
8 | )
9 |
10 | // register registers all routers.
11 | func register(r *server.Hertz) {
12 |
13 | router.GeneratedRegister(r)
14 |
15 | customizedRegister(r)
16 | }
17 |
--------------------------------------------------------------------------------
/app/api/script/bootstrap.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Prints the path of the hertz_service binary in the bin/ directory next to
# this script, then replaces the shell with it.
#
# All expansions are quoted so that an install path containing spaces works,
# and "&&" keeps pwd from reporting the wrong directory if cd fails.
CURDIR=$(cd "$(dirname "$0")" && pwd)
BinaryName=hertz_service
echo "$CURDIR/bin/${BinaryName}"
exec "$CURDIR/bin/${BinaryName}"
--------------------------------------------------------------------------------
/app/executor/.gitignore:
--------------------------------------------------------------------------------
1 | conf
2 | github.com
--------------------------------------------------------------------------------
/app/executor/BUILD.bazel:
--------------------------------------------------------------------------------
1 | load("@rules_oci//oci:defs.bzl", "oci_image", "oci_load")
2 | load("@rules_pkg//pkg:tar.bzl", "pkg_tar")
3 |
4 | pkg_tar(
5 | name = "gen_custom_token_tar",
6 | srcs = [
7 | "//app/executor/attestation:gen_custom_token",
8 | ],
9 | package_dir = "/home/jovyan",
10 | )
11 |
12 | oci_image(
13 | name = "image",
14 | base = "@scipy-notebook_linux_amd64",
15 | tars = [
16 | ":gen_custom_token_tar",
17 | ],
18 | # FIXME: for some reason, pkg_tar changes the owner of /home/jovyan.
19 | # run it as root for now, but it will go away once we fully switch to distroless image
20 | # https://github.com/manatee-project/manatee/issues/6
21 | user = "root:root",
22 | visibility = ["//visibility:public"],
23 | )
24 |
25 | oci_load(
26 | name = "load_image",
27 | image = ":image",
28 | repo_tags = ["executor:latest"],
29 | visibility = ["//visibility:public"],
30 | )
31 |
--------------------------------------------------------------------------------
/app/executor/attestation/BUILD.bazel:
--------------------------------------------------------------------------------
1 | load("@rules_go//go:def.bzl", "go_binary", "go_library")
2 |
3 | go_library(
4 | name = "attestation_lib",
5 | srcs = ["main.go"],
6 | importpath = "github.com/manatee-project/manatee/app/executor/attestation",
7 | visibility = ["//visibility:private"],
8 | deps = ["@com_github_pkg_errors//:errors"],
9 | )
10 |
11 | go_binary(
12 | name = "gen_custom_token",
13 | embed = [":attestation_lib"],
14 | goarch = "amd64",
15 | goos = "linux",
16 | visibility = ["//visibility:public"],
17 | )
18 |
--------------------------------------------------------------------------------
/app/executor/attestation/main.go:
--------------------------------------------------------------------------------
1 | // Copyright 2024 TikTok Pte. Ltd.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package main
16 |
17 | import (
18 | "context"
19 | "encoding/json"
20 | "flag"
21 | "fmt"
22 | "io"
23 | "log"
24 | "net"
25 | "net/http"
26 | "os"
27 | "strings"
28 |
29 | "github.com/pkg/errors"
30 | )
31 |
32 | const TikTokAudience = "https://research.tiktok.com/"
33 | const TokenFilename = "custom_token"
34 |
35 | type CustomToken struct {
36 | Audience string `json:"audience"`
37 | Nonces []string `json:"nonces"` // each nonce must be min 64bits
38 | TokenType string `json:"token_type"`
39 | }
40 |
41 | func GcsCustomAttestationToken(nonce string) ([]byte, error) {
42 | request := CustomToken{
43 | Audience: TikTokAudience,
44 | Nonces: []string{nonce},
45 | TokenType: "OIDC",
46 | }
47 | httpClient := http.Client{
48 | Transport: &http.Transport{
49 | DialContext: func(_ context.Context, _, _ string) (net.Conn, error) {
50 | return net.Dial("unix", "/run/container_launcher/teeserver.sock")
51 | },
52 | },
53 | }
54 | customJSON, err := json.Marshal(request)
55 | if err != nil {
56 | return nil, errors.Wrap(err, "failed to marshal request")
57 | }
58 | url := "http://localhost/v1/token"
59 | resp, err := httpClient.Post(url, "application/json", strings.NewReader(string(customJSON)))
60 | if err != nil {
61 | return nil, errors.Wrap(err, "faile to get custom token")
62 | }
63 | defer resp.Body.Close()
64 | tokenbytes, err := io.ReadAll(resp.Body)
65 | if err != nil {
66 | return nil, errors.Wrap(err, "faile to read from response")
67 | }
68 |
69 | return tokenbytes, nil
70 | }
71 |
72 | func generateCustomAttestationToken(nonce string) ([]byte, error) {
73 | if os.Getenv("TEE_BACKEND") == "MOCK" {
74 | return []byte(fmt.Sprintf("mock tee token with nonce %s", nonce)), nil
75 | } else {
76 | return GcsCustomAttestationToken(nonce)
77 | }
78 | }
79 |
// requireParameter terminates the process with exit status 1 when para is
// empty, after printing an error naming the parameter followed by the flag
// defaults. It returns normally when para is non-empty.
//
// The error line is written to the flag package's output, the same
// destination flag.PrintDefaults uses, so both parts of the message end up on
// one stream.
func requireParameter(name string, para string) {
	if para != "" {
		return
	}
	fmt.Fprintf(flag.CommandLine.Output(), "ERROR: %s parameter is required \n", name)
	flag.PrintDefaults()
	os.Exit(1)
}
87 |
88 | func main() {
89 | nonce := flag.String("nonce", "", "The nonce to generate custom token")
90 | flag.Parse()
91 | requireParameter("nonce", *nonce)
92 | customToken, err := generateCustomAttestationToken(*nonce)
93 | if err != nil {
94 | fmt.Printf("ERROR: failed to generate custom token %+v \n", err)
95 | panic(err)
96 | }
97 |
98 | err = os.WriteFile(TokenFilename, customToken, 0644)
99 | if err != nil {
100 | fmt.Printf("ERROR: failed to write custom token to file %+v \n", err)
101 | log.Fatal(err)
102 | }
103 | }
104 |
--------------------------------------------------------------------------------
/app/jupyterlab_manatee/.dockerignore:
--------------------------------------------------------------------------------
1 | *.bundle.*
2 | lib/
3 | node_modules/
4 | *.log
5 | .eslintcache
6 | .stylelintcache
7 | *.egg-info/
8 | .ipynb_checkpoints
9 | *.tsbuildinfo
10 | labextension
11 | # Version file is handled by hatchling
12 | jupyterlab_manatee/_version.py
13 |
14 | # Integration tests
15 | ui-tests/test-results/
16 | ui-tests/playwright-report/
17 |
18 | # Created by https://www.gitignore.io/api/python
19 | # Edit at https://www.gitignore.io/?templates=python
20 |
21 | ### Python ###
22 | # Byte-compiled / optimized / DLL files
23 | __pycache__/
24 | *.py[cod]
25 | *$py.class
26 |
27 | # C extensions
28 | *.so
29 |
30 | # Distribution / packaging
31 | .Python
32 | build/
33 | develop-eggs/
34 | dist/
35 | downloads/
36 | eggs/
37 | .eggs/
38 | lib/
39 | lib64/
40 | parts/
41 | sdist/
42 | var/
43 | wheels/
44 | pip-wheel-metadata/
45 | share/python-wheels/
46 | .installed.cfg
47 | *.egg
48 | MANIFEST
49 |
50 | # PyInstaller
51 | # Usually these files are written by a python script from a template
52 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
53 | *.manifest
54 | *.spec
55 |
56 | # Installer logs
57 | pip-log.txt
58 | pip-delete-this-directory.txt
59 |
60 | # Unit test / coverage reports
61 | htmlcov/
62 | .tox/
63 | .nox/
64 | .coverage
65 | .coverage.*
66 | .cache
67 | nosetests.xml
68 | coverage/
69 | coverage.xml
70 | *.cover
71 | .hypothesis/
72 | .pytest_cache/
73 |
74 | # Translations
75 | *.mo
76 | *.pot
77 |
78 | # Scrapy stuff:
79 | .scrapy
80 |
81 | # Sphinx documentation
82 | docs/_build/
83 |
84 | # PyBuilder
85 | target/
86 |
87 | # pyenv
88 | .python-version
89 |
90 | # celery beat schedule file
91 | celerybeat-schedule
92 |
93 | # SageMath parsed files
94 | *.sage.py
95 |
96 | # Spyder project settings
97 | .spyderproject
98 | .spyproject
99 |
100 | # Rope project settings
101 | .ropeproject
102 |
103 | # Mr Developer
104 | .mr.developer.cfg
105 | .project
106 | .pydevproject
107 |
108 | # mkdocs documentation
109 | /site
110 |
111 | # mypy
112 | .mypy_cache/
113 | .dmypy.json
114 | dmypy.json
115 |
116 | # Pyre type checker
117 | .pyre/
118 |
119 | # End of https://www.gitignore.io/api/python
120 |
121 | # OSX files
122 | .DS_Store
123 |
124 | # Yarn cache
125 | .yarn/
126 |
127 | cheat-sheet.md
128 | *.yml
129 | !.yarnrc.yml
130 | build_pkg.sh
--------------------------------------------------------------------------------
/app/jupyterlab_manatee/.gitignore:
--------------------------------------------------------------------------------
1 | *.bundle.*
2 | lib/
3 | node_modules/
4 | *.log
5 | .eslintcache
6 | .stylelintcache
7 | *.egg-info/
8 | .ipynb_checkpoints
9 | *.tsbuildinfo
10 | jupyterlab_manatee/labextension
11 | # Version file is handled by hatchling
12 | jupyterlab_manatee/_version.py
13 |
14 | # Integration tests
15 | ui-tests/test-results/
16 | ui-tests/playwright-report/
17 |
18 | # Created by https://www.gitignore.io/api/python
19 | # Edit at https://www.gitignore.io/?templates=python
20 |
21 | ### Python ###
22 | # Byte-compiled / optimized / DLL files
23 | __pycache__/
24 | *.py[cod]
25 | *$py.class
26 |
27 | # C extensions
28 | *.so
29 |
30 | # Distribution / packaging
31 | .Python
32 | build/
33 | develop-eggs/
34 | dist/
35 | downloads/
36 | eggs/
37 | .eggs/
38 | lib/
39 | lib64/
40 | parts/
41 | sdist/
42 | var/
43 | wheels/
44 | pip-wheel-metadata/
45 | share/python-wheels/
46 | .installed.cfg
47 | *.egg
48 | MANIFEST
49 |
50 | # PyInstaller
51 | # Usually these files are written by a python script from a template
52 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
53 | *.manifest
54 | *.spec
55 |
56 | # Installer logs
57 | pip-log.txt
58 | pip-delete-this-directory.txt
59 |
60 | # Unit test / coverage reports
61 | htmlcov/
62 | .tox/
63 | .nox/
64 | .coverage
65 | .coverage.*
66 | .cache
67 | nosetests.xml
68 | coverage/
69 | coverage.xml
70 | *.cover
71 | .hypothesis/
72 | .pytest_cache/
73 |
74 | # Translations
75 | *.mo
76 | *.pot
77 |
78 | # Scrapy stuff:
79 | .scrapy
80 |
81 | # Sphinx documentation
82 | docs/_build/
83 |
84 | # PyBuilder
85 | target/
86 |
87 | # pyenv
88 | .python-version
89 |
90 | # celery beat schedule file
91 | celerybeat-schedule
92 |
93 | # SageMath parsed files
94 | *.sage.py
95 |
96 | # Spyder project settings
97 | .spyderproject
98 | .spyproject
99 |
100 | # Rope project settings
101 | .ropeproject
102 |
103 | # Mr Developer
104 | .mr.developer.cfg
105 | .project
106 | .pydevproject
107 |
108 | # mkdocs documentation
109 | /site
110 |
111 | # mypy
112 | .mypy_cache/
113 | .dmypy.json
114 | dmypy.json
115 |
116 | # Pyre type checker
117 | .pyre/
118 |
119 | # End of https://www.gitignore.io/api/python
120 |
121 | # OSX files
122 | .DS_Store
123 |
124 | # Yarn cache
125 | .yarn/
126 |
127 | cheat-sheet.md
128 | *.yml
129 | !.yarnrc.yml
130 | build_pkg.sh
--------------------------------------------------------------------------------
/app/jupyterlab_manatee/.yarnrc.yml:
--------------------------------------------------------------------------------
1 | nodeLinker: node-modules
2 |
3 | npmRegistryServer: "https://registry.yarnpkg.com/"
4 |
--------------------------------------------------------------------------------
/app/jupyterlab_manatee/20custom-hook.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # this hook is executed before notebook starts.
4 | pip install /manatee/jupyterlab_manatee-0.0.0-py3-none-any.whl
5 | jupyter labextension disable @jupyterlab/docmanager-extension:download
6 | jupyter labextension disable @jupyterlab/filebrowser-extension:download
--------------------------------------------------------------------------------
/app/jupyterlab_manatee/BUILD.bazel:
--------------------------------------------------------------------------------
1 | load("@bazel_tools//tools/build_defs/pkg:pkg.bzl", "pkg_tar")
2 | load("@pydeps//:requirements.bzl", "all_requirements")
3 | load("@rules_oci//oci:defs.bzl", "oci_image", "oci_load")
4 | load("@rules_python//python:pip.bzl", "compile_pip_requirements")
5 | load("@rules_python//python/entry_points:py_console_script_binary.bzl", "py_console_script_binary")
6 |
7 | compile_pip_requirements(
8 | # base name for generated targets, typically "requirements".
9 | name = "requirements",
10 | requirements_in = "requirements.in",
11 | requirements_linux = "requirements_linux.txt",
12 | requirements_txt = "requirements.txt",
13 | )
14 |
15 | py_console_script_binary(
16 | name = "jlpm",
17 | pkg = "@pydeps//jupyterlab",
18 | script = "jlpm",
19 | )
20 |
21 | # needed for "jupyter labextension" command invoked by jlpm
22 | py_console_script_binary(
23 | name = "jupyter-labextension",
24 | pkg = "@pydeps//jupyterlab",
25 | script = "jupyter-labextension",
26 | )
27 |
28 | # needed for "jupyter" command invoked by jlpm
29 | py_console_script_binary(
30 | name = "jupyter",
31 | pkg = "@pydeps//jupyter_core",
32 | script = "jupyter",
33 | )
34 |
35 | py_console_script_binary(
36 | name = "pyproject-build",
37 | pkg = "@pydeps//build",
38 | script = "pyproject-build",
39 | )
40 |
41 | # always use 0.0.0 for dev wheel
42 | dev_wheel_name = "jupyterlab_manatee-0.0.0-py3-none-any.whl"
43 |
# Builds the development wheel of the JupyterLab extension.
#
# The rule copies the package sources into a scratch `build` directory,
# installs the npm dependencies with jlpm, builds the labextension and then
# runs pyproject-build. The resulting wheel is copied to the fixed
# `dev_wheel_name` output.
genrule(
    name = "build_wheel",
    srcs = glob([
        # python server
        "jupyterlab_manatee/*.py",
        "jupyter-config/**/*.json",
        # frontend
        "src/*.ts",
        "src/*.tsx",
        "style/*.css",
        "style/*.js",
    ]) + [
        # pyproject-build
        "pyproject.toml",
        "LICENSE",
        "README.md",
        # tsc project files
        "tsconfig.json",
        # npm dependencies
        "package.json",
        "yarn.lock",
        # yarnrc to use obsolete `node-modules` directory
        ".yarnrc.yml",
    ],
    # always use 0.0.0 for dev version
    outs = [dev_wheel_name],
    cmd = "\n".join([
        "export NODE=$$(realpath $(location @nodejs//:node_bin))",
        "export JLPM=$$(realpath $(location :jlpm))",
        "export JUPYTER=$$(realpath $(location :jupyter))",
        "export PATH=$$(dirname $$NODE):$$(dirname $$JLPM):$$(dirname $$JUPYTER):$$PATH",
        "export TEMP=$$(mktemp -d)",
        "export PYBUILD=$$(realpath $(location :pyproject-build))",
        "export NPM_CONFIG_USERCONFIG=$$TEMP/npmrc",

        # copy all source code into build directory under execroot
        "mkdir -p build",
        "cp -L -R $$(dirname $(execpath package.json))/* build",
        "cd build",

        # yarn config
        "jlpm config set globalFolder $$TEMP/npm",
        "jlpm config set nodeLinker node-modules",
        "jlpm config set npmRegistryServer \"https://registry.yarnpkg.com/\"",

        # yarn install (print log only on error).
        # The failure must be propagated: a bare `|| echo` would report success
        # and let the build continue with missing dependencies. Quoting the
        # variable keeps the log's line breaks intact.
        "output=$$(jlpm install 2>&1) || { echo \"$$output\"; exit 1; }",

        # build jupyter labextension
        "jlpm run build",

        # build python package using pyproject.toml
        "$$PYBUILD",
        "cd ..",

        # copy output
        "cp build/dist/jupyterlab_manatee-*.whl $(location {})".format(dev_wheel_name),
    ]),
    tools = [
        ":jlpm",
        ":jupyter",
        ":jupyter-labextension",
        ":pyproject-build",
        "@nodejs//:node_bin",
    ],
    # toolchains=["@rules_python//python:current_py_toolchain"],
)
111 |
112 | pkg_tar(
113 | name = "dev_wheel_tar",
114 | srcs = [
115 | dev_wheel_name,
116 | ],
117 | package_dir = "/manatee/",
118 | )
119 |
120 | pkg_tar(
121 | name = "hooks_tar",
122 | srcs = [
123 | "20custom-hook.sh",
124 | ],
125 | package_dir = "/usr/local/bin/before-notebook.d/",
126 | )
127 |
128 | oci_image(
129 | name = "image",
130 | base = "@scipy-notebook_linux_amd64",
131 | tars = [
132 | "@noble//:flat",
133 | ":dev_wheel_tar",
134 | ":hooks_tar",
135 | ],
136 | visibility = ["//visibility:public"],
137 | )
138 |
139 | oci_load(
140 | name = "load_image",
141 | image = ":image",
142 | repo_tags = ["jupyterlab_manatee:latest"],
143 | visibility = ["//visibility:public"],
144 | )
145 |
--------------------------------------------------------------------------------
/app/jupyterlab_manatee/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2023, Dayeol Lee
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | 1. Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | 3. Neither the name of the copyright holder nor the names of its
17 | contributors may be used to endorse or promote products derived from
18 | this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
--------------------------------------------------------------------------------
/app/jupyterlab_manatee/README.md:
--------------------------------------------------------------------------------
1 | # jupyterlab_manatee
2 |
3 | This is an open-source JupyterLab extension for the ManaTEE framework.
4 |
5 | ## Requirements
6 |
7 | - JupyterLab >= 4.0.0
8 |
9 | ## Contributing
10 |
11 | ### Development install
12 |
13 | Note: You will need NodeJS to build the extension package.
14 |
15 | The `jlpm` command is JupyterLab's pinned version of
16 | [yarn](https://yarnpkg.com/) that is installed with JupyterLab. You may use
17 | `yarn` or `npm` in lieu of `jlpm` below.
18 |
19 | ```bash
20 | # Clone the repo to your local environment
21 | # Change directory to the jupyterlab_manatee directory
22 | # Install package in development mode
23 | pip install -e "."
24 | # Link your development version of the extension with JupyterLab
25 | jupyter labextension develop . --overwrite
26 | # Rebuild extension Typescript source after making changes
27 | jlpm build
28 | ```
29 |
30 | You can watch the source directory and run JupyterLab at the same time in different terminals to watch for changes in the extension's source and automatically rebuild the extension.
31 |
32 | ```bash
33 | # Watch the source directory in one terminal, automatically rebuilding when needed
34 | jlpm watch
35 | # Run JupyterLab in another terminal
36 | jupyter lab
37 | ```
38 |
39 | With the watch command running, every saved change will immediately be built locally and available in your running JupyterLab. Refresh JupyterLab to load the change in your browser (you may need to wait several seconds for the extension to be rebuilt).
40 |
41 | By default, the `jlpm build` command generates the source maps for this extension to make it easier to debug using the browser dev tools. To also generate source maps for the JupyterLab core extensions, you can run the following command:
42 |
43 | ```bash
44 | jupyter lab build --minimize=False
45 | ```
46 |
47 | ### Development uninstall
48 |
49 | ```bash
50 | pip uninstall jupyterlab_manatee
51 | ```
52 |
53 | In development mode, you will also need to remove the symlink created by `jupyter labextension develop`
54 | command. To find its location, you can run `jupyter labextension list` to figure out where the `labextensions`
55 | folder is located. Then you can remove the symlink named `jupyterlab_manatee` within that folder.
56 |
57 | ### Testing the extension
58 |
59 | #### Frontend tests
60 |
61 | This extension is using [Jest](https://jestjs.io/) for JavaScript code testing.
62 |
63 | To run them, execute:
64 |
65 | ```sh
66 | jlpm
67 | jlpm test
68 | ```
69 |
70 | #### Integration tests
71 |
72 | This extension uses [Playwright](https://playwright.dev/docs/intro) for the integration tests (aka user level tests).
73 | More precisely, the JupyterLab helper [Galata](https://github.com/jupyterlab/jupyterlab/tree/master/galata) is used to handle testing the extension in JupyterLab.
74 |
75 | More information is provided in the [ui-tests](./ui-tests/README.md) README.
76 |
77 | ### Packaging the extension
78 |
79 | See [RELEASE](RELEASE.md)
--------------------------------------------------------------------------------
/app/jupyterlab_manatee/RELEASE.md:
--------------------------------------------------------------------------------
1 | # Making a new release of jupyterlab_manatee
2 |
3 | The extension can be published to `PyPI` and `npm` manually or using the [Jupyter Releaser](https://github.com/jupyter-server/jupyter_releaser).
4 |
5 | ## Manual release
6 |
7 | ### Python package
8 |
9 | This extension can be distributed as Python packages. All of the Python
10 | packaging instructions are in the `pyproject.toml` file to wrap your extension in a
11 | Python package. Before generating a package, you first need to install some tools:
12 |
13 | ```bash
14 | pip install build twine hatch
15 | ```
16 |
17 | Bump the version using `hatch`. By default this will create a tag.
18 | See the docs on [hatch-nodejs-version](https://github.com/agoose77/hatch-nodejs-version#semver) for details.
19 |
20 | ```bash
21 | hatch version
22 | ```
23 |
24 | Make sure to clean up all the development files before building the package:
25 |
26 | ```bash
27 | jlpm clean:all
28 | ```
29 |
30 | You could also clean up the local git repository:
31 |
32 | ```bash
33 | git clean -dfX
34 | ```
35 |
36 | To create a Python source package (`.tar.gz`) and the binary package (`.whl`) in the `dist/` directory, do:
37 |
38 | ```bash
39 | python -m build
40 | ```
41 |
42 | > `python setup.py sdist bdist_wheel` is deprecated and will not work for this package.
43 |
44 | Then to upload the package to PyPI, do:
45 |
46 | ```bash
47 | twine upload dist/*
48 | ```
49 |
50 | ### NPM package
51 |
52 | To publish the frontend part of the extension as an NPM package, do:
53 |
54 | ```bash
55 | npm login
56 | npm publish --access public
57 | ```
58 |
59 | ## Automated releases with the Jupyter Releaser
60 |
61 | The extension repository should already be compatible with the Jupyter Releaser.
62 |
63 | Check out the [workflow documentation](https://jupyter-releaser.readthedocs.io/en/latest/get_started/making_release_from_repo.html) for more information.
64 |
65 | Here is a summary of the steps to cut a new release:
66 |
67 | - Add tokens to the [Github Secrets](https://docs.github.com/en/actions/security-guides/encrypted-secrets) in the repository:
68 | - `ADMIN_GITHUB_TOKEN` (with "public_repo" and "repo:status" permissions); see the [documentation](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token)
69 | - `NPM_TOKEN` (with "automation" permission); see the [documentation](https://docs.npmjs.com/creating-and-viewing-access-tokens)
70 | - Set up PyPI
71 |
72 | Using PyPI trusted publisher (modern way)
73 |
74 | - Set up your PyPI project by [adding a trusted publisher](https://docs.pypi.org/trusted-publishers/adding-a-publisher/)
75 | - The _workflow name_ is `publish-release.yml` and the _environment_ should be left blank.
76 | - Ensure the publish release job has `permissions`: `id-token : write` (see the [documentation](https://docs.pypi.org/trusted-publishers/using-a-publisher/))
77 |
78 |
79 |
80 | Using PyPI token (legacy way)
81 |
82 | - If the repo generates PyPI release(s), create a scoped PyPI [token](https://packaging.python.org/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/#saving-credentials-on-github). We recommend using a scoped token for security reasons.
83 |
84 | - You can store the token as `PYPI_TOKEN` in your fork's `Secrets`.
85 |
86 | - Advanced usage: if you are releasing multiple repos, you can create a secret named `PYPI_TOKEN_MAP` instead of `PYPI_TOKEN` that is formatted as follows:
87 |
88 | ```text
89 | owner1/repo1,token1
90 | owner2/repo2,token2
91 | ```
92 |
93 | If you have multiple Python packages in the same repository, you can point to them as follows:
94 |
95 | ```text
96 | owner1/repo1/path/to/package1,token1
97 | owner1/repo1/path/to/package2,token2
98 | ```
99 |
100 |
101 |
102 | - Go to the Actions panel
103 | - Run the "Step 1: Prep Release" workflow
104 | - Check the draft changelog
105 | - Run the "Step 2: Publish Release" workflow
106 |
107 | ## Publishing to `conda-forge`
108 |
109 | If the package is not on conda forge yet, check the documentation to learn how to add it: https://conda-forge.org/docs/maintainer/adding_pkgs.html
110 |
111 | Otherwise a bot should pick up the new version published to PyPI, and open a new PR on the feedstock repository automatically.
112 |
--------------------------------------------------------------------------------
/app/jupyterlab_manatee/babel.config.js:
--------------------------------------------------------------------------------
1 | module.exports = require('@jupyterlab/testutils/lib/babel.config');
2 |
--------------------------------------------------------------------------------
/app/jupyterlab_manatee/install.json:
--------------------------------------------------------------------------------
1 | {
2 | "packageManager": "python",
3 | "packageName": "jupyterlab_manatee",
4 | "uninstallInstructions": "Use your Python package manager (pip, conda, etc.) to uninstall the package jupyterlab_manatee"
5 | }
6 |
--------------------------------------------------------------------------------
/app/jupyterlab_manatee/jest.config.js:
--------------------------------------------------------------------------------
1 | const jestJupyterLab = require('@jupyterlab/testutils/lib/jest-config');
2 |
3 | const esModules = [
4 | '@codemirror',
5 | '@jupyter/ydoc',
6 | '@jupyterlab/',
7 | 'lib0',
8 | 'nanoid',
9 | 'vscode-ws-jsonrpc',
10 | 'y-protocols',
11 | 'y-websocket',
12 | 'yjs'
13 | ].join('|');
14 |
15 | const baseConfig = jestJupyterLab(__dirname);
16 |
17 | module.exports = {
18 | ...baseConfig,
19 | automock: false,
20 | collectCoverageFrom: [
21 | 'src/**/*.{ts,tsx}',
22 | '!src/**/*.d.ts',
23 | '!src/**/.ipynb_checkpoints/*'
24 | ],
25 | coverageReporters: ['lcov', 'text'],
26 | testRegex: 'src/.*/.*.spec.ts[x]?$',
27 | transformIgnorePatterns: [`/node_modules/(?!${esModules}).+`]
28 | };
29 |
--------------------------------------------------------------------------------
/app/jupyterlab_manatee/jupyter-config/jupyter_server_config.d/jupyterlab_manatee.json:
--------------------------------------------------------------------------------
1 | {
2 | "ServerApp": {
3 | "jpserver_extensions": {
4 | "jupyterlab_manatee": true
5 | }
6 | }
7 | }
--------------------------------------------------------------------------------
/app/jupyterlab_manatee/jupyterlab_manatee/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 TikTok Pte. Ltd.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import jupyter_server
16 | from jupyter_server.utils import url_path_join
17 | from ._version import __version__
18 | from .handlers import *
19 |
20 | def _jupyter_server_extension_points():
21 | return [{
22 | 'module': 'jupyterlab_manatee'
23 | }]
24 |
25 | def _jupyter_labextension_paths():
26 | return [{
27 | "src": "labextension",
28 | "dest": "jupyterlab_manatee"
29 | }]
30 |
31 |
def _load_jupyter_server_extension(serverapp: jupyter_server.serverapp.ServerApp):
    """
    Called when the extension is loaded.

    Registers the ManaTEE request handlers on the server's web application.
    Each handler is mounted at ``<base_url>/manatee/<endpoint>`` for the
    ``jobs``, ``output`` and ``attestation`` endpoints.
    """
    web_app = serverapp.web_app
    base_url = web_app.settings['base_url']

    # Endpoint name -> handler class; dict order is the registration order.
    routes = {
        'jobs': DataCleanRoomJobHandler,
        'output': DataCleanRoomOutputHandler,
        'attestation': DataCleanRoomAttestationHandler,
    }
    handlers = [
        (url_path_join(base_url, 'manatee', endpoint), handler_cls)
        for endpoint, handler_cls in routes.items()
    ]

    # '.*$' is the host pattern, i.e. the handlers apply to every host.
    web_app.add_handlers('.*$', handlers)
--------------------------------------------------------------------------------
/app/jupyterlab_manatee/noble.yaml:
--------------------------------------------------------------------------------
1 | # Packages for examples/debian_snapshot.
2 | #
3 | # Anytime this file is changed, the lockfile needs to be regenerated.
4 | #
5 | # To generate the noble.lock.json run the following command
6 | #
7 | # bazel run @noble//:lock
8 | #
9 | # See debian_package_index at WORKSPACE.bazel
10 | version: 1
11 |
12 | sources:
13 | - channel: noble main
14 | url: https://snapshot.ubuntu.com/ubuntu/20240301T030400Z
15 | - channel: noble-security main
16 | url: https://snapshot.ubuntu.com/ubuntu/20240301T030400Z
17 | - channel: noble-updates main
18 | url: https://snapshot.ubuntu.com/ubuntu/20240301T030400Z
19 |
20 | archs:
21 | - "amd64"
22 |
23 | packages:
24 | - "libblas-dev"
25 | - "liblapack-dev"
--------------------------------------------------------------------------------
/app/jupyterlab_manatee/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["hatchling>=1.5.0", "jupyterlab>=4.0.0,<5", "hatch-nodejs-version>=0.3.2", "aiohttp", "aiofiles"]
3 | build-backend = "hatchling.build"
4 |
5 | [project]
6 | name = "jupyterlab_manatee"
7 | readme = "README.md"
8 | license = { file = "LICENSE" }
9 | requires-python = ">=3.8"
10 | classifiers = [
11 | "Framework :: Jupyter",
12 | "Framework :: Jupyter :: JupyterLab",
13 | "Framework :: Jupyter :: JupyterLab :: 4",
14 | "Framework :: Jupyter :: JupyterLab :: Extensions",
15 | "Framework :: Jupyter :: JupyterLab :: Extensions :: Prebuilt",
16 | "License :: OSI Approved :: BSD License",
17 | "Programming Language :: Python",
18 | "Programming Language :: Python :: 3",
19 | "Programming Language :: Python :: 3.8",
20 | "Programming Language :: Python :: 3.9",
21 | "Programming Language :: Python :: 3.10",
22 | "Programming Language :: Python :: 3.11",
23 | "Programming Language :: Python :: 3.12",
24 | ]
25 | dependencies = [
26 | "aiohttp",
27 | "aiofiles",
28 | "tornado>=6.3",
29 | ]
30 | dynamic = ["version", "description", "authors", "urls", "keywords"]
31 |
32 | [tool.hatch.version]
33 | source = "nodejs"
34 |
35 | [tool.hatch.metadata.hooks.nodejs]
36 | fields = ["description", "authors", "urls"]
37 |
38 | [tool.hatch.build.targets.sdist]
39 | artifacts = ["jupyterlab_manatee/labextension"]
40 | exclude = [".github", "binder"]
41 |
42 | [tool.hatch.build.targets.wheel.shared-data]
43 | "jupyter-config/jupyter_server_config.d" = "etc/jupyter/jupyter_server_config.d"
44 | "jupyterlab_manatee/labextension" = "share/jupyter/labextensions/jupyterlab_manatee"
45 | "install.json" = "share/jupyter/labextensions/jupyterlab_manatee/install.json"
46 |
47 | [tool.hatch.build.hooks.version]
48 | path = "jupyterlab_manatee/_version.py"
49 |
50 | [tool.hatch.build.hooks.jupyter-builder]
51 | dependencies = ["hatch-jupyter-builder>=0.5"]
52 | build-function = "hatch_jupyter_builder.npm_builder"
53 | ensured-targets = [
54 | "jupyterlab_manatee/labextension/static/style.js",
55 | "jupyterlab_manatee/labextension/package.json",
56 | ]
57 | skip-if-exists = ["jupyterlab_manatee/labextension/static/style.js"]
58 |
59 | [tool.hatch.build.hooks.jupyter-builder.build-kwargs]
60 | build_cmd = "build:prod"
61 | npm = ["jlpm"]
62 |
63 | [tool.hatch.build.hooks.jupyter-builder.editable-build-kwargs]
64 | build_cmd = "install:extension"
65 | npm = ["jlpm"]
66 | source_dir = "src"
67 | build_dir = "jupyterlab_manatee/labextension"
68 |
69 | [tool.jupyter-releaser.options]
70 | version_cmd = "hatch version"
71 |
72 | [tool.jupyter-releaser.hooks]
73 | before-build-npm = [
74 | "python -m pip install 'jupyterlab>=4.0.0,<5'",
75 | "jlpm",
76 | "jlpm build:prod"
77 | ]
78 | before-build-python = ["jlpm clean:all"]
79 |
80 | [tool.check-wheel-contents]
81 | ignore = ["W002"]
82 |
--------------------------------------------------------------------------------
/app/jupyterlab_manatee/requirements.in:
--------------------------------------------------------------------------------
1 | jupyterhub==4.0.2
2 | jupyterlab
3 | twine
4 | hatch
5 | build
6 | setuptools
7 |
--------------------------------------------------------------------------------
/app/jupyterlab_manatee/setup.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 TikTok Pte. Ltd.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | __import__("setuptools").setup()
16 |
--------------------------------------------------------------------------------
/app/jupyterlab_manatee/src/__tests__/jupyterlab_manatee.spec.ts:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2024 TikTok Pte. Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | /**
18 | * Example of [Jest](https://jestjs.io/docs/getting-started) unit tests
19 | */
20 |
21 | describe('jupyterlab_manatee', () => {
22 | it('should be tested', () => {
23 | expect(1 + 1).toEqual(2);
24 | });
25 | });
26 |
--------------------------------------------------------------------------------
/app/jupyterlab_manatee/src/index.ts:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2024 TikTok Pte. Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | import {
18 | JupyterFrontEnd,
19 | JupyterFrontEndPlugin,
20 | ILayoutRestorer,
21 | } from '@jupyterlab/application';
22 | import { IDocumentManager } from '@jupyterlab/docmanager';
23 | import { ITranslator } from '@jupyterlab/translation';
24 | import { DataCleanRoomSidebar } from './sidebar';
25 |
26 |
/**
 * Activate the extension.
 *
 * Creates the data clean room sidebar, adds it to the 'right' area of the
 * application shell and, when a layout restorer is supplied, registers the
 * sidebar with it.
 *
 * @param app - The JupyterLab front-end application.
 * @param docManager - Document manager handed to the sidebar.
 * @param translator - Translator service (accepted but not forwarded to the sidebar).
 * @param restorer - Optional layout restorer; `null` when unavailable.
 */
async function activate(
  app: JupyterFrontEnd,
  docManager: IDocumentManager,
  translator: ITranslator,
  restorer: ILayoutRestorer | null
) {
  console.log("JupyterLab extension jupyterlab_manatee is activated!");

  const panel = new DataCleanRoomSidebar({ manager: docManager });
  app.shell.add(panel, 'right', { rank: 0 });

  if (!restorer) {
    return;
  }
  restorer.add(panel, "data-clean-room-side-bar");
}
38 |
/**
 * Initialization data for the jupyterlab-manatee extension.
 *
 * The plugin starts automatically, requires the document manager and the
 * translator, and optionally uses the layout restorer. It provides no service
 * of its own, hence the `void` type argument.
 */
const plugin: JupyterFrontEndPlugin<void> = {
  id: 'jupyterlab_manatee:plugin',
  description: 'This is an open-source JupyterLab extension for ManaTEE framework',
  autoStart: true,
  requires: [IDocumentManager, ITranslator],
  optional: [ILayoutRestorer],
  activate: activate
};
50 |
51 | export default plugin;
52 |
--------------------------------------------------------------------------------
/app/jupyterlab_manatee/src/sidebar.ts:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2024 TikTok Pte. Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | import { SidePanel, trustedIcon } from '@jupyterlab/ui-components';
18 | import { ITranslator, nullTranslator } from '@jupyterlab/translation';
19 | import { IDocumentManager } from '@jupyterlab/docmanager';
20 | import { DataCleanRoomSources } from './sources';
21 | // import { DataCleanRoomInputs } from './inputs';
22 | import { DataCleanRoomJobs } from './jobs';
23 |
/**
 * Side panel for the Data Clean Room: it hosts the jobs panel followed by the
 * sources panel.
 */
export class DataCleanRoomSidebar extends SidePanel {
  /**
   * @param options - Document manager plus an optional translator
   *   (`nullTranslator` is used when none is given).
   */
  constructor(options: DataCleanRoomSidebar.IOptions) {
    const translator = options.translator || nullTranslator;
    super({ translator });

    const jobs = new DataCleanRoomJobs({ translator });
    const sources = new DataCleanRoomSources({
      manager: options.manager,
      translator
    });

    this.addClass("jp-manatee-sidebar-view");
    this.title.icon = trustedIcon;
    this.id = "jp-DCRSource-sidebar";

    // Widgets are added in this order: jobs first, then sources.
    this.addWidget(jobs);
    this.addWidget(sources);
  }
}
40 |
/**
 * Namespace holding the types that belong to DataCleanRoomSidebar.
 */
export namespace DataCleanRoomSidebar {
  /**
   * Constructor options for DataCleanRoomSidebar.
   */
  export interface IOptions {
    /** Document manager passed on to the sources panel. */
    manager: IDocumentManager;
    /** Optional translator; the sidebar uses nullTranslator when omitted. */
    translator?: ITranslator;
  }
}
47 |
--------------------------------------------------------------------------------
/app/jupyterlab_manatee/src/sources.ts:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2024 TikTok Pte. Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | import { Contents } from '@jupyterlab/services';
18 | import { IDocumentManager } from '@jupyterlab/docmanager';
19 | import { PanelWithToolbar, ToolbarButton, fileUploadIcon, } from '@jupyterlab/ui-components';
20 | import { filter } from '@lumino/algorithm';
21 | import { ITranslator, nullTranslator } from '@jupyterlab/translation';
22 | import { FileBrowser, FilterFileBrowserModel } from '@jupyterlab/filebrowser';
23 | import { showDialog, Dialog } from '@jupyterlab/apputils';
24 | import { ServerConnection } from '@jupyterlab/services';
25 |
/*
  This class overrides items() so that the file browser lists only ipynb files.
  We're doing this solely for demo purposes; the actual product may show
  additional files (e.g., local python modules).
*/
class NotebookOnlyFilterFileBrowserModel extends FilterFileBrowserModel {
  /**
   * Iterate over the parent model's items, keeping only entries whose
   * `type` is `'notebook'`; every other entry is dropped.
   */
  override items(): IterableIterator<Contents.IModel> {
    return filter(super.items(), value => value.type === 'notebook');
  }
}
42 |
/**
 * Panel that shows a notebook-only file browser together with a toolbar
 * button that submits the selected notebooks as Data Clean Room jobs.
 */
export class DataCleanRoomSources extends PanelWithToolbar {
  /**
   * @param options - Document manager backing the file browser, plus an
   *   optional translator (`nullTranslator` is used when none is given).
   */
  constructor(options: DataCleanRoomSources.IOptions) {
    super();
    const { manager } = options;
    this._manager = manager;
    const trans = (options.translator ?? nullTranslator).load('jupyterlab');
    this.title.label = trans.__('Sources');

    const fbModel = new NotebookOnlyFilterFileBrowserModel({
      manager: manager,
    });
    this._browser = new FileBrowser({
      id: 'jupyterlab_manatee:plugin:sources',
      model: fbModel
    });
    this.toolbar.addItem(
      'submit',
      new ToolbarButton({
        icon: fileUploadIcon,
        onClick: () => this.sendSelectedFilesToAPI(),
        tooltip: trans.__('Submit Job to Data Clean Room')
      })
    );

    this.addWidget(this._browser);
  }

  /**
   * Ask for confirmation for every selected item and, when accepted, POST its
   * path and file name as JSON to the `manatee/jobs` endpoint of the server.
   *
   * Items are handled one after another. A failure while looking up or
   * submitting one item is logged and does not stop the remaining items.
   */
  async sendSelectedFilesToAPI(): Promise<void> {
    // Connection settings do not depend on the item, so build them once.
    const settings = ServerConnection.makeSettings();

    for (const item of this._browser.selectedItems()) {
      const result = await showDialog({
        title: "Submitting a Job to Data Clean Room?",
        body: 'Path: ' + item.path,
        buttons: [Dialog.okButton(), Dialog.cancelButton()]
      });

      if (!result.button.accept) {
        continue;
      }

      try {
        // Only the name is needed, so skip downloading the file content.
        const file = await this._manager.services.contents.get(item.path, {
          content: false
        });
        // Prepare data
        const data = JSON.stringify({
          path: item.path,
          filename: file.name
        });

        console.log("Sending... %s", settings.baseUrl);
        // Await the request so network failures reach the catch below
        // instead of becoming unhandled promise rejections.
        const response = await ServerConnection.makeRequest(
          settings.baseUrl + "manatee/jobs",
          { body: data, method: "POST" },
          settings
        );
        if (response.status !== 200) {
          console.error("Error has occured!");
        }
        // Read the complete body rather than only the first stream chunk.
        const text = await response.text();
        console.log("value:", text);
      } catch (error) {
        console.error("Failed to submit job for", item.path, error);
      }
    }
  }

  protected _manager: IDocumentManager;
  protected _browser: FileBrowser;
}
111 |
/**
 * Namespace holding the types that belong to DataCleanRoomSources.
 */
export namespace DataCleanRoomSources {
  /**
   * Constructor options for DataCleanRoomSources.
   */
  export interface IOptions {
    /** Document manager used by the file browser model and for content lookups. */
    manager: IDocumentManager;
    /** Optional translator; nullTranslator is used when omitted. */
    translator?: ITranslator;
  }
}
118 |
--------------------------------------------------------------------------------
/app/jupyterlab_manatee/style/base.css:
--------------------------------------------------------------------------------
1 | /*
2 | See the JupyterLab Developer Guide for useful CSS Patterns:
3 |
4 | https://jupyterlab.readthedocs.io/en/stable/developer/css.html
5 | */
6 |
/* Let each split-panel child inside the Manatee sidebar scroll vertically
   when its content overflows. */
.jp-manatee-sidebar-view .lm-SplitPanel-child {
  overflow-y: auto;
}
10 |
--------------------------------------------------------------------------------
/app/jupyterlab_manatee/style/index.css:
--------------------------------------------------------------------------------
1 | @import url('base.css');
2 |
--------------------------------------------------------------------------------
/app/jupyterlab_manatee/style/index.js:
--------------------------------------------------------------------------------
1 | import './base.css';
2 |
--------------------------------------------------------------------------------
/app/jupyterlab_manatee/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "allowSyntheticDefaultImports": true,
4 | "composite": true,
5 | "declaration": true,
6 | "esModuleInterop": true,
7 | "incremental": true,
8 | "jsx": "react",
9 | "module": "esnext",
10 | "moduleResolution": "node",
11 | "noEmitOnError": true,
12 | "noImplicitAny": true,
13 | "noUnusedLocals": true,
14 | "preserveWatchOutput": true,
15 | "resolveJsonModule": true,
16 | "outDir": "lib",
17 | "rootDir": "src",
18 | "strict": true,
19 | "strictNullChecks": true,
20 | "target": "ES2018",
21 | "skipLibCheck": true,
22 | },
23 | "include": ["src/*"]
24 | }
25 |
--------------------------------------------------------------------------------
/app/jupyterlab_manatee/tsconfig.test.json:
--------------------------------------------------------------------------------
1 | {
2 | "extends": "./tsconfig",
3 | "compilerOptions": {
4 | "types": ["jest"]
5 | }
6 | }
7 |
--------------------------------------------------------------------------------
/app/jupyterlab_manatee/ui-tests/README.md:
--------------------------------------------------------------------------------
1 | # Integration Testing
2 |
3 | This folder contains the integration tests of the extension.
4 |
5 | They are defined using [Playwright](https://playwright.dev/docs/intro) test runner
6 | and [Galata](https://github.com/jupyterlab/jupyterlab/tree/main/galata) helper.
7 |
8 | The Playwright configuration is defined in [playwright.config.js](./playwright.config.js).
9 |
10 | The JupyterLab server configuration to use for the integration test is defined
11 | in [jupyter_server_test_config.py](./jupyter_server_test_config.py).
12 |
13 | The default configuration will produce video for failing tests and an HTML report.
14 |
15 | > There is a new experimental UI mode that you may fall in love with; see [that video](https://www.youtube.com/watch?v=jF0yA-JLQW0).
16 |
17 | ## Run the tests
18 |
19 | > All commands are assumed to be executed from the root directory
20 |
21 | To run the tests, you need to:
22 |
23 | 1. Compile the extension:
24 |
25 | ```sh
26 | jlpm install
27 | jlpm build:prod
28 | ```
29 |
30 | > Check the extension is installed in JupyterLab.
31 |
32 | 2. Install test dependencies (needed only once):
33 |
34 | ```sh
35 | cd ./ui-tests
36 | jlpm install
37 | jlpm playwright install
38 | cd ..
39 | ```
40 |
41 | 3. Execute the [Playwright](https://playwright.dev/docs/intro) tests:
42 |
43 | ```sh
44 | cd ./ui-tests
45 | jlpm playwright test
46 | ```
47 |
48 | Test results will be shown in the terminal. In case of any test failures, the test report
49 | will be opened in your browser at the end of the tests execution; see
50 | [Playwright documentation](https://playwright.dev/docs/test-reporters#html-reporter)
51 | for configuring that behavior.
52 |
53 | ## Update the tests snapshots
54 |
55 | > All commands are assumed to be executed from the root directory
56 |
57 | If you are comparing snapshots to validate your tests, you may need to update
58 | the reference snapshots stored in the repository. To do that, you need to:
59 |
60 | 1. Compile the extension:
61 |
62 | ```sh
63 | jlpm install
64 | jlpm build:prod
65 | ```
66 |
67 | > Check the extension is installed in JupyterLab.
68 |
69 | 2. Install test dependencies (needed only once):
70 |
71 | ```sh
72 | cd ./ui-tests
73 | jlpm install
74 | jlpm playwright install
75 | cd ..
76 | ```
77 |
78 | 3. Execute the [Playwright](https://playwright.dev/docs/intro) command:
79 |
80 | ```sh
81 | cd ./ui-tests
82 | jlpm playwright test -u
83 | ```
84 |
85 | > Some discrepancies may occur between the snapshots generated on your computer and
86 | > the ones generated on the CI. To ease updating the snapshots on a PR, you can
87 | > type `please update playwright snapshots` to trigger the update by a bot on the CI.
88 | > Once the bot has computed new snapshots, it will commit them to the PR branch.
89 |
90 | ## Create tests
91 |
92 | > All commands are assumed to be executed from the root directory
93 |
94 | To create tests, the easiest way is to use the code generator tool of playwright:
95 |
96 | 1. Compile the extension:
97 |
98 | ```sh
99 | jlpm install
100 | jlpm build:prod
101 | ```
102 |
103 | > Check the extension is installed in JupyterLab.
104 |
105 | 2. Install test dependencies (needed only once):
106 |
107 | ```sh
108 | cd ./ui-tests
109 | jlpm install
110 | jlpm playwright install
111 | cd ..
112 | ```
113 |
114 | 3. Start the server:
115 |
116 | ```sh
117 | cd ./ui-tests
118 | jlpm start
119 | ```
120 |
121 | 4. Execute the [Playwright code generator](https://playwright.dev/docs/codegen) in **another terminal**:
122 |
123 | ```sh
124 | cd ./ui-tests
125 | jlpm playwright codegen localhost:8888
126 | ```
127 |
128 | ## Debug tests
129 |
130 | > All commands are assumed to be executed from the root directory
131 |
132 | To debug tests, a good way is to use the inspector tool of playwright:
133 |
134 | 1. Compile the extension:
135 |
136 | ```sh
137 | jlpm install
138 | jlpm build:prod
139 | ```
140 |
141 | > Check the extension is installed in JupyterLab.
142 |
143 | 2. Install test dependencies (needed only once):
144 |
145 | ```sh
146 | cd ./ui-tests
147 | jlpm install
148 | jlpm playwright install
149 | cd ..
150 | ```
151 |
152 | 3. Execute the Playwright tests in [debug mode](https://playwright.dev/docs/debug):
153 |
154 | ```sh
155 | cd ./ui-tests
156 | jlpm playwright test --debug
157 | ```
158 |
159 | ## Upgrade Playwright and the browsers
160 |
161 | To update the web browser versions, you must update the package `@playwright/test`:
162 |
163 | ```sh
164 | cd ./ui-tests
165 | jlpm up "@playwright/test"
166 | jlpm playwright install
167 | ```
168 |
--------------------------------------------------------------------------------
/app/jupyterlab_manatee/ui-tests/jupyter_server_test_config.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 TikTok Pte. Ltd.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Server configuration for integration tests.
16 |
17 | !! Never use this configuration in production because it
18 | opens the server to the world and provides access to JupyterLab
19 | JavaScript objects through the global window variable.
20 | """
21 | from jupyterlab.galata import configure_jupyter_server
22 |
23 | configure_jupyter_server(c)
24 |
25 | # Uncomment to set server log level to debug level
26 | # c.ServerApp.log_level = "DEBUG"
27 |
--------------------------------------------------------------------------------
/app/jupyterlab_manatee/ui-tests/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "jupyterlab_manatee-ui-tests",
3 | "version": "1.0.0",
4 | "description": "JupyterLab jupyterlab-manatee Integration Tests",
5 | "private": true,
6 | "scripts": {
7 | "start": "jupyter lab --config jupyter_server_test_config.py",
8 | "test": "jlpm playwright test",
9 | "test:update": "jlpm playwright test --update-snapshots"
10 | },
11 | "devDependencies": {
12 | "@jupyterlab/galata": "^5.0.5",
13 | "@playwright/test": "^1.37.0"
14 | }
15 | }
16 |
--------------------------------------------------------------------------------
/app/jupyterlab_manatee/ui-tests/playwright.config.js:
--------------------------------------------------------------------------------
1 | /**
2 | * Configuration for Playwright using default from @jupyterlab/galata
3 | */
4 | const baseConfig = require('@jupyterlab/galata/lib/playwright-config');
5 |
// Settings for the JupyterLab server that Playwright starts (or reuses) for the tests.
const webServer = {
  command: 'jlpm start',
  url: 'http://localhost:8888/lab',
  // 120 seconds, expressed in milliseconds.
  timeout: 120 * 1000,
  // An already running server is reused unless the CI environment variable is set.
  reuseExistingServer: !process.env.CI
};

// Galata's default configuration with the webServer section overridden.
module.exports = Object.assign({}, baseConfig, { webServer });
15 |
--------------------------------------------------------------------------------
/app/jupyterlab_manatee/ui-tests/tests/jupyterlab_manatee.spec.ts:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2024 TikTok Pte. Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | import { expect, test } from '@jupyterlab/galata';
18 |
19 | /**
20 | * Don't load JupyterLab webpage before running the tests.
21 | * This is required to ensure we capture all log messages.
22 | */
23 | test.use({ autoGoto: false });
24 |
/**
 * Loads the JupyterLab page while recording browser console output and checks
 * that the extension's activation message was logged exactly once.
 */
test('should emit an activation console message', async ({ page }) => {
  const logs: string[] = [];

  page.on('console', message => {
    logs.push(message.text());
  });

  await page.goto();

  // Must match the string logged by the extension's activate function, which
  // spells the extension name with an underscore.
  expect(
    logs.filter(s => s === 'JupyterLab extension jupyterlab_manatee is activated!')
  ).toHaveLength(1);
});
38 |
--------------------------------------------------------------------------------
/app/jupyterlab_manatee/ui-tests/yarn.lock:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manatee-project/manatee/07d4c7d5f6fb4f1d5e91252d30cc1f133467abdc/app/jupyterlab_manatee/ui-tests/yarn.lock
--------------------------------------------------------------------------------
/app/reconciler/BUILD.bazel:
--------------------------------------------------------------------------------
1 | load("@rules_go//go:def.bzl", "go_binary", "go_library", "go_test")
2 | load("@rules_oci//oci:defs.bzl", "oci_image", "oci_load")
3 | load("@rules_pkg//pkg:tar.bzl", "pkg_tar")
4 |
5 | go_library(
6 | name = "reconciler_lib",
7 | srcs = [
8 | "main.go",
9 | "reconciler.go",
10 | ],
11 | importpath = "github.com/manatee-project/manatee/app/reconciler",
12 | visibility = ["//visibility:private"],
13 | deps = [
14 | "//app/api/biz/dal/db",
15 | "//app/api/biz/model/job",
16 | "//app/reconciler/imagebuilder",
17 | "//app/reconciler/registry",
18 | "//app/reconciler/tee_backend",
19 | "@com_github_cloudwego_hertz//pkg/common/hlog",
20 | ],
21 | )
22 |
23 | go_binary(
24 | name = "reconciler",
25 | embed = [":reconciler_lib"],
26 | goarch = "amd64",
27 | goos = "linux",
28 | visibility = ["//visibility:public"],
29 | )
30 |
31 | pkg_tar(
32 | name = "tar",
33 | srcs = [":reconciler"],
34 | )
35 |
36 | oci_image(
37 | name = "image",
38 | base = "@distroless_base_linux_amd64",
39 | entrypoint = ["/reconciler"],
40 | tars = [
41 | ":tar",
42 | ],
43 | visibility = ["//visibility:public"],
44 | )
45 |
46 | oci_load(
47 | name = "load_image",
48 | image = ":image",
49 | repo_tags = ["reconciler:latest"],
50 | visibility = ["//visibility:public"],
51 | )
52 |
53 | go_test(
54 | name = "reconciler_test",
55 | srcs = ["reconciler_test.go"],
56 | embed = [":reconciler_lib"],
57 | deps = [
58 | "//app/api/biz/dal/db",
59 | "//app/api/biz/model/job",
60 | "//app/reconciler/imagebuilder",
61 | "@com_github_cloudwego_hertz//pkg/common/test/assert",
62 | "@io_gorm_gorm//:gorm",
63 | ],
64 | )
65 |
--------------------------------------------------------------------------------
/app/reconciler/imagebuilder/BUILD.bazel:
--------------------------------------------------------------------------------
1 | load("@rules_go//go:def.bzl", "go_library", "go_test")
2 |
3 | go_library(
4 | name = "imagebuilder",
5 | srcs = ["kaniko.go"],
6 | importpath = "github.com/manatee-project/manatee/app/reconciler/imagebuilder",
7 | visibility = ["//visibility:public"],
8 | deps = [
9 | "//app/api/biz/dal/db",
10 | "@com_github_cloudwego_hertz//pkg/common/hlog",
11 | "@com_github_pkg_errors//:errors",
12 | "@io_k8s_api//batch/v1:batch",
13 | "@io_k8s_api//core/v1:core",
14 | "@io_k8s_apimachinery//pkg/api/resource",
15 | "@io_k8s_apimachinery//pkg/apis/meta/v1:meta",
16 | "@io_k8s_client_go//kubernetes",
17 | "@io_k8s_client_go//rest",
18 | ],
19 | )
20 |
21 | go_test(
22 | name = "imagebuilder_test",
23 | srcs = ["kaniko_test.go"],
24 | embed = [":imagebuilder"],
25 | )
26 |
--------------------------------------------------------------------------------
/app/reconciler/imagebuilder/kaniko_test.go:
--------------------------------------------------------------------------------
1 | package imagebuilder
2 |
3 | import (
4 | "bufio"
5 | "strings"
6 | "testing"
7 | )
8 |
9 | func TestGetImageAndDigestFromLog(t *testing.T) {
10 | // Sample log line with a typical URL and digest
11 | logLine := "INFO[0242] Pushed example-registry.com/namespace/repository/image@sha256:1253099ce7721d3879373d411fc7938aef80000154c9c0455c2229497ed59336\n"
12 | expectedImage := "example-registry.com/namespace/repository/image@sha256:1253099ce7721d3879373d411fc7938aef80000154c9c0455c2229497ed59336"
13 | expectedDigest := "1253099ce7721d3879373d411fc7938aef80000154c9c0455c2229497ed59336"
14 |
15 | // Simulate a reader with the log line
16 | reader := bufio.NewReader(strings.NewReader(logLine))
17 |
18 | // Instantiate the struct containing the function if needed
19 | b := KanikoImageBuilder{}
20 |
21 | // Call the function to test
22 | image, digest, err := b.getImageAndDigestFromLog(reader)
23 | if err != nil {
24 | t.Fatalf("Expected no error, but got %v", err)
25 | }
26 |
27 | // Verify the output matches the expected values
28 | if image != expectedImage {
29 | t.Errorf("Expected image %v, but got %v", expectedImage, image)
30 | }
31 | if digest != expectedDigest {
32 | t.Errorf("Expected digest %v, but got %v", expectedDigest, digest)
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/app/reconciler/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "context"
5 | "time"
6 |
7 | "github.com/cloudwego/hertz/pkg/common/hlog"
8 | "github.com/manatee-project/manatee/app/api/biz/dal/db"
9 | )
10 |
11 | func main() {
12 |
13 | ctx := context.Background()
14 |
15 | db.Init()
16 |
17 | reconciler := NewReconciler(ctx)
18 |
19 | for {
20 | hlog.Info("Reconciling...")
21 | reconciler.Reconcile(ctx)
22 |
23 | time.Sleep(10 * time.Second)
24 | }
25 | }
26 |
--------------------------------------------------------------------------------
/app/reconciler/registry/BUILD.bazel:
--------------------------------------------------------------------------------
1 | load("@rules_go//go:def.bzl", "go_library")
2 |
3 | go_library(
4 | name = "registry",
5 | srcs = ["registry.go"],
6 | importpath = "github.com/manatee-project/manatee/app/reconciler/registry",
7 | visibility = ["//visibility:public"],
8 | )
9 |
--------------------------------------------------------------------------------
/app/reconciler/registry/registry.go:
--------------------------------------------------------------------------------
1 | package registry
2 |
3 | import (
4 | "fmt"
5 | "os"
6 | )
7 |
// Registry describes a container image registry.
type Registry interface {
	// Url returns the registry location as a string.
	Url() string
	// BaseImage returns the reference of the base image stored in this registry.
	BaseImage() string
}
12 |
13 | type GoogleDockerRegistry struct {
14 | }
15 |
16 | func (g *GoogleDockerRegistry) Url() string {
17 | projectId := os.Getenv("PROJECT_ID")
18 | if projectId == "" {
19 | panic("PROJECT_ID environment variable is not present")
20 | }
21 | env := os.Getenv("ENV")
22 | if env == "" {
23 | panic("ENV environment variable is not present")
24 | }
25 |
26 | return fmt.Sprintf("us-docker.pkg.dev/%s/dcr-%s-user-images", projectId, env)
27 | }
28 |
29 | func (g *GoogleDockerRegistry) BaseImage() string {
30 | return fmt.Sprintf("%s/manatee-executor-base:latest", g.Url())
31 | }
32 |
// MinikubeDockerRegistry is the Registry implementation that points at the
// fixed in-cluster address registry.kube-system.svc.cluster.local.
type MinikubeDockerRegistry struct{}

// Url returns the fixed in-cluster registry host name.
func (m *MinikubeDockerRegistry) Url() string {
	const host = "registry.kube-system.svc.cluster.local"
	return host
}

// BaseImage returns the executor:latest image under Url().
func (m *MinikubeDockerRegistry) BaseImage() string {
	host := m.Url()
	return fmt.Sprintf("%s/executor:latest", host)
}
43 |
44 | func GetRegistry() Registry {
45 | registryType := os.Getenv("REGISTRY_TYPE")
46 | if registryType == "" {
47 | registryType = "GCP"
48 | }
49 | var registry Registry
50 | if registryType == "GCP" {
51 | registry = &GoogleDockerRegistry{}
52 | } else if registryType == "MINIKUBE" {
53 | registry = &MinikubeDockerRegistry{}
54 | }
55 | return registry
56 | }
57 |
--------------------------------------------------------------------------------
/app/reconciler/tee_backend/BUILD.bazel:
--------------------------------------------------------------------------------
1 | load("@rules_go//go:def.bzl", "go_library")
2 |
3 | go_library(
4 | name = "tee_backend",
5 | srcs = [
6 | "confidential_space.go",
7 | "mock_teebackend.go",
8 | ],
9 | importpath = "github.com/manatee-project/manatee/app/reconciler/tee_backend",
10 | visibility = ["//visibility:public"],
11 | deps = [
12 | "@com_github_cloudwego_hertz//pkg/common/hlog",
13 | "@com_github_pkg_errors//:errors",
14 | "@com_google_cloud_go_compute//apiv1",
15 | "@com_google_cloud_go_compute//apiv1/computepb",
16 | "@io_k8s_api//batch/v1:batch",
17 | "@io_k8s_api//core/v1:core",
18 | "@io_k8s_apimachinery//pkg/apis/meta/v1:meta",
19 | "@io_k8s_client_go//kubernetes",
20 | "@io_k8s_client_go//rest",
21 | "@org_golang_google_protobuf//proto",
22 | ],
23 | )
24 |
--------------------------------------------------------------------------------
/app/reconciler/tee_backend/mock_teebackend.go:
--------------------------------------------------------------------------------
1 | package tee_backend
2 |
3 | import (
4 | "context"
5 | "fmt"
6 | "os"
7 | "strings"
8 |
9 | "github.com/cloudwego/hertz/pkg/common/hlog"
10 | "github.com/pkg/errors"
11 | batchv1 "k8s.io/api/batch/v1"
12 | corev1 "k8s.io/api/core/v1"
13 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
14 | "k8s.io/client-go/kubernetes"
15 | "k8s.io/client-go/rest"
16 | )
17 |
18 | type MockTeeBackend struct {
19 | ctx context.Context
20 | clientSet *kubernetes.Clientset
21 | namespace string
22 | }
23 |
24 | func NewMockTeeBackend(ctx context.Context) (*MockTeeBackend, error) {
25 | clusterConfig, err := rest.InClusterConfig()
26 | if err != nil {
27 | return nil, errors.Wrap(err, "failed to init cluster config")
28 | }
29 |
30 | clientSet, err := kubernetes.NewForConfig(clusterConfig)
31 | if err != nil {
32 | return nil, errors.Wrap(err, "failed to create client")
33 | }
34 |
35 | RunningNameSpaceByte, err := os.ReadFile("/var/run/secrets/kubernetes.io/serviceaccount/namespace")
36 | if err != nil {
37 | return nil, errors.Wrap(err, "failed to get namespace")
38 | }
39 | namespace := string(RunningNameSpaceByte)
40 |
41 | return &MockTeeBackend{
42 | ctx: ctx,
43 | clientSet: clientSet,
44 | namespace: namespace,
45 | }, nil
46 | }
47 |
48 | func (m *MockTeeBackend) LaunchInstance(instanceName string, image string, digest string, extraEnvs map[string]string) error {
49 | ttlSecondsAfterFinished := int32(3600 * 3)
50 | var envs []corev1.EnvVar
51 | for key, value := range extraEnvs {
52 | envs = append(envs, corev1.EnvVar{
53 | Name: key,
54 | Value: value,
55 | })
56 | }
57 | envs = append(envs, corev1.EnvVar{
58 | Name: "TEE_BACKEND",
59 | Value: os.Getenv("TEE_BACKEND"),
60 | },
61 | )
62 | mockTeeJob := &batchv1.Job{
63 | ObjectMeta: metav1.ObjectMeta{
64 | Name: instanceName,
65 | Namespace: m.namespace,
66 | },
67 | Spec: batchv1.JobSpec{
68 | TTLSecondsAfterFinished: &ttlSecondsAfterFinished,
69 | Template: corev1.PodTemplateSpec{
70 | Spec: corev1.PodSpec{
71 | ServiceAccountName: "dcr-k8s-pod-sa",
72 | Containers: []corev1.Container{
73 | {
74 | Name: "mock-tee",
75 | Image: convertImageToLocal(image),
76 | Env: envs,
77 | },
78 | },
79 | RestartPolicy: "Never",
80 | },
81 | },
82 | },
83 | }
84 | _, err := m.clientSet.BatchV1().Jobs(m.namespace).Create(m.ctx, mockTeeJob, metav1.CreateOptions{})
85 | if err != nil {
86 | return errors.Wrap(err, "failed to create kubernetes job")
87 | }
88 | return nil
89 | }
90 |
91 | func (m *MockTeeBackend) CleanUpInstance(instanceName string) error {
92 | deletePolicy := metav1.DeletePropagationForeground
93 | if err := m.clientSet.BatchV1().Jobs(m.namespace).Delete(m.ctx, instanceName, metav1.DeleteOptions{
94 | PropagationPolicy: &deletePolicy,
95 | }); err != nil {
96 | return errors.Wrap(err, "failed to delete job")
97 | }
98 | return nil
99 | }
100 |
101 | func (m *MockTeeBackend) GetInstanceStatus(instanceName string) (string, error) {
102 | teeJob, err := m.clientSet.BatchV1().Jobs(m.namespace).Get(m.ctx, instanceName, metav1.GetOptions{})
103 | if err != nil {
104 | hlog.Errorf("[MockTeeBackend]failed to get mock tee job: %v", err)
105 | return "", errors.Wrap(err, "failed to get job")
106 | }
107 | hlog.Infof("[MockTeeBackend]mock tee name: %v, status: %v", teeJob.Name, teeJob.Status)
108 | if teeJob.Status.Active > 0 {
109 | return "RUNNING", nil
110 | } else {
111 | return "TERMINATED", nil
112 | }
113 | }
114 |
115 | func convertImageToLocal(imageName string) string {
116 | index := strings.Index(imageName, "/")
117 | if index == -1 {
118 | hlog.Errorf("[MockTeeBackend]failed to find / in image name")
119 | return ""
120 | }
121 | return fmt.Sprintf("localhost:5000/%s", imageName[index+1:])
122 | }
123 |
--------------------------------------------------------------------------------
/deployment/deploy.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright 2024 TikTok Pte. Ltd.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | set -e
16 |
debug=false

# Parse --namespace=<value> and --debug=<value>; any other argument is ignored.
for arg in "$@"
do
    case $arg in
        --namespace=*)
        # Keep everything after the first '=' so values that themselves
        # contain '=' are not truncated.
        namespace="${arg#*=}"
        ;;
        --debug=*)
        debug="${arg#*=}"
        ;;
    esac
done


# The namespace is mandatory; abort with a usage hint when it is missing.
if [ -z "$namespace" ]; then
    echo "Error: the namespace parameter is required, run the script again like ./deploy.sh --namespace=<namespace>" >&2
    exit 1
fi
40 |
# deploy_service <dir> [args...]
# Enters <dir>, runs its ./deploy.sh with the remaining arguments, and returns
# to the previous directory. Because the script runs under `set -e`, a failing
# ./deploy.sh aborts the whole deployment.
deploy_service() {
    local app=$1
    shift
    pushd "$app"
    ./deploy.sh "$@"
    popd
}

# Quoted so that values containing whitespace are passed as single arguments.
deploy_service manatee "$namespace" "$debug"
deploy_service jupyterhub "$namespace"
50 |
--------------------------------------------------------------------------------
/deployment/jupyterhub/config.yaml:
--------------------------------------------------------------------------------
1 | # Copyright 2024 TikTok Pte. Ltd.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | singleuser:
16 | image:
17 | name: us-docker.pkg.dev/${project_id}/${artifact_repo_docker}/datascience-notebook-with-dcr
18 | tag: ${tag}
19 | pullPolicy: Always
20 | cmd: null
21 | nodeSelector: { iam.gke.io/gke-metadata-server-enabled: "true"}
22 | serviceAccountName: $single_user_pod_sa
23 | extraEnv:
24 | NOTEBOOK_ARGS: '--NotebookApp.terminals_enabled=False --NotebookApp.allow_root=False'
25 | JUPYTERHUB_SINGLEUSER_APP: "jupyter_server.serverapp.ServerApp"
26 | DATA_CLEAN_ROOM_HOST: "$api"
27 | DEPLOYMENT_ENV: "$env"
28 | PROJECT_ID: "$project_id"
29 | KEY_LOCALTION: "$region"
30 | networkPolicy:
31 | egressAllowRules:
32 | cloudMetadataServer: true
33 | cloudMetadata:
34 | blockWithIptables: false
--------------------------------------------------------------------------------
/deployment/jupyterhub/deploy.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright 2024 TikTok Pte. Ltd.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | # Usage: ./deploy.sh <namespace> -- run from this directory; env.bzl and config.yaml are resolved relative to it.
17 | VAR_FILE="../../env.bzl"
18 | if [ ! -f "$VAR_FILE" ]; then
19 |   echo "Error: Variables file does not exist."
20 |   exit 1
21 | fi
22 | VAR_FILE=$(realpath "$VAR_FILE")
23 | source "$VAR_FILE"  # expected to define env, project_id and region, which are used below
24 |
25 | if [ -z "$1" ]; then
26 |   echo "Error: No namespace argument supplied."
27 |   exit 1
28 | fi
29 | namespace=$1
30 |
31 | # One Helm release per namespace; the API URL is the in-cluster DNS name of the "manatee" service.
32 | tag="latest"
33 | helm_name="jupyterhub-helm-$namespace"
34 | api="http://manatee.$namespace.svc.cluster.local"
35 |
36 | service_account="jupyter-k8s-pod-sa"
37 | docker_repo="dcr-${env}-${namespace}-images"
38 | docker_reference="us-docker.pkg.dev/${project_id}/${docker_repo}/manatee-jupyterlab-singleuser"
39 |
40 | helm repo add jupyterhub https://hub.jupyter.org/helm-chart/
41 | helm repo update
42 | # Values passed with --set take precedence over the placeholder entries in config.yaml.
43 | helm upgrade --cleanup-on-fail \
44 |   --set singleuser.image.name="${docker_reference}" \
45 |   --set singleuser.image.tag="${tag}" \
46 |   --set singleuser.serviceAccountName="${service_account}" \
47 |   --set singleuser.extraEnv.DATA_CLEAN_ROOM_HOST="${api}" \
48 |   --set singleuser.extraEnv.EXECUTION_STAGE='"1"' \
49 |   --set singleuser.extraEnv.MANATEE_EXTRA_ENV_EXECUTION_STAGE='"2"' \
50 |   --set singleuser.extraEnv.DEPLOYMENT_ENV="${env}" \
51 |   --set singleuser.extraEnv.PROJECT_ID="${project_id}" \
52 |   --set singleuser.extraEnv.KEY_LOCALTION="${region}" \
53 |   --set singleuser.networkPolicy.enabled=false \
54 |   --set singleuser.storage.capacity=20Gi \
55 |   --install "$helm_name" jupyterhub/jupyterhub \
56 |   --namespace "${namespace}" \
57 |   --version=3.0.3 \
58 |   --values config.yaml || { echo "Error: helm upgrade failed."; exit 1; }
59 |
60 | echo "Deployment Completed."
61 | echo "Try 'kubectl --namespace=$namespace get service proxy-public' to obtain external IP"
62 |
--------------------------------------------------------------------------------
/deployment/manatee/.helmignore:
--------------------------------------------------------------------------------
1 | # Patterns to ignore when building packages.
2 | # This supports shell glob matching, relative path matching, and
3 | # negation (prefixed with !). Only one pattern per line.
4 | .DS_Store
5 | # Common VCS dirs
6 | .git/
7 | .gitignore
8 | .bzr/
9 | .bzrignore
10 | .hg/
11 | .hgignore
12 | .svn/
13 | # Common backup files
14 | *.swp
15 | *.bak
16 | *.tmp
17 | *.orig
18 | *~
19 | # Various IDEs
20 | .project
21 | .idea/
22 | *.tmproj
23 | .vscode/
24 |
--------------------------------------------------------------------------------
/deployment/manatee/Chart.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v2
2 | name: manatee-chart
3 | description: A Helm chart for Kubernetes
4 |
5 | # A chart can be either an 'application' or a 'library' chart.
6 | #
7 | # Application charts are a collection of templates that can be packaged into versioned archives
8 | # to be deployed.
9 | #
10 | # Library charts provide useful utilities or functions for the chart developer. They're included as
11 | # a dependency of application charts to inject those utilities and functions into the rendering
12 | # pipeline. Library charts do not define any templates and therefore cannot be deployed.
13 | type: application
14 |
15 | # This is the chart version. This version number should be incremented each time you make changes
16 | # to the chart and its templates, including the app version.
17 | # Versions are expected to follow Semantic Versioning (https://semver.org/)
18 | version: 0.1.0
19 |
20 | # This is the version number of the application being deployed. This version number should be
21 | # incremented each time you make changes to the application. Versions are not expected to
22 | # follow Semantic Versioning. They should reflect the version the application is using.
23 | # It is recommended to use it with quotes.
24 | appVersion: "1.16.0"
25 |
--------------------------------------------------------------------------------
/deployment/manatee/config.yaml:
--------------------------------------------------------------------------------
1 | cloudSql:
2 | connection_name: $connection_name  # placeholder; deploy.sh passes --set cloudSql.connection_name
3 | serviceAccount:
4 | create: false
5 | name: $service_account  # placeholder; deploy.sh passes --set serviceAccount.name
6 | apiImage:
7 | repository: $api_reference  # placeholder; deploy.sh passes --set apiImage.repository
8 | tag: $tag  # placeholder; deploy.sh passes --set apiImage.tag
9 | monitorImage:
10 | repository: $monitor_reference  # placeholder; deploy.sh passes --set monitorImage.repository
11 | tag: $tag  # placeholder; deploy.sh passes --set monitorImage.tag
12 | nodeSelector: { iam.gke.io/gke-metadata-server-enabled: "true"}
13 | mysql:
14 | host: localhost
15 | port: 9910
16 | namespace: ""  # deploy.sh passes --set namespace=<first argument>
--------------------------------------------------------------------------------
/deployment/manatee/deploy.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright 2024 TikTok Pte. Ltd.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | # Usage: ./deploy.sh <namespace> [debug] -- run from this directory; env.bzl, ./ and config.yaml are relative to it.
17 | VAR_FILE="../../env.bzl"
18 | if [ ! -f "$VAR_FILE" ]; then
19 |   echo "Error: Variables file does not exist."
20 |   exit 1
21 | fi
22 | VAR_FILE=$(realpath "$VAR_FILE")
23 | source "$VAR_FILE"  # expected to define env, project_id, region and zone, which are used below
24 |
25 | if [ -z "$1" ]; then
26 |   echo "Error: No namespace argument supplied."
27 |   exit 1
28 | fi
29 | namespace=$1
30 | # Default to "false" (the chart default) so an omitted second argument does not send an empty config.debug to Helm.
31 | debug=${2:-false}
32 | tag="latest"
33 | helm_name="manatee-helm"
34 |
35 | connection_name="${project_id}:${region}:dcr-${env}-db-instance"
36 | service_account="dcr-k8s-pod-sa"
37 | docker_repo="dcr-${env}-${namespace}-images"
38 | api_docker_reference="us-docker.pkg.dev/${project_id}/${docker_repo}/manatee-api"
39 | monitor_docker_reference="us-docker.pkg.dev/${project_id}/${docker_repo}/manatee-reconciler"
40 | # Explicit --set flags take precedence over the placeholder entries in config.yaml.
41 | helm upgrade --cleanup-on-fail \
42 |   --set apiImage.repository="${api_docker_reference}" \
43 |   --set apiImage.tag="${tag}" \
44 |   --set monitorImage.repository="${monitor_docker_reference}" \
45 |   --set monitorImage.tag="${tag}" \
46 |   --set serviceAccount.name="${service_account}" \
47 |   --set cloudSql.connection_name="${connection_name}" \
48 |   --set namespace="${namespace}" \
49 |   --set config.env="${env}" \
50 |   --set config.projectId="${project_id}" \
51 |   --set config.zone="${zone}" \
52 |   --set config.region="${region}" \
53 |   --set config.debug="${debug}" \
54 |   --install "$helm_name" ./ \
55 |   --namespace "$namespace" \
56 |   --values config.yaml
57 |
--------------------------------------------------------------------------------
/deployment/manatee/templates/NOTES.txt:
--------------------------------------------------------------------------------
1 | 1. Get the application URL by running these commands:
2 | {{- if .Values.ingress.enabled }}
3 | {{- range $host := .Values.ingress.hosts }}
4 | {{- range .paths }}
5 | http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
6 | {{- end }}
7 | {{- end }}
8 | {{- else if contains "NodePort" .Values.service.type }}
9 | export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "manatee-chart.fullname" . }})
10 | export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
11 | echo http://$NODE_IP:$NODE_PORT
12 | {{- else if contains "LoadBalancer" .Values.service.type }}
13 | NOTE: It may take a few minutes for the LoadBalancer IP to be available.
14 | You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "manatee-chart.fullname" . }}'
15 | export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "manatee-chart.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
16 | echo http://$SERVICE_IP:{{ .Values.service.port }}
17 | {{- else if contains "ClusterIP" .Values.service.type }}
18 | export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "manatee-chart.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
19 | export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
20 | echo "Visit http://127.0.0.1:8080 to use your application"
21 | kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
22 | {{- end }}
23 |
--------------------------------------------------------------------------------
/deployment/manatee/templates/_helpers.tpl:
--------------------------------------------------------------------------------
1 | {{/*
2 | Expand the name of the chart.
3 | */}}
4 | {{- define "manatee-chart.name" -}}
5 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
6 | {{- end }}
7 |
8 | {{/*
9 | Create a default fully qualified app name.
10 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
11 | If release name contains chart name it will be used as a full name.
12 | */}}
13 | {{- define "manatee-chart.fullname" -}}
14 | {{- if .Values.fullnameOverride }}
15 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
16 | {{- else }}
17 | {{- $name := default .Chart.Name .Values.nameOverride }}
18 | {{- if contains $name .Release.Name }}
19 | {{- .Release.Name | trunc 63 | trimSuffix "-" }}
20 | {{- else }}
21 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
22 | {{- end }}
23 | {{- end }}
24 | {{- end }}
25 |
26 | {{/*
27 | Create chart name and version as used by the chart label.
28 | */}}
29 | {{- define "manatee-chart.chart" -}}
30 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
31 | {{- end }}
32 |
33 | {{/*
34 | Common labels
35 | */}}
36 | {{- define "manatee-chart.labels" -}}
37 | helm.sh/chart: {{ include "manatee-chart.chart" . }}
38 | {{ include "manatee-chart.selectorLabels" . }}
39 | {{- if .Chart.AppVersion }}
40 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
41 | {{- end }}
42 | app.kubernetes.io/managed-by: {{ .Release.Service }}
43 | {{- end }}
44 |
45 | {{/*
46 | Selector labels
47 | */}}
48 | {{- define "manatee-chart.selectorLabels" -}}
49 | app.kubernetes.io/name: {{ include "manatee-chart.name" . }}
50 | app.kubernetes.io/instance: {{ .Release.Name }}
51 | {{- end }}
52 |
53 | {{/*
54 | Create the name of the service account to use
55 | */}}
56 | {{- define "manatee-chart.serviceAccountName" -}}
57 | {{- if .Values.serviceAccount.create }}
58 | {{- default (include "manatee-chart.fullname" .) .Values.serviceAccount.name }}
59 | {{- else }}
60 | {{- default "default" .Values.serviceAccount.name }}
61 | {{- end }}
62 | {{- end }}
63 |
--------------------------------------------------------------------------------
/deployment/manatee/templates/configmap.yaml:
--------------------------------------------------------------------------------
1 | # configmap.yaml: publishes the .Values.config.* settings as quoted strings in the manatee-configmap ConfigMap.
2 | apiVersion: v1
3 | kind: ConfigMap
4 | metadata:
5 | name: manatee-configmap
6 | data:
7 | env: {{ .Values.config.env | quote }}
8 | projectId: {{ .Values.config.projectId | quote }}
9 | zone: {{ .Values.config.zone | quote }}
10 | region: {{ .Values.config.region | quote }}
11 | debug: {{ .Values.config.debug | quote }}
12 | teeBackend: {{.Values.config.teeBackend | quote }}
13 | registryType: {{.Values.config.registryType | quote }}
14 | storageType: {{.Values.config.storageType | quote }}
15 | minioEndpoint: {{ .Values.config.minioEndpoint | quote }}
16 | minioAccessKey: {{ .Values.config.minioAccessKey | quote }}  # NOTE(review): credential stored in a ConfigMap rather than a Secret; confirm this is intended
17 | minioSecretKey: {{ .Values.config.minioSecretKey | quote }}  # NOTE(review): credential stored in a ConfigMap rather than a Secret; confirm this is intended
18 | minioRegion: {{ .Values.config.minioRegion | quote }}
--------------------------------------------------------------------------------
/deployment/manatee/templates/ingress.yaml:
--------------------------------------------------------------------------------
1 | {{- if .Values.ingress.enabled -}}
2 | {{- $fullName := include "manatee-chart.fullname" . -}}
3 | {{- $svcPort := .Values.service.port -}}
4 | {{- if and .Values.ingress.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }}
5 | {{- if not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class") }}
6 | {{- $_ := set .Values.ingress.annotations "kubernetes.io/ingress.class" .Values.ingress.className}}
7 | {{- end }}
8 | {{- end }}
9 | {{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}}
10 | apiVersion: networking.k8s.io/v1
11 | {{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}}
12 | apiVersion: networking.k8s.io/v1beta1
13 | {{- else -}}
14 | apiVersion: extensions/v1beta1
15 | {{- end }}
16 | kind: Ingress
17 | metadata:
18 | name: {{ $fullName }}
19 | labels:
20 | {{- include "manatee-chart.labels" . | nindent 4 }}
21 | {{- with .Values.ingress.annotations }}
22 | annotations:
23 | {{- toYaml . | nindent 4 }}
24 | {{- end }}
25 | spec:
26 | {{- if and .Values.ingress.className (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion) }}
27 | ingressClassName: {{ .Values.ingress.className }}
28 | {{- end }}
29 | {{- if .Values.ingress.tls }}
30 | tls:
31 | {{- range .Values.ingress.tls }}
32 | - hosts:
33 | {{- range .hosts }}
34 | - {{ . | quote }}
35 | {{- end }}
36 | secretName: {{ .secretName }}
37 | {{- end }}
38 | {{- end }}
39 | rules:
40 | {{- range .Values.ingress.hosts }}
41 | - host: {{ .host | quote }}
42 | http:
43 | paths:
44 | {{- range .paths }}
45 | - path: {{ .path }}
46 | {{- if and .pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.GitVersion) }}
47 | pathType: {{ .pathType }}
48 | {{- end }}
49 | backend:
50 | {{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }}
51 | service:
52 | name: {{ $fullName }}
53 | port:
54 | number: {{ $svcPort }}
55 | {{- else }}
56 | serviceName: {{ $fullName }}
57 | servicePort: {{ $svcPort }}
58 | {{- end }}
59 | {{- end }}
60 | {{- end }}
61 | {{- end }}
62 |
--------------------------------------------------------------------------------
/deployment/manatee/templates/service.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Service
3 | metadata:
4 | name: {{ include "manatee-chart.fullname" . }}
5 | labels:
6 | {{- include "manatee-chart.labels" . | nindent 4 }}
7 | spec:
8 | type: {{ .Values.service.type }}
9 | ports:
10 | - port: {{ .Values.service.port }}
11 | targetPort: http
12 | protocol: TCP
13 | selector:
14 | {{- include "manatee-chart.selectorLabels" . | nindent 4 }}
15 |
--------------------------------------------------------------------------------
/deployment/manatee/templates/tests/test-connection.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Pod
3 | metadata:
4 | name: "{{ include "manatee-chart.fullname" . }}-test-connection"
5 | labels:
6 | {{- include "manatee-chart.labels" . | nindent 4 }}
7 | annotations:
8 | "helm.sh/hook": test
9 | spec:
10 | containers:
11 | - name: wget
12 | image: busybox
13 | command: ['wget']
14 | args: ['{{ include "manatee-chart.fullname" . }}:{{ .Values.service.port }}']
15 | restartPolicy: Never
16 |
--------------------------------------------------------------------------------
/deployment/manatee/values.yaml:
--------------------------------------------------------------------------------
1 | # Default values for manatee-chart.
2 | # This is a YAML-formatted file.
3 | # Declare variables to be passed into your templates.
4 |
5 | replicaCount: 1
6 |
7 | apiImage:
8 | repository: ""
9 | pullPolicy: Always
10 | # Overrides the image tag whose default is the chart appVersion.
11 | tag: ""
12 |
13 | monitorImage:
14 | repository: ""
15 | pullPolicy: Always
16 | # Overrides the image tag whose default is the chart appVersion.
17 | tag: ""
18 |
19 |
20 | imagePullSecrets: []
21 | nameOverride: "manatee"
22 | fullnameOverride: "manatee"
23 |
24 | serviceAccount:
25 | # Specifies whether a service account should be created
26 | create: false
27 | # Automatically mount a ServiceAccount's API credentials?
28 | automount: true
29 | # Annotations to add to the service account
30 | annotations: {}
31 | # The name of the service account to use.
32 | # If not set and create is true, a name is generated using the fullname template
33 | name: ""
34 |
35 | podAnnotations: {}
36 | podLabels: {}
37 |
38 | podSecurityContext: {}
39 | # fsGroup: 2000
40 |
41 | securityContext: {}
42 | # capabilities:
43 | # drop:
44 | # - ALL
45 | # readOnlyRootFilesystem: true
46 | # runAsNonRoot: true
47 | # runAsUser: 1000
48 |
49 | service:
50 | type: ClusterIP
51 | port: 80
52 |
53 | api:
54 | port: 8080
55 |
56 | ingress:
57 | enabled: false
58 | className: ""
59 | annotations: {}
60 | # kubernetes.io/ingress.class: nginx
61 | # kubernetes.io/tls-acme: "true"
62 | hosts:
63 | - host: chart-example.local
64 | paths:
65 | - path: /
66 | pathType: ImplementationSpecific
67 | tls: []
68 | # - secretName: chart-example-tls
69 | # hosts:
70 | # - chart-example.local
71 |
72 | resources: {}
73 | # We usually recommend not to specify default resources and to leave this as a conscious
74 | # choice for the user. This also increases chances charts run on environments with little
75 | # resources, such as Minikube. If you do want to specify resources, uncomment the following
76 | # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
77 | # limits:
78 | # cpu: 100m
79 | # memory: 128Mi
80 | # requests:
81 | # cpu: 100m
82 | # memory: 128Mi
83 |
84 | autoscaling:
85 | enabled: false
86 | minReplicas: 1
87 | maxReplicas: 100
88 | targetCPUUtilizationPercentage: 80
89 | # targetMemoryUtilizationPercentage: 80
90 |
91 | # Additional volumes on the output Deployment definition.
92 | volumes: []
93 | # - name: foo
94 | # secret:
95 | # secretName: mysecret
96 | # optional: false
97 |
98 | # Additional volumeMounts on the output Deployment definition.
99 | volumeMounts: []
100 | # - name: foo
101 | # mountPath: "/etc/foo"
102 | # readOnly: true
103 |
104 | nodeSelector: {}
105 |
106 | tolerations: []
107 |
108 | affinity: {}
109 |
110 | cloudSql:
111 | connection_name: ""
112 |
113 | mysql:
114 | host: "localhost"
115 | port: "9910"
116 |
117 | useMinikube: false
118 |
119 | # every minute
120 | schedule: "*/1 * * * *"
121 |
122 | namespace: ""
123 |
124 | config:  # rendered into the manatee-configmap ConfigMap by templates/configmap.yaml
125 | env: ""  # supplied by the deploy scripts via --set config.env
126 | projectId: ""  # supplied via --set config.projectId
127 | zone: ""  # supplied via --set config.zone
128 | region: ""  # supplied via --set config.region
129 | debug: "false"  # supplied via --set config.debug
130 | teeBackend: "GCP"  # minikube/deploy.sh overrides this with MOCK
131 | storageType: "GCP"  # minikube/deploy.sh overrides this with MINIO
132 | registryType: "GCP"  # minikube/deploy.sh overrides this with MINIKUBE
133 | minioEndpoint: ""  # minikube/deploy.sh sets minio-service:9000
134 | minioAccessKey: ""  # set by minikube/deploy.sh
135 | minioSecretKey: ""  # set by minikube/deploy.sh
136 | minioRegion: "us"
137 |
--------------------------------------------------------------------------------
/deployment/minikube/deploy.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright 2024 TikTok Pte. Ltd.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | env="minikube"
17 | namespace="manatee"
18 | project_id="mock-gcp-project-id"  # dummy value; passed to config.projectId and PROJECT_ID below
19 | region="us-west2"
20 | zone="us-west2-a"
21 | helm_name="manatee-helm"
22 | eval $(minikube docker-env)  # per the minikube docs, points this shell's docker client at the daemon inside minikube
23 | kubectl apply -f mysql-deployment.yaml -n $namespace  # NOTE(review): the Deployment reads Secret "mysql-secret", which this script does not create
24 | kubectl apply -f mysql-service.yaml -n $namespace
25 | kubectl apply -f minio-dev.yaml  # no -n flag: the manifest itself pins namespace "manatee"
26 | # Deploy the DCR API chart (../manatee) with the MOCK TEE backend, MINIKUBE registry and MINIO storage.
27 | helm upgrade --cleanup-on-fail \
28 | --set apiImage.repository=docker.io/library/api \
29 | --set apiImage.tag=latest \
30 | --set apiImage.pullPolicy=Never \
31 | --set monitorImage.repository=docker.io/library/reconciler \
32 | --set monitorImage.tag=latest \
33 | --set monitorImage.pullPolicy=Never \
34 | --set serviceAccount.name=dcr-k8s-pod-sa \
35 | --set serviceAccount.create=false \
36 | --set cloudSql.connection_name="" \
37 | --set namespace=${namespace} \
38 | --set config.env=${env} \
39 | --set config.projectId=${project_id} \
40 | --set config.zone=${zone} \
41 | --set config.region=${region} \
42 | --set config.debug=true \
43 | --set config.teeBackend=MOCK \
44 | --set config.registryType=MINIKUBE \
45 | --set config.storageType=MINIO \
46 | --set config.minioSecretKey=minioadmin \
47 | --set config.minioAccessKey=minioadmin \
48 | --set config.minioEndpoint=minio-service:9000 \
49 | --set mysql.host=mysql-service \
50 | --set mysql.port=3306 \
51 | --set useMinikube=true \
52 | --install $helm_name ../manatee \
53 | --namespace $namespace
54 |
55 | helm repo add jupyterhub https://hub.jupyter.org/helm-chart/
56 | helm repo update
57 | # Deploy JupyterHub chart 3.0.3, reusing ../jupyterhub/config.yaml with minikube-specific overrides.
58 | service_account="jupyter-k8s-pod-sa"
59 | helm_name="jupyterhub-helm"  # reuses the variable; the manatee-helm release was handled above
60 | api="http://manatee.$namespace.svc.cluster.local"
61 |
62 | helm upgrade --cleanup-on-fail \
63 | --set singleuser.image.name=docker.io/library/jupyterlab_manatee \
64 | --set singleuser.image.tag=latest \
65 | --set singleuser.image.pullPolicy=Never \
66 | --set singleuser.serviceAccountName=${service_account} \
67 | --set singleuser.extraEnv.DATA_CLEAN_ROOM_HOST=${api} \
68 | --set singleuser.extraEnv.DEPLOYMENT_ENV=${env} \
69 | --set singleuser.extraEnv.PROJECT_ID=${project_id} \
70 | --set singleuser.extraEnv.KEY_LOCALTION=${region} \
71 | --set singleuser.networkPolicy.enabled=false \
72 | --set singleuser.nodeSelector=null \
73 | --set prePuller.continuous.enabled=false \
74 | --set prePuller.hook.enabled=false \
75 | --install $helm_name jupyterhub/jupyterhub \
76 | --namespace ${namespace} \
77 | --version=3.0.3 \
78 | --values ../jupyterhub/config.yaml
79 |
80 | echo "Deployment Completed."
81 | echo "Try 'kubectl --namespace=$namespace get service proxy-public' to obtain external IP"
82 |
--------------------------------------------------------------------------------
/deployment/minikube/minio-dev.yaml:
--------------------------------------------------------------------------------
1 | # Deploys a new MinIO Pod into the metadata.namespace Kubernetes namespace
2 | #
3 | # The `spec.containers[0].args` contains the command run on the pod
4 | # The `/data` directory corresponds to the `spec.containers[0].volumeMounts[0].mountPath`
5 | # That mount path corresponds to a Kubernetes HostPath which binds `/data` to a local drive or volume on the worker node where the pod runs
6 | #
7 | apiVersion: v1
8 | kind: Pod
9 | metadata:
10 | labels:
11 | app: minio
12 | name: minio
13 | namespace: manatee # Change this value to match the namespace metadata.name
14 | spec:
15 | containers:
16 | - name: minio
17 | image: quay.io/minio/minio:latest
18 | command:
19 | - /bin/bash
20 | - -c
21 | args:
22 | - minio server /data --console-address :9090
23 | volumeMounts:
24 | - mountPath: /data
25 | name: localvolume # Corresponds to the `spec.volumes` Persistent Volume
26 | volumes:
27 | - name: localvolume
28 | hostPath: # MinIO generally recommends using locally-attached volumes
29 | path: /mnt/disk1/data # Specify a path to a local drive or volume on the Kubernetes worker node
30 | type: DirectoryOrCreate # The path to the last directory must exist
31 | ---
32 | # Deploys a new MinIO Service into the metadata.namespace Kubernetes namespace
33 | apiVersion: v1
34 | kind: Service
35 | metadata:
36 | name: minio-service
37 | namespace: manatee
38 | spec:
39 | type: NodePort
40 | ports:
41 | - port: 9000
42 | targetPort: 9000
43 | selector:
44 | app: minio
--------------------------------------------------------------------------------
/deployment/minikube/mysql-deployment.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: Deployment
3 | metadata:
4 | name: mysql-deployment
5 | spec:
6 | replicas: 1
7 | selector:
8 | matchLabels:
9 | app: mysql-server
10 | template:
11 | metadata:
12 | labels:
13 | app: mysql-server  # mysql-service.yaml selects pods by this label
14 | spec:
15 | containers:
16 | - name: mysql
17 | image: mysql:8.0
18 | env:
19 | - name: MYSQL_DATABASE
20 | valueFrom:
21 | secretKeyRef:
22 | name: mysql-secret  # this Secret is not defined in this manifest; it must already exist in the target namespace
23 | key: mysql-database
24 | - name: MYSQL_USER
25 | valueFrom:
26 | secretKeyRef:
27 | name: mysql-secret
28 | key: mysql-username
29 | - name: MYSQL_PASSWORD
30 | valueFrom:
31 | secretKeyRef:
32 | name: mysql-secret
33 | key: mysql-password
34 | - name: MYSQL_RANDOM_ROOT_PASSWORD
35 | value: "yes"
36 | ports:
37 | - containerPort: 3306  # matches targetPort in mysql-service.yaml
--------------------------------------------------------------------------------
/deployment/minikube/mysql-service.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Service
3 | metadata:
4 | name: mysql-service
5 | spec:
6 | type: NodePort
7 | ports:
8 | - port: 3306
9 | targetPort: 3306
10 | selector:
11 | app: mysql-server
--------------------------------------------------------------------------------
/docs/assets/img/arch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manatee-project/manatee/07d4c7d5f6fb4f1d5e91252d30cc1f133467abdc/docs/assets/img/arch.png
--------------------------------------------------------------------------------
/docs/assets/img/jobs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manatee-project/manatee/07d4c7d5f6fb4f1d5e91252d30cc1f133467abdc/docs/assets/img/jobs.png
--------------------------------------------------------------------------------
/docs/assets/img/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manatee-project/manatee/07d4c7d5f6fb4f1d5e91252d30cc1f133467abdc/docs/assets/img/logo.png
--------------------------------------------------------------------------------
/docs/assets/img/manatee-architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manatee-project/manatee/07d4c7d5f6fb4f1d5e91252d30cc1f133467abdc/docs/assets/img/manatee-architecture.png
--------------------------------------------------------------------------------
/docs/assets/img/manatee-white.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manatee-project/manatee/07d4c7d5f6fb4f1d5e91252d30cc1f133467abdc/docs/assets/img/manatee-white.png
--------------------------------------------------------------------------------
/docs/assets/img/manatee.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manatee-project/manatee/07d4c7d5f6fb4f1d5e91252d30cc1f133467abdc/docs/assets/img/manatee.png
--------------------------------------------------------------------------------
/docs/assets/img/plugin.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manatee-project/manatee/07d4c7d5f6fb4f1d5e91252d30cc1f133467abdc/docs/assets/img/plugin.png
--------------------------------------------------------------------------------
/docs/assets/img/stage-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manatee-project/manatee/07d4c7d5f6fb4f1d5e91252d30cc1f133467abdc/docs/assets/img/stage-1.png
--------------------------------------------------------------------------------
/docs/assets/img/two-stage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manatee-project/manatee/07d4c7d5f6fb4f1d5e91252d30cc1f133467abdc/docs/assets/img/two-stage.png
--------------------------------------------------------------------------------
/docs/assets/img/unzip.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manatee-project/manatee/07d4c7d5f6fb4f1d5e91252d30cc1f133467abdc/docs/assets/img/unzip.png
--------------------------------------------------------------------------------
/docs/blog/index.md:
--------------------------------------------------------------------------------
1 | # Blog
2 |
3 |
--------------------------------------------------------------------------------
/docs/blog/posts/2025-01-community-release.md:
--------------------------------------------------------------------------------
1 | ---
2 | date: 2025-01-07
3 | ---
4 |
5 | # First Community Release of ManaTEE
6 |
7 | We are thrilled to announce the first public community release of ManaTEE, an open-source framework for private data analytics.
8 | ManaTEE was introduced as a [key privacy solution](https://developers.tiktok.com/blog/privacygo-data-clean-room-open-source) for private data collaboration at TikTok, which built [one of its products](https://developers.tiktok.com/doc/vce-getting-started) on top of this solution. The team decided to improve and consolidate the solution by open-sourcing it.
9 | To further its momentum as an open-source private data analytics framework, TikTok has [donated the project](https://developers.tiktok.com/blog/tiktok-open-source-project-donation-manatee) to the Confidential Computing Consortium under the Linux Foundation.
10 | After months of development, testing, and refinement, we’re excited to share this project with the broader community.
11 |
12 | ## What is New?
13 |
14 | In the community release, we are providing the following features:
15 |
16 | * [Test deployment in minikube](../../getting-started/minikube.md) without cloud accounts (e.g., GCP)
17 | * Full [tutorial](../../getting-started/tutorials.md) to reproduce the demo
18 |
19 | We also worked hard to refactor the code to make it much more extensible. It now leverages Bazel for hermetic and reproducible builds, and has a basic CI/CD pipeline setup. The project is now ready for contributions from the community!
20 |
21 | ## What's Next?
22 |
23 | This is just the beginning. There is still much work to be done, such as:
24 |
25 | * **Diverse backend support**: ManaTEE currently only supports Google Confidential Space as the TEE backend, but different use cases may need different backends. For example, some may want to use an on-prem TEE cluster, or a different cloud. Some might even want to deploy the system in multiple clouds.
26 | * **Integrated data pipeline**: One of the big challenges for organizations that share data is processing or filtering the data to protect privacy and maintain data compliance. To ensure end-to-end data privacy, the data management should be closely integrated with the framework that consumes the data.
27 | * **Output privacy**: Although a TEE provides data privacy during execution, the outputs of the execution need extra effort to protect data privacy.
28 | * **Support for confidential GPUs**: Data analytics these days often relies on large AI models requiring hardware accelerators such as GPUs. Now that confidential GPUs are readily available, we are ready to support GPU workloads seamlessly in the ManaTEE framework.
29 |
30 | We are in the process of forming a Technical Steering Committee (TSC) to govern the project and drive its roadmap. Stay tuned for more updates in future posts.
31 |
32 | ## Join Us
33 |
34 | We’d love your feedback to help shape the future of ManaTEE and private data research framework.
35 | Please feel free to open issues, contribute code, or suggest ideas on GitHub. Please subscribe to our [mailing list](https://groups.google.com/u/1/g/manatee-project) for updates, too!
--------------------------------------------------------------------------------
/docs/developer/architecture.md:
--------------------------------------------------------------------------------
1 | # Architecture
2 |
3 |
--------------------------------------------------------------------------------
/docs/getting-started/building.md:
--------------------------------------------------------------------------------
1 | # Building
2 |
3 | ManaTEE uses [Bazel](https://bazel.build/install) for hermetic builds.
4 | Bazel is aware of all required tools and dependencies, thus building images is as easy as:
5 |
6 | ```
7 | bazel build //...
8 | ```
9 |
10 | Find individual rules from corresponding `BUILD.bazel` files.
11 |
12 | ## Components
13 |
14 | The `app` directory contains the source code of the data clean room, which has four components:
15 |
16 | * `executor` contains tools that are used in the base image of stage2, such as a tool that generates a custom attestation report within GCP Confidential Space.
17 | * `api` is the backend service of the data clean room that processes the request from jupyterlab.
18 | * `reconciler` is a reconciler that monitors in-progress jobs and takes actions.
19 | * `jupyterlab_manatee` is a JupyterLab extension for the data clean room that submits a job from the frontend and queries the status of the jobs.
20 |
21 | ## Loading Container Images
22 |
23 | If you'd like to load the images in your local container runtime (e.g., Docker), you can use `oci_load` rules.
24 |
25 | ```shell
26 | bazel query 'kind("oci_load", "//app/...")' | xargs -n1 bazel run
27 | ```
28 |
29 | # Testing
30 |
31 | To run all tests, run:
32 |
33 | ```
34 | bazel test //...
35 | ```
--------------------------------------------------------------------------------
/docs/getting-started/deployment.md:
--------------------------------------------------------------------------------
1 | # GCP Deployment
2 |
3 | ## Prerequisites
4 |
5 | Currently, ManaTEE requires Google Cloud Platform (GCP) for deployment, as it requires cloud-provided TEE.
6 | In the future, we will support more cloud backends as well as local test deployment (See [#31](https://github.com/manatee-project/manatee/issues/31)).
7 |
8 | Because of the cloud resource requirement, we recommend a cloud admin to create all the resources by following the steps below.
9 |
10 | ### Cloud Setup
11 |
12 | * A valid GCP account that has the ability to create/destroy resources. For a GCP project, please enable the following APIs:
13 | - serviceusage.googleapis.com
14 | - compute.googleapis.com
15 | - container.googleapis.com
16 | - cloudkms.googleapis.com
17 | - servicenetworking.googleapis.com
18 | - cloudresourcemanager.googleapis.com
19 | - sqladmin.googleapis.com
20 | - confidentialcomputing.googleapis.com
21 |
22 | ### Tools
23 | * [Gcloud CLI](https://cloud.google.com/sdk/docs/install) Login to the GCP `gcloud auth login && gcloud auth application-default login && gcloud components install gke-gcloud-auth-plugin`
24 | * [Terraform](https://developer.hashicorp.com/terraform/tutorials/aws-get-started/install-cli) Terraform is an infrastructure as code tool that enables you to safely and predictably provision and manage infrastructure in any cloud.
25 | * [Helm](https://helm.sh/docs/intro/install/) Helm is a package manager for Kubernetes that allows developers and operators to more easily package, configure, and deploy applications and services onto Kubernetes clusters.
26 | * [Hertz](https://github.com/cloudwego/hertz) Hertz is a high-performance, high-usability, extensible HTTP framework for Go. It’s designed to make it easy for developers to build microservices.
27 |
28 | ## Create Resources
29 |
30 | The resources are created and managed by the project administrator who has the `Owner` role in the GCP project. Make sure you have correctly defined environment variables in the `env.bzl`. Only the project administrator is responsible to run these commands to create resources.
31 |
32 | The `resources/global` directory contains the global resources, including clusters, the Cloud SQL instance, the database, Docker repositories, and service accounts. These resources are global and are only created once.
33 | ```
34 | pushd resources/global
35 | ./apply.sh
36 | popd
37 | ```
38 |
39 | The `resources/deployment` directory includes the resources related to Kubernetes, including the Kubernetes namespace, role, and secret. These resources are created under a specific namespace, so the namespace parameter is required, and you can create different deployments under different namespaces.
40 | ```shell
41 | pushd resources/deployment
42 | ./apply.sh --namespace=<namespace>
43 | popd
44 | ```
45 |
46 | ## Pushing Images
47 |
48 | ```shell
49 | gcloud auth configure-docker us-docker.pkg.dev # authenticate to artifact registry
50 | bazel run //:push_all_images --action_env=namespace=<namespace>
51 | ```
52 |
53 | > [!IMPORTANT]
54 | > The `--action_env=namespace=<namespace>` flag is required.
55 |
56 | You can also push images separately with this command. Replace `<app>` with the directory name under `/app` (e.g., `api`).
57 |
58 | ```
59 | bazel run //:push_<app>_image --action_env=namespace=<namespace>
60 | ```
61 |
62 | ## Deploying in Google Cloud Platform (GCP)
63 |
64 | ### Defining environment variables
65 | First, copy the example environment variables template to the existing directory.
66 | ```
67 | cp .env.example env.bzl
68 | ```
69 | Edit the variables in `env.bzl`. The `env.bzl` file is the one that actually takes effect; the other files are just templates. The double quotes around each variable value are required. For example:
70 |
71 | ``` sh title="env.bzl"
72 | env="dev" # the deployment environment
73 | project_id="your project id" # gcp project id
74 | region="" # the region in which the resources are created
75 | zone="" # the zone in which the resources are created
76 | ```
77 |
78 | ### Deploy
79 |
80 | Deploy data clean room and jupyterhub by helm chart.
81 | ```shell
82 | source env.bzl
83 | gcloud container clusters get-credentials dcr-$env-cluster --zone $zone --project $project_id
84 |
85 | pushd deployment
86 | ./deploy.sh --namespace=<namespace>
87 | popd
88 | ```
89 | When deployment is complete, you can follow the output of the script to get the public ip of jupyterhub.
90 | ```
91 | kubectl --namespace=<namespace> get service proxy-public
92 | ```
93 |
--------------------------------------------------------------------------------
/docs/getting-started/llm-model-evaluation.md:
--------------------------------------------------------------------------------
1 | # Trusted LLM Model Evaluation Example
2 |
3 | This doc demonstrates how to use manatee for trusted evaluation of LLM models. Manatee seamlessly integrates with lm-evaluation-harness, enabling comprehensive testing of LLM models across a wide range of evaluation tasks.
4 |
5 | Scenario:
6 | Suppose a model provider owns a proprietary LLM model. The provider wishes to prove that their model performs as publicly claimed (e.g., in terms of fairness or accuracy). This evaluation process is divided into two stages:
7 | - Stage 1: The script runs on a mock (fake) model to illustrate the workflow.
8 | - Stage 2: The script runs on the actual model, producing real evaluation results along with cryptographic attestation.
9 |
10 | The attestation process cryptographically binds the evaluation results to a TEE (Trusted Execution Environment) quote. This quote serves as proof that a specific model (identified by its hash) was executed within a legitimate TEE, and that the reported outputs are authentic and trustworthy.
11 |
12 |
13 | ## Install lm-evaluation-harness
14 | `lm-evaluation-harness` provides a unified framework to test generative language models on a large number of different evaluation tasks.
15 |
16 | ```python
17 | !git clone --depth 1 https://github.com/EleutherAI/lm-evaluation-harness
18 | %pip install -e ./lm-evaluation-harness[wandb]
19 | ```
20 |
21 | ## Model Selection(HuggingFace for Example)
22 |
23 | ```
24 | HG_MODEL="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
25 | import lm_eval
26 | from lm_eval.utils import setup_logging
27 | lm_obj = lm_eval.models.huggingface.HFLM(pretrained=HG_MODEL, device="cpu")
28 | task_manager = lm_eval.tasks.TaskManager()
29 | lm_obj.get_model_info()
30 | ```
31 |
32 | ## Tasks Selection
33 |
34 | ### BBQ: A Hand-Built Bias Benchmark for Question Answering
35 |
36 | BBQ measures the bias in the output for the question answering task. The dataset consists of question sets constructed by the authors that highlight attested social biases against people belonging to protected classes along nine social dimensions relevant for U.S. English-speaking contexts. BBQ evaluates model responses at two levels: (i) given an under-informative context, how strongly responses reflect social biases (AMBIGUOUS CONTEXT), and (ii) given an adequately informative context, whether the model's biases override a correct answer choice (DISAMBIGUATED CONTEXT).
37 |
38 | ### CrowS-Pairs: A Challenge Dataset for Measuring Social Biases in Masked Language Models
39 |
40 | CrowS-Pairs is a challenge set for evaluating language models (LMs) on their tendency to generate biased outputs. CrowS-Pairs comes in 2 languages, and the English subset has a newer version which fixes some of the issues with the original version.
41 |
42 | ### Simple Cooccurrence Bias
43 |
44 | This bias evaluation relies on simple templates for prompting LMs and tests for bias in the next word prediction. For instance, when given a context such as "The {occupation} was a", masculine gender identifiers are found to be more likely to follow than feminine gender ones. Following Brown et al. (2020), this occupation bias is measured as the average log-likelihood of choosing a female gender identifier (woman, female) minus the log-likelihood of choosing a male gender identifier (man, male).
45 |
46 | ### Winogender: Gender Bias in Coreference Resolution
47 | Winogender is designed to measure gender bias in coreference resolution systems, but has also been used for evaluating language models. The dataset consists of simple sentences with an occupation, participant, and pronoun, where the pronoun refers to either the occupation or participant. Each example consists of three variations, where only the gender of the pronoun is changed, to test how the pronoun affects the prediction. An example of the Winogender schema is "The paramedic performed CPR on the passenger even though he/she/they knew it was too late." This implementation follows the description from the paper "Language Models are Few-Shot Learners", which uses prompts.
48 |
49 | ```python
50 | import datasets
51 | datasets.config.HF_DATASETS_TRUST_REMOTE_CODE = True
52 | tee_llm_evaluation_result = lm_eval.simple_evaluate( # call simple_evaluate
53 | model=lm_obj,
54 | tasks=["winogender","simple_cooccurrence_bias", "crows_pairs_english"],
55 | num_fewshot=0,
56 | task_manager=task_manager,
57 | log_samples=True,
58 | batch_size=1024,
59 | confirm_run_unsafe_code=True
60 | )
61 | tee_llm_evaluation_result["results"]
62 | ```
63 |
64 | ## Get Result and TEE Attestation Report
65 | After the job finishes, download the result along with the attestation report. The `eat_nonce` in the attestation report is the hash of the output file.
66 |
--------------------------------------------------------------------------------
/docs/getting-started/minikube.md:
--------------------------------------------------------------------------------
1 | # Test Deployment on Minikube
2 |
3 | We also made it possible to deploy and test ManaTEE without having any cloud account.
4 | Our test deployment uses a local Minikube cluster with a few components that replace cloud resources.
5 | With this, users can quickly test and try ManaTEE JupyterLab extension and the API without having an actual TEE backend.
6 |
7 | ## Prerequisite
8 |
9 | First, install the [Minikube CLI](https://minikube.sigs.k8s.io/docs/start/).
10 |
11 | Then, create a minikube cluster with enough memory. We need larger memory because of the Kaniko jobs.
12 |
13 | ```
14 | minikube start --memory=12192mb --cpus=8 --disk-size=50g --insecure-registry "10.0.0.0/24"
15 | ```
16 |
17 | ## Create Cluster Resources
18 |
19 | Once minikube cluster is up and running, create the resources in the minikube cluster
20 |
21 | ```
22 | pushd resources/minikube
23 | ./apply.sh
24 | popd
25 | ```
26 |
27 | ## Build Images
28 |
29 | Now, build the images and load them into Docker.
30 | Minikube has its own Docker engine running inside the cluster.
31 | Thus, we first need to point the local Docker client to the Docker engine inside minikube
32 |
33 | ```
34 | eval $(minikube docker-env)
35 | ```
36 |
37 | Then, run the following command to load all images
38 |
39 | ```
40 | bazelisk run //:load_all_images
41 | ```
42 |
43 | ## Setup Registry
44 |
45 | The API requires artifact registry to store the TEE base image.
46 | Thus, we use minikube's registry addon to host the image.
47 |
48 | Enable the registry
49 | ```
50 | minikube addons enable registry
51 | ```
52 |
53 | Run a proxy to connect to the minikube registry, so that the executor image can be pushed to it.
54 | ```
55 | docker run --rm -it --network=host alpine ash -c "apk add socat && socat TCP-LISTEN:5000,reuseaddr,fork TCP:$(minikube ip):5000"
56 | ```
57 |
58 | Open another terminal, and run
59 |
60 | ```
61 | eval $(minikube docker-env)
62 | docker tag executor localhost:5000/executor && docker push localhost:5000/executor
63 | ```
64 |
65 | You can close the proxy after the docker push.
66 |
67 | ## Deploy
68 |
69 | Now, you can deploy ManaTEE to minikube.
70 |
71 | ```
72 | pushd deployment/minikube
73 | ./deploy.sh
74 | popd
75 | ```
76 |
77 | ## Accessing JupyterHub
78 |
79 | You can port-forward traffic to the k8s Service proxy-public with kubectl to access it from your computer. `kubectl --namespace=manatee port-forward service/proxy-public 8080:http`.
80 |
81 | Try insecure HTTP access: http://localhost:8080
82 |
--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 | # Welcome to ManaTEE Project
2 |
3 | 
4 |
5 | ManaTEE is an open-source framework for secure data analytics in public research. It leverages Privacy Enhancing Technologies, including confidential computing, to protect sensitive data while maintaining usability.
6 |
7 | ManaTEE Project was initiated in 2024 as a core use case of TikTok. Now part of the Confidential Computing Consortium, ManaTEE addresses the growing challenges of balancing privacy, usability, and accuracy in enterprise data collaboration.
8 |
9 | ## Two-Stage Data Analytics Platform
10 |
11 | ManaTEE introduces a two-stage data clean room model to provide an interactive interface for exploring data while protecting private data during processing. It combines different privacy-enhancing technologies (PETs) across two stages:
12 |
13 | * **Programming Stage**: Data consumers explore datasets using low-risk data, employing different PETs such as pseudonymization or differentially private synthetic data generation.
14 | * **Secure Execution Stage**: Workloads run in a trusted execution environment (TEE), which provides attestable integrity and confidentiality guarantees for the workload in the cloud.
15 |
16 | 
17 | /// Caption
18 | Two-stage data clean room model
19 | ///
20 |
21 | ## Key Features
22 |
23 | ManaTEE provides the following key benefits:
24 |
25 | * **Interactive Programming**: Integrated with Jupyter Notebook, allowing data consumers to work with Python and other popular languages.
26 | * **Cloud-Ready**: ManaTEE can be easily deployed to existing cloud TEE backends such as Google Cloud. We plan to support other backends as well, eliminating the need to build the entire infrastructure from scratch.
27 | * **Flexible PET**: Data providers can control the protection mechanisms at each stage to tailor to specific privacy requirements of the data.
28 | * **Trusted Execution Environment**: By leveraging TEEs, ManaTEE ensures a high level of confidence in data confidentiality and program integrity for both data providers and data consumers.
29 | * **Accuracy and Utility**: ManaTEE employs a two-stage design to ensure that result accuracy is not compromised for the sake of privacy.
30 |
31 |
32 | ## Use Cases
33 |
34 | Potential use cases for ManaTEE include:
35 |
36 | * **Trusted Research Environments (TREs)**: Secure data analysis for public health, economics, and more, while maintaining data privacy.
37 | * **Advertising & Marketing**: Lookalike segment analysis and private ad tracking without compromising user data.
38 | * **Machine Learning**: Enables private model training without exposing sensitive data or algorithms.
39 |
--------------------------------------------------------------------------------
/docs/project-status.md:
--------------------------------------------------------------------------------
1 | # Project Roadmap
2 |
3 | A few necessary components such as data SDK are not included in the open source version.
4 | However, you can still try to reproduce our demo by following [tutorials](getting-started/tutorials.md).
5 |
6 | ## Feature Status
7 |
8 | Many parts of ManaTEE are still under active development.
9 |
10 | | | Current (Alpha) | Future |
11 | |-------------------------|--------------------------|---------------------------|
12 | | **Users** | One-Way Data Sharing | Multi-Way Data Sharing |
13 | | **Backend** | Single Backend (Google Cloud Platform) | Multiple Backend |
14 | | **Data Provisioning** | Manual | Automated |
15 | | **Policy and Attestation** | Manual | Automated |
16 | | **Compute** | CPU | CPU/GPU |
17 |
18 | * **Data Provisioning, Policy, and Attestation**: Currently, the data owner is responsible for manually setting all the infrastructure including data and the access control. However, future versions will make this easier by including a generic interface for uploading data and configuring the data access policies based on attestation.
19 |
20 | * **Backend**: We only support a single TEE backend called [Confidential Space](https://cloud.google.com/confidential-computing/confidential-space/docs/confidential-space-overview) provided by Google Cloud Platform (GCP). In the future, it will be extended to support more TEE backends including other cloud providers or native confidential VMs/containers.
21 |
22 | * **Compute**: ManaTEE currently does not support confidential GPU or any accelerator-based computation.
23 |
24 | ## Roadmap
25 |
26 | We are currently forming a Technical Steering Committee (TSC) for governing the project and driving the roadmap.
27 | If you're interested in joining the project, please reach out to the team via our [mailing list](mailto:manatee-project@googlegroups.com).
28 |
--------------------------------------------------------------------------------
/docs/stylesheets/extra.css:
--------------------------------------------------------------------------------
1 | [data-md-color-scheme="manatee"] { /* Primary-color overrides applied when the "manatee" color scheme is active. */
2 | --md-primary-fg-color: #757575; /* base primary color (mid gray) */
3 | --md-primary-fg-color--light: #a8a8a8; /* lighter variant */
4 | --md-primary-fg-color--dark: #424242; /* darker variant */
5 | }
--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------
1 | site_name: ManaTEE # MkDocs site configuration for the ManaTEE documentation.
2 | site_url: https://manatee-project.github.io
3 | theme:
4 | name: material
5 | features:
6 | - navigation.sections
7 | - navigation.expand
8 | - navigation.footer
9 | - content.action.edit
10 | - content.code.copy
11 | - content.code.annotate
12 | logo: assets/img/manatee.png
13 | palette:
14 | primary: black # NOTE(review): docs/stylesheets/extra.css defines a "manatee" color scheme, but this file has no `extra_css` entry or `scheme: manatee` — confirm whether that stylesheet is meant to be loaded.
15 | repo_url: https://github.com/manatee-project/manatee
16 | repo_name: manatee-project/manatee
17 |
18 | extra:
19 | social:
20 | - icon: fontawesome/brands/github
21 | link: https://github.com/manatee-project/manatee
22 |
23 | markdown_extensions:
24 | - attr_list
25 | - md_in_html
26 | - pymdownx.blocks.caption # docs/index.md uses a `/// Caption` block
27 | - pymdownx.highlight:
28 | anchor_linenums: true
29 | line_spans: __span
30 | pygments_lang_class: true
31 | - pymdownx.inlinehilite
32 | - pymdownx.snippets
33 | - pymdownx.superfences
34 |
35 |
36 | plugins:
37 | - blog # the nav below points at blog/index.md
38 |
39 | # Page tree
40 | nav:
41 | - Introduction: index.md
42 | - Roadmap: project-status.md
43 | - Getting Started:
44 | - Build: getting-started/building.md
45 | - Deploy:
46 | - GCP: getting-started/deployment.md
47 | - Minikube: getting-started/minikube.md
48 | - Tutorials:
49 | - Tutorials: getting-started/tutorials.md
50 | - LLM Model Evaluation: getting-started/llm-model-evaluation.md
51 | - Blog:
52 | - blog/index.md
--------------------------------------------------------------------------------
/resources/.gitignore:
--------------------------------------------------------------------------------
1 | .terraform/
2 | terraform.tfstate
3 | terraform.tfstate.backup
4 | terraform.tfvars
5 |
--------------------------------------------------------------------------------
/resources/deployment/apply.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright 2024 TikTok Pte. Ltd.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | set -e
16 |
17 | # Parse the --key=value flags. "${arg#*=}" strips everything up to and
18 | # including the FIRST '=', so a value that itself contains '=' (for example a
19 | # password) is kept intact instead of being truncated at the second '='.
20 | for arg in "$@"
21 | do
22 |   case $arg in
23 |     --namespace=*)
24 |       namespace="${arg#*=}"
25 |       ;;
26 |     --database-user=*)
27 |       dbuser="${arg#*=}"
28 |       ;;
29 |     --database-password=*)
30 |       dbpwd="${arg#*=}"
31 |       ;;
32 |   esac
33 | done
34 |
35 | # The namespace is mandatory. Report the problem on stderr and exit non-zero
36 | # so that callers (and CI) can detect the failure.
37 | if [ -z "$namespace" ]; then
38 |   echo -e "Error: the namespace parameter is missing, please run the script like ./apply.sh --namespace=xxx" >&2
39 |   exit 1
40 | fi
41 |
42 | # Default the database user to the namespace name.
43 | if [ -z "$dbuser" ]; then
44 |   dbuser=$namespace
45 |   echo -e "\033[1;33mWarning: the database-user parameters doesn't exist using default database user: ${namespace}\033[0m"
46 | fi
47 |
48 | # Generate a random 12-character alphanumeric password when none is supplied.
49 | if [ -z "$dbpwd" ]; then
50 |   dbpwd=$(cat /dev/urandom | tr -dc 'a-zA-Z0-9' | fold -w 12 | head -n 1)
51 |   echo -e "\033[1;33mWarning: the database-password parameters doesn't exist using random database password: ${dbpwd}\033[0m"
52 | fi
53 |
54 | # Check if gcloud is installed
55 | if ! [ -x "$(command -v gcloud)" ]; then
56 |   echo "Error: gcloud is not installed." >&2
57 |   exit 1
58 | fi
59 |
60 | # Check if gcloud logged in
61 | if ! gcloud auth list | grep -q 'ACTIVE'; then
62 |   echo "Error: No active gcloud account found." >&2
63 |   exit 1
64 | fi
65 |
66 | # Check that the shared variables file exists, then load it into this shell.
67 | # Paths are quoted so a checkout directory containing spaces still works.
68 | VAR_FILE="../../env.bzl"
69 | if [ ! -f "$VAR_FILE" ]; then
70 |   echo "Error: Variables file does not exist." >&2
71 |   exit 1
72 | fi
73 | VAR_FILE=$(realpath "$VAR_FILE")
74 | source "$VAR_FILE"
75 |
76 | # NOTE: this replaces any `zone` value that was just loaded from env.bzl.
77 | zone=$region-a
78 | # get kubernetes cluster credentials
79 | gcloud container clusters get-credentials "dcr-$env-cluster" --zone "$zone" --project "$project_id"
80 |
81 | # Build terraform.tfvars from env.bzl plus the per-deployment values.
82 | # printf '%s' writes the values verbatim; `echo -e` would interpret backslash
83 | # sequences inside a user-supplied password.
84 | cp "$VAR_FILE" terraform.tfvars
85 |
86 | printf '\nnamespace="%s"\n' "$namespace" >> terraform.tfvars
87 | printf 'mysql_username="%s"\n' "$dbuser" >> terraform.tfvars
88 | printf 'mysql_password="%s"\n' "$dbpwd" >> terraform.tfvars
89 | # The empty "gcs" backend is configured here: one state bucket per env,
90 | # one state prefix per namespace.
91 | terraform init -reconfigure -backend-config="bucket=dcr-tf-state-$env" -backend-config="prefix=$namespace"
92 |
93 | terraform apply
94 |
--------------------------------------------------------------------------------
/resources/deployment/backend.tf:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2024 TikTok Pte. Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | terraform {
18 | backend "gcs" {} # left empty on purpose: apply.sh passes bucket and prefix via `terraform init -backend-config=...`
19 | }
20 |
--------------------------------------------------------------------------------
/resources/deployment/cluster_rolebinding.tf:
--------------------------------------------------------------------------------
1 | resource "kubernetes_cluster_role_binding" "cluster_admin_binding" { # Grants the built-in cluster-admin ClusterRole to a single user.
2 | metadata {
3 | name = "cluster-admin-binding"
4 | }
5 | role_ref {
6 | api_group = "rbac.authorization.k8s.io"
7 | kind = "ClusterRole"
8 | name = "cluster-admin"
9 | }
10 | subject {
11 | kind = "User"
12 | name = data.google_client_openid_userinfo.me.email # email from the "me" data source declared in providers.tf
13 | api_group = "rbac.authorization.k8s.io"
14 | }
15 | }
16 |
--------------------------------------------------------------------------------
/resources/deployment/db_account.tf:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2024 TikTok Pte. Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | resource "google_sql_database" "database" { # Per-namespace database on the environment's Cloud SQL instance.
18 | name = "dcr-${var.namespace}-database" # one database per deployment namespace
19 | project = var.project_id
20 | instance = "dcr-${var.env}-db-instance" # instance is referenced by name only; presumably created by resources/global — confirm
21 | }
22 |
23 | resource "google_sql_user" "dcr_db_user" { # Database user on the same instance as the per-namespace database.
24 | name = var.mysql_username # apply.sh writes mysql_username into terraform.tfvars (defaults to the namespace)
25 | instance = "dcr-${var.env}-db-instance"
26 | password = var.mysql_password # apply.sh writes mysql_password into terraform.tfvars (random if not supplied)
27 | project = var.project_id
28 | }
29 |
--------------------------------------------------------------------------------
/resources/deployment/namespace.tf:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2024 TikTok Pte. Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | resource "kubernetes_namespace" "data_clean_room_k8s_namespace" { # Kubernetes namespace for this deployment.
18 | metadata {
19 | name = var.namespace # value of the --namespace flag passed to apply.sh
20 | }
21 | }
22 |
--------------------------------------------------------------------------------
/resources/deployment/providers.tf:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2024 TikTok Pte. Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | terraform { # Declares the providers this module needs; no version constraints are pinned here.
18 | required_providers {
19 | google = {
20 | source = "hashicorp/google"
21 | }
22 | kubernetes = {
23 | source = "hashicorp/kubernetes"
24 | }
25 | }
26 |
27 | }
28 |
29 | provider "kubernetes" {
30 | config_path = "~/.kube/config" # apply.sh runs `gcloud container clusters get-credentials` before terraform; presumably that populates this kubeconfig — confirm
31 | }
32 |
33 | data "google_client_openid_userinfo" "me" {} # its email attribute is used as the subject in cluster_rolebinding.tf
34 |
--------------------------------------------------------------------------------
/resources/deployment/repositories.tf:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2024 TikTok Pte. Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | resource "google_artifact_registry_repository" "data_clean_room_images" { # Docker image repository for one env/namespace pair.
18 | project = var.project_id
19 | location = "us" # hard-coded; the deployment docs authenticate against us-docker.pkg.dev
20 | repository_id = "dcr-${var.env}-${var.namespace}-images"
21 | description = "Data Clean Room Images"
22 | format = "DOCKER"
23 | }
24 |
--------------------------------------------------------------------------------
/resources/deployment/role.tf:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2024 TikTok Pte. Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | resource "kubernetes_role" "role" { # Namespaced Role: full management of Jobs and Pods, plus access to pod logs.
18 |   metadata {
19 |     name      = "dcr-pod-role"
20 |     namespace = kubernetes_namespace.data_clean_room_k8s_namespace.metadata[0].name # resource reference (as in secret.tf) so the namespace is created before this role
21 |   }
22 |
23 |   rule {
24 |     api_groups = ["batch", ""] # "batch" for jobs; "" is the core API group (pods, pods/log)
25 |     resources  = ["jobs", "pods", "pods/log"]
26 |     verbs      = ["get", "list", "watch", "create", "update", "patch", "delete"]
27 |   }
28 | }
29 |
30 | resource "kubernetes_role_binding" "role_binding" { # Binds dcr-pod-role to the DCR pod service account within the deployment namespace.
31 |   metadata {
32 |     name      = "dcr-pod-role-binding"
33 |     namespace = kubernetes_namespace.data_clean_room_k8s_namespace.metadata[0].name # resource reference so the namespace is created before this binding
34 |   }
35 |   role_ref {
36 |     api_group = "rbac.authorization.k8s.io"
37 |     kind      = "Role"
38 |     name      = kubernetes_role.role.metadata[0].name
39 |   }
40 |   subject {
41 |     kind      = "ServiceAccount"
42 |     name      = kubernetes_service_account.k8s_dcr_pod_service_account.metadata[0].name # service account resource is declared outside this file
43 |     namespace = var.namespace
44 |   }
45 | }
46 |
47 |
--------------------------------------------------------------------------------
/resources/deployment/secret.tf:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2024 TikTok Pte. Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | # Kubernetes Secret holding the MySQL username, password and database name.
18 | resource "kubernetes_secret" "secret" {
19 |   metadata {
20 |     namespace = kubernetes_namespace.data_clean_room_k8s_namespace.metadata[0].name
21 |     name      = "mysql-secret"
22 |   }
23 |
24 |   data = {
25 |     mysql-database = local.database
26 |     mysql-username = var.mysql_username
27 |     mysql-password = var.mysql_password
28 |   }
29 | }
30 |
--------------------------------------------------------------------------------
/resources/deployment/service_accounts.tf:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2024 TikTok Pte. Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | locals {
18 |   # Email addresses of the GCP service accounts that the Kubernetes service
19 |   # accounts below are annotated with.
20 |   gcp_dcr_pod_sa_email     = "${local.gcp_dcr_pod_sa}@${var.project_id}.iam.gserviceaccount.com"
21 |   gcp_jupyter_pod_sa_email = "${local.gcp_jupyter_pod_sa}@${var.project_id}.iam.gserviceaccount.com"
22 | }
23 |
24 | # Kubernetes service account for the DCR pod, annotated with its GCP service account.
25 | resource "kubernetes_service_account" "k8s_dcr_pod_service_account" {
26 |   depends_on                      = [kubernetes_namespace.data_clean_room_k8s_namespace]
27 |   automount_service_account_token = true
28 |
29 |   metadata {
30 |     namespace = var.namespace
31 |     name      = "dcr-k8s-pod-sa"
32 |     annotations = {
33 |       "iam.gke.io/gcp-service-account" = local.gcp_dcr_pod_sa_email
34 |     }
35 |   }
36 | }
37 |
38 | # Kubernetes service account for the Jupyter pod, annotated with its GCP service account.
39 | resource "kubernetes_service_account" "k8s_jupyter_pod_service_account" {
40 |   depends_on                      = [kubernetes_namespace.data_clean_room_k8s_namespace]
41 |   automount_service_account_token = true
42 |
43 |   metadata {
44 |     namespace = var.namespace
45 |     name      = "jupyter-k8s-pod-sa"
46 |     annotations = {
47 |       "iam.gke.io/gcp-service-account" = local.gcp_jupyter_pod_sa_email
48 |     }
49 |   }
50 | }
51 |
52 |
53 | # Grants roles/iam.workloadIdentityUser on the DCR pod GCP service account
54 | # to the matching Kubernetes service account.
55 | resource "google_service_account_iam_member" "dcr_pod_sa_iam_member" {
56 |   depends_on         = [kubernetes_namespace.data_clean_room_k8s_namespace]
57 |   role               = "roles/iam.workloadIdentityUser"
58 |   service_account_id = "projects/${var.project_id}/serviceAccounts/${local.gcp_dcr_pod_sa_email}"
59 |   member             = "serviceAccount:${var.project_id}.svc.id.goog[${var.namespace}/${kubernetes_service_account.k8s_dcr_pod_service_account.metadata[0].name}]"
60 | }
61 |
62 | # Grants roles/iam.workloadIdentityUser on the Jupyter pod GCP service account
63 | # to the matching Kubernetes service account.
64 | resource "google_service_account_iam_member" "jupyter_pod_sa_iam_member" {
65 |   depends_on         = [kubernetes_namespace.data_clean_room_k8s_namespace]
66 |   role               = "roles/iam.workloadIdentityUser"
67 |   service_account_id = "projects/${var.project_id}/serviceAccounts/${local.gcp_jupyter_pod_sa_email}"
68 |   member             = "serviceAccount:${var.project_id}.svc.id.goog[${var.namespace}/${kubernetes_service_account.k8s_jupyter_pod_service_account.metadata[0].name}]"
69 | }
70 |
--------------------------------------------------------------------------------
/resources/deployment/variables.tf:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2024 TikTok Pte. Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | # Input variables for the deployment module.
18 |
19 | variable "namespace" {
20 |   type        = string
21 |   description = "Kubernetes namespaces"
22 |   default     = ""
23 | }
24 |
25 | variable "env" {
26 |   type        = string
27 |   description = "Deployment environment, e.g., dev, prod, oss"
28 | }
29 |
30 | variable "project_id" {
31 |   type        = string
32 |   description = "The GCP project ID"
33 | }
34 |
35 | variable "mysql_username" {
36 |   type        = string
37 |   description = "Mysql username"
38 | }
39 |
40 | variable "mysql_password" {
41 |   type        = string
42 |   description = "Mysql password"
43 |   # Keep the password out of plan/apply output.
44 |   sensitive = true
45 | }
46 |
47 | variable "region" {
48 |   type        = string
49 |   description = "Region to create the gcp resources"
50 | }
51 |
52 | variable "zone" {
53 |   type        = string
54 |   description = "Zone to create the gcp resources"
55 | }
56 |
57 | # Names derived from the inputs above.
58 | locals {
59 |   gcp_dcr_pod_sa     = "dcr-${var.env}-pod-sa"
60 |   gcp_jupyter_pod_sa = "jupyter-${var.env}-pod-sa"
61 |   database           = "dcr-${var.namespace}-database"
62 | }
63 |
--------------------------------------------------------------------------------
/resources/global/apply.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright 2024 TikTok Pte. Ltd.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | # Creates the global GCP resources: makes sure the Terraform state bucket
17 | # exists, then runs `terraform init` and `terraform apply` in this directory.
18 | echo "You are creating the gcp resources and this should only be done once."
19 |
20 | # Check if gcloud is installed
21 | if ! [ -x "$(command -v gcloud)" ]; then
22 |   echo "Error: gcloud is not installed." >&2
23 |   exit 1
24 | fi
25 |
26 | # Check if gcloud is logged in. Filter on the account status instead of grepping
27 | # for the word ACTIVE, which also appears in the table header.
28 | if [ -z "$(gcloud auth list --filter=status:ACTIVE --format='value(account)' 2> /dev/null)" ]; then
29 |   echo "Error: No active gcloud account found." >&2
30 |   exit 1
31 | fi
32 |
33 | # Check whether the variables file exists
34 | VAR_FILE="../../env.bzl"
35 | if [ ! -f "$VAR_FILE" ]; then
36 |   echo "Error: Variables file does not exist." >&2
37 |   exit 1
38 | fi
39 | VAR_FILE=$(realpath "$VAR_FILE")
40 | source "$VAR_FILE"
41 |
42 | # $env is expected to be defined by the sourced file; without it the state
43 | # bucket name below would be malformed.
44 | if [ -z "$env" ]; then
45 |   echo "Error: 'env' is not set in $VAR_FILE." >&2
46 |   exit 1
47 | fi
48 |
49 | # Create the Terraform state bucket on first use.
50 | STATE_BUCKET="dcr-tf-state-$env"
51 | if ! gsutil ls "gs://$STATE_BUCKET" > /dev/null 2>&1; then
52 |   gsutil mb -l us "gs://$STATE_BUCKET" || exit 1
53 | fi
54 |
55 | # Stop before `terraform apply` if any preparatory step fails.
56 | cp "$VAR_FILE" terraform.tfvars || exit 1
57 | terraform init -reconfigure -backend-config="bucket=$STATE_BUCKET" -backend-config="prefix=cloud" || exit 1
58 | terraform apply
59 |
--------------------------------------------------------------------------------
/resources/global/backend.tf:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2024 TikTok Pte. Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | # GCS state backend. Its settings are intentionally empty here; apply.sh
18 | # passes the bucket and prefix to `terraform init` via -backend-config.
19 | terraform {
20 |   backend "gcs" {
21 |   }
22 | }
23 |
--------------------------------------------------------------------------------
/resources/global/buckets.tf:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2024 TikTok Pte. Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | # Per-environment hub bucket with public access prevention enforced and
18 | # uniform bucket-level access enabled.
19 | resource "google_storage_bucket" "data_clean_room_hub" {
20 |   project  = var.project_id
21 |   name     = "dcr-${var.env}-hub"
22 |   location = "us"
23 |
24 |   uniform_bucket_level_access = true
25 |   public_access_prevention    = "enforced"
26 | }
27 |
--------------------------------------------------------------------------------
/resources/global/cluster.tf:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2024 TikTok Pte. Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | locals {
18 |   cluster_name   = "dcr-${var.env}-cluster"
19 |   node_pool_name = "dcr-${var.env}-node-pool"
20 | }
21 |
22 | # GKE Cluster
23 | resource "google_container_cluster" "dcr_cluster" {
24 |   project = var.project_id
25 |   name    = local.cluster_name
26 |   # A zone is used on purpose: with a region, each zone would create a node.
27 |   location = var.zone
28 |
29 |   deletion_protection = false
30 |
31 |   # We can't create a cluster with no node pool defined, but we want to only use
32 |   # separately managed node pools. So we create the smallest possible default
33 |   # node pool and immediately delete it.
34 |   remove_default_node_pool = true
35 |   initial_node_count       = 1
36 |
37 |   enable_l4_ilb_subsetting = true
38 |   datapath_provider        = "ADVANCED_DATAPATH"
39 |
40 |   workload_identity_config {
41 |     workload_pool = "${var.project_id}.svc.id.goog"
42 |   }
43 |   ip_allocation_policy {
44 |     stack_type = "IPV4_IPV6"
45 |   }
46 |
47 |   network    = google_compute_network.data_clean_room_network.self_link
48 |   subnetwork = google_compute_subnetwork.data_clean_room_subnetwork.self_link
49 | }
50 |
51 |
52 | # Node pool for GKE cluster
53 | resource "google_container_node_pool" "dcr_node_pool" {
54 |   project  = var.project_id
55 |   name     = local.node_pool_name
56 |   location = var.zone
57 |   cluster  = google_container_cluster.dcr_cluster.name
58 |
59 |   # Only the starting size is set: node_count should not be used alongside
60 |   # autoscaling, because Terraform would keep resetting the size the
61 |   # autoscaler chose.
62 |   initial_node_count = var.num_nodes
63 |
64 |   node_config {
65 |     service_account = google_service_account.gcp_dcr_cluster_sa.email
66 |     preemptible     = false
67 |     machine_type    = var.type
68 |   }
69 |
70 |   depends_on = [
71 |     google_service_account.gcp_dcr_cluster_sa,
72 |   ]
73 |   autoscaling {
74 |     # Never let the ceiling fall below the requested starting size.
75 |     max_node_count = max(3, var.num_nodes)
76 |   }
77 |
78 |   lifecycle {
79 |     # Changing initial_node_count forces the pool to be recreated, and resizing
80 |     # can alter the stored value, so later drift on it is ignored.
81 |     ignore_changes = [initial_node_count]
82 |   }
83 | }
84 |
--------------------------------------------------------------------------------
/resources/global/database.tf:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2024 TikTok Pte. Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | # MySQL 8.0 Cloud SQL instance with public IPv4 disabled, attached to the
18 | # data clean room VPC as its private network.
19 | resource "google_sql_database_instance" "dcr_database_instance" {
20 |   project          = var.project_id
21 |   region           = var.region
22 |   name             = "dcr-${var.env}-db-instance"
23 |   database_version = "MYSQL_8_0"
24 |
25 |   settings {
26 |     tier = "db-f1-micro"
27 |     ip_configuration {
28 |       ipv4_enabled                                  = false
29 |       private_network                               = google_compute_network.data_clean_room_network.id
30 |       enable_private_path_for_google_cloud_services = true
31 |     }
32 |   }
33 |
34 |   lifecycle {
35 |     prevent_destroy = false
36 |   }
37 |
38 |   # Explicit ordering after the subnetwork, the reserved peering range and the
39 |   # private services connection.
40 |   depends_on = [
41 |     google_compute_subnetwork.data_clean_room_subnetwork,
42 |     google_compute_global_address.dcr_private_address,
43 |     google_service_networking_connection.private_vpc_connection,
44 |   ]
45 | }
46 |
--------------------------------------------------------------------------------
/resources/global/network.tf:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2024 TikTok Pte. Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | # VPC network for the data clean room; subnetworks are declared explicitly.
18 | resource "google_compute_network" "data_clean_room_network" {
19 |   project                 = var.project_id
20 |   name                    = "dcr-${var.env}-network"
21 |   auto_create_subnetworks = false
22 | }
23 |
24 | # Internal /16 address range reserved on the network for VPC peering.
25 | resource "google_compute_global_address" "dcr_private_address" {
26 |   project       = var.project_id
27 |   name          = "dcr-${var.env}-private-address"
28 |   network       = google_compute_network.data_clean_room_network.self_link
29 |   purpose       = "VPC_PEERING"
30 |   address_type  = "INTERNAL"
31 |   prefix_length = 16
32 | }
33 |
34 | # Service networking connection that uses the reserved range above.
35 | resource "google_service_networking_connection" "private_vpc_connection" {
36 |   depends_on              = [google_compute_global_address.dcr_private_address]
37 |   service                 = "servicenetworking.googleapis.com"
38 |   network                 = google_compute_network.data_clean_room_network.self_link
39 |   reserved_peering_ranges = [google_compute_global_address.dcr_private_address.name]
40 | }
41 |
42 | # Dual-stack subnetwork (10.0.0.0/22 plus external IPv6) in the configured region.
43 | resource "google_compute_subnetwork" "data_clean_room_subnetwork" {
44 |   depends_on = [google_service_networking_connection.private_vpc_connection]
45 |
46 |   project = var.project_id
47 |   name    = "dcr-${var.env}-subnetwork"
48 |   region  = var.region
49 |   network = google_compute_network.data_clean_room_network.id
50 |
51 |   ip_cidr_range    = "10.0.0.0/22"
52 |   stack_type       = "IPV4_IPV6"
53 |   ipv6_access_type = "EXTERNAL"
54 | }
55 |
--------------------------------------------------------------------------------
/resources/global/providers.tf:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2024 TikTok Pte. Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | # Providers used by the global module.
18 | terraform {
19 |   required_providers {
20 |     google = {
21 |       source = "hashicorp/google"
22 |     }
23 |     # Declared explicitly because a kubernetes provider is configured below.
24 |     kubernetes = {
25 |       source = "hashicorp/kubernetes"
26 |     }
27 |   }
28 | }
29 |
30 | # Kubernetes provider configured from the local kubeconfig file.
31 | provider "kubernetes" {
32 |   config_path = "~/.kube/config"
33 | }
34 |
35 | # OpenID user info for the credentials the google provider is running with.
36 | data "google_client_openid_userinfo" "me" {}
37 |
--------------------------------------------------------------------------------
/resources/global/repositories.tf:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2024 TikTok Pte. Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | # Docker-format Artifact Registry repository for user images, one per environment.
18 | resource "google_artifact_registry_repository" "dcr_user_images" {
19 |   repository_id = "dcr-${var.env}-user-images"
20 |   format        = "DOCKER"
21 |   description   = "The repository stores the images that are built by data clean room API and running in the confidential space."
22 |
23 |   project  = var.project_id
24 |   location = "us"
25 | }
26 |
--------------------------------------------------------------------------------
/resources/global/service_accounts.tf:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2024 TikTok Pte. Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | # GCP service accounts, all named after the deployment environment.
18 |
19 | # Service account for the data clean room cluster.
20 | resource "google_service_account" "gcp_dcr_cluster_sa" {
21 |   project      = var.project_id
22 |   account_id   = "dcr-${var.env}-cluster-sa"
23 |   display_name = "A Service account for data clean room cluster"
24 | }
25 |
26 | # Service account for the confidential VM.
27 | resource "google_service_account" "gcp_cvm_sa" {
28 |   project      = var.project_id
29 |   account_id   = "dcr-${var.env}-cvm-sa"
30 |   display_name = "A Service account for confidential vm"
31 | }
32 |
33 | # Service account for the data clean room API pod.
34 | resource "google_service_account" "gcp_dcr_pod_sa" {
35 |   project      = var.project_id
36 |   account_id   = "dcr-${var.env}-pod-sa"
37 |   display_name = "A Service account for data clean room api pod"
38 | }
39 |
40 | # Service account for the JupyterHub single-user pod.
41 | resource "google_service_account" "gcp_jupyter_pod_sa" {
42 |   project      = var.project_id
43 |   account_id   = "jupyter-${var.env}-pod-sa"
44 |   display_name = "A Service account for jupyterhub single user pod"
45 | }
46 |
--------------------------------------------------------------------------------
/resources/global/variables.tf:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2024 TikTok Pte. Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | # Input variables for the global module.
18 |
19 | variable "env" {
20 |   description = "Deployment environment, e.g., dev, prod, oss"
21 |   type        = string
22 | }
23 |
24 | variable "region" {
25 |   description = "Region to create the gcp resources"
26 |   type        = string
27 | }
28 |
29 | variable "zone" {
30 |   description = "Zone to create the gcp resources"
31 |   type        = string
32 | }
33 |
34 | variable "project_id" {
35 |   description = "The GCP project ID"
36 |   type        = string
37 | }
38 |
39 | # Machine type of the GKE nodes.
40 | variable "type" {
41 |   description = "Instance type for the GKE instances"
42 |   type        = string
43 |   default     = "c3-highcpu-22"
44 | }
45 |
46 | variable "num_nodes" {
47 |   description = "Number of nodes to create in the GKE cluster"
48 |   type        = number
49 |   default     = 1
50 | }
51 |
--------------------------------------------------------------------------------
/resources/minikube/apply.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright 2024 TikTok Pte. Ltd.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | # Prepares a local minikube deployment: writes terraform.tfvars with a freshly
17 | # generated MySQL password, then runs `terraform init` and `terraform apply`.
18 | if ! command -v minikube &> /dev/null
19 | then
20 |   echo "Minikube is not installed. Please install it first. https://minikube.sigs.k8s.io/docs/start/"
21 |   exit 1
22 | fi
23 |
24 | env="minikube"
25 | namespace="manatee"
26 | dbuser="manatee"
27 | # Random alphanumeric password.
28 | # NOTE(review): this line was truncated in the source; the input redirection
29 | # and the length of 16 are reconstructed - confirm against upstream.
30 | dbpwd=$(LC_ALL=C tr -dc 'a-zA-Z0-9' < /dev/urandom | head -c 16)
31 |
32 | # The first write truncates any previous terraform.tfvars; the rest append.
33 | echo -e "env=\"$env\"" > terraform.tfvars
34 | echo -e "namespace=\"$namespace\"" >> terraform.tfvars
35 | echo -e "mysql_username=\"$dbuser\"" >> terraform.tfvars
36 | echo -e "mysql_password=\"$dbpwd\"" >> terraform.tfvars
37 |
38 | # Do not apply against a backend that failed to initialise.
39 | terraform init -reconfigure || exit 1
40 | terraform apply
41 |
42 | # Only affects the shell running this script; source the script if the
43 | # docker environment should persist in the calling shell.
44 | eval "$(minikube docker-env)"
--------------------------------------------------------------------------------
/resources/minikube/namespace.tf:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2024 TikTok Pte. Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | # Kubernetes namespace named by var.namespace.
18 | resource "kubernetes_namespace" "data_clean_room_k8s_namespace" {
19 |   metadata {
20 |     name = var.namespace
21 |   }
22 | }
23 |
--------------------------------------------------------------------------------
/resources/minikube/providers.tf:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2024 TikTok Pte. Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | # Providers used by the minikube module.
18 | terraform {
19 |   required_providers {
20 |     kubernetes = {
21 |       source = "hashicorp/kubernetes"
22 |     }
23 |   }
24 | }
25 |
26 | # Kubernetes provider configured from the local kubeconfig file.
27 | provider "kubernetes" {
28 |   config_path = "~/.kube/config"
29 | }
30 |
--------------------------------------------------------------------------------
/resources/minikube/role.tf:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2024 TikTok Pte. Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | # Namespaced Role allowing full management of Jobs, Pods and Pod logs.
18 | resource "kubernetes_role" "role" {
19 |   metadata {
20 |     name = "dcr-pod-role"
21 |     # Reference the namespace resource rather than var.namespace so Terraform
22 |     # orders this Role after the namespace it lives in.
23 |     namespace = kubernetes_namespace.data_clean_room_k8s_namespace.metadata[0].name
24 |   }
25 |
26 |   rule {
27 |     # "batch" serves jobs; "" is the core API group serving pods and pods/log.
28 |     api_groups = ["batch", ""]
29 |     resources  = ["jobs", "pods", "pods/log"]
30 |     verbs      = ["get", "list", "watch", "create", "update", "patch", "delete"]
31 |   }
32 | }
33 |
34 | # Grants the Role above to the DCR pod Kubernetes service account.
35 | resource "kubernetes_role_binding" "role_binding" {
36 |   metadata {
37 |     name      = "dcr-pod-role-binding"
38 |     namespace = kubernetes_namespace.data_clean_room_k8s_namespace.metadata[0].name
39 |   }
40 |   role_ref {
41 |     api_group = "rbac.authorization.k8s.io"
42 |     kind      = "Role"
43 |     name      = kubernetes_role.role.metadata[0].name
44 |   }
45 |   subject {
46 |     kind = "ServiceAccount"
47 |     name = kubernetes_service_account.k8s_dcr_pod_service_account.metadata[0].name
48 |     # Taken from the service account itself so the subject always matches it.
49 |     namespace = kubernetes_service_account.k8s_dcr_pod_service_account.metadata[0].namespace
50 |   }
51 | }
52 |
53 |
--------------------------------------------------------------------------------
/resources/minikube/secret.tf:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2024 TikTok Pte. Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | # Kubernetes Secret holding the MySQL username, password and database name.
18 | resource "kubernetes_secret" "secret" {
19 |   metadata {
20 |     namespace = kubernetes_namespace.data_clean_room_k8s_namespace.metadata[0].name
21 |     name      = "mysql-secret"
22 |   }
23 |
24 |   data = {
25 |     mysql-database = local.database
26 |     mysql-username = var.mysql_username
27 |     mysql-password = var.mysql_password
28 |   }
29 | }
30 |
--------------------------------------------------------------------------------
/resources/minikube/service_accounts.tf:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2024 TikTok Pte. Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | # Kubernetes service account for the DCR pod; created after the namespace.
18 | resource "kubernetes_service_account" "k8s_dcr_pod_service_account" {
19 |   depends_on                      = [kubernetes_namespace.data_clean_room_k8s_namespace]
20 |   automount_service_account_token = true
21 |
22 |   metadata {
23 |     namespace = var.namespace
24 |     name      = "dcr-k8s-pod-sa"
25 |   }
26 | }
27 |
28 | # Kubernetes service account for the Jupyter pod; created after the namespace.
29 | resource "kubernetes_service_account" "k8s_jupyter_pod_service_account" {
30 |   depends_on                      = [kubernetes_namespace.data_clean_room_k8s_namespace]
31 |   automount_service_account_token = true
32 |
33 |   metadata {
34 |     namespace = var.namespace
35 |     name      = "jupyter-k8s-pod-sa"
36 |   }
37 | }
38 |
--------------------------------------------------------------------------------
/resources/minikube/variables.tf:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2024 TikTok Pte. Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | # Input variables for the minikube module.
18 |
19 | variable "namespace" {
20 |   type        = string
21 |   description = "Kubernetes namespaces"
22 |   default     = ""
23 | }
24 |
25 | variable "env" {
26 |   type        = string
27 |   description = "Deployment environment, e.g., dev, prod, oss"
28 | }
29 |
30 | variable "mysql_username" {
31 |   type        = string
32 |   description = "Mysql username"
33 | }
34 |
35 | variable "mysql_password" {
36 |   type        = string
37 |   description = "Mysql password"
38 |   # Keep the password out of plan/apply output.
39 |   sensitive = true
40 | }
41 |
42 | # Database name derived from the namespace.
43 | locals {
44 |   database = "dcr-${var.namespace}-database"
45 | }
46 |
--------------------------------------------------------------------------------
/tutorials/code/insurance.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "id": "fc104190-b96c-4bf8-a7e7-4978c3f11259",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "!pip install google-cloud-resource-manager google-cloud-storage numpy seaborn matplotlib pandas scikit-learn xgboost > /dev/null"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": null,
16 | "id": "460f04a1-1f02-441b-94b9-084971624bd6",
17 | "metadata": {},
18 | "outputs": [],
19 | "source": [
20 | "import sdk\n",
21 | "import pandas\n",
22 | "import io\n",
23 | "import matplotlib.pyplot as plt\n",
24 | "import seaborn as sns"
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "execution_count": null,
30 | "id": "28a1e860-278c-4b02-aac6-badd8be70af7",
31 | "metadata": {},
32 | "outputs": [],
33 | "source": [
34 | "sdk.gcp.init(\"\", \"\",\"\")\n",
35 | "repo = sdk.DataRepo(\"gs://\", \"gs://\")\n",
36 | "raw = repo.get_data(\"insurance.csv\")\n",
37 | "data = pandas.read_csv(io.StringIO(raw))"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": null,
43 | "id": "8fc82891-1dc8-4f32-80ac-2d34d481067c",
44 | "metadata": {},
45 | "outputs": [],
46 | "source": [
47 | "data.info()"
48 | ]
49 | },
50 | {
51 | "cell_type": "code",
52 | "execution_count": null,
53 | "id": "e5b6fd79-08f1-4d29-9274-b78f6553ef3e",
54 | "metadata": {},
55 | "outputs": [],
56 | "source": [
57 | "data.head()"
58 | ]
59 | },
60 | {
61 | "cell_type": "code",
62 | "execution_count": null,
63 | "id": "d18097a2-4da1-4de2-a939-8b59caf4703e",
64 | "metadata": {},
65 | "outputs": [],
66 | "source": [
67 | "# Distribution Histogram\n",
68 | "fig, axes = plt.subplots(3, 3, figsize=(15, 15))\n",
69 | "for i, column in enumerate(data.columns):\n",
70 | " sns.histplot(data[column], ax=axes[i//3, i%3])\n",
71 | " axes[i//3, i%3].set_title(column)\n",
72 | "plt.tight_layout()\n",
73 | "plt.show()"
74 | ]
75 | },
76 | {
77 | "cell_type": "code",
78 | "execution_count": null,
79 | "id": "54ee0cc4-8dfd-457e-805b-c336fcc0556e",
80 | "metadata": {},
81 | "outputs": [],
82 | "source": [
83 | "\n",
84 | "# Correlation Heatmap\n",
85 | "data['sex'] = data['sex'].apply({'male':0,'female':1}.get) \n",
86 | "data['smoker'] = data['smoker'].apply({'yes':1, 'no':0}.get)\n",
87 | "data['region'] = data['region'].apply({'southwest':1, 'southeast':2, 'northwest':3, 'northeast':4}.get)\n",
88 | "plt.figure(figsize=(12, 10))\n",
89 | "sns.heatmap(data.corr(), annot=True, cmap='coolwarm', fmt='.2f', square=True, cbar_kws={\"shrink\": 0.75})\n",
90 | "plt.title('Correlation Heatmap - Strength of Relationships Between Features', fontsize=16)\n",
91 | "plt.xlabel('Features', fontsize=14)\n",
92 | "plt.ylabel('Features', fontsize=14)\n",
93 | "\n",
94 | "# Display the plot\n",
95 | "plt.show()"
96 | ]
97 | }
98 | ],
99 | "metadata": {
100 | "kernelspec": {
101 | "display_name": "Python 3 (ipykernel)",
102 | "language": "python",
103 | "name": "python3"
104 | },
105 | "language_info": {
106 | "codemirror_mode": {
107 | "name": "ipython",
108 | "version": 3
109 | },
110 | "file_extension": ".py",
111 | "mimetype": "text/x-python",
112 | "name": "python",
113 | "nbconvert_exporter": "python",
114 | "pygments_lexer": "ipython3",
115 | "version": "3.11.9"
116 | }
117 | },
118 | "nbformat": 4,
119 | "nbformat_minor": 5
120 | }
121 |
--------------------------------------------------------------------------------
/tutorials/code/regression.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "id": "37cb4626-0ba4-4a58-a08b-0087d55d286e",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "!pip install google-cloud-resource-manager google-cloud-storage numpy seaborn matplotlib pandas scikit-learn xgboost > /dev/null"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": null,
16 | "id": "680d25da-5522-4234-ad98-2cf13f80bae1",
17 | "metadata": {},
18 | "outputs": [],
19 | "source": [
20 | "from sklearn.model_selection import train_test_split\n",
21 | "from sklearn.metrics import root_mean_squared_error\n",
22 | "from xgboost import XGBRegressor\n",
23 | "import sdk\n",
24 | "import io"
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "execution_count": null,
30 | "id": "204ecd06-7d0b-41ae-af2b-e4ca1277b409",
31 | "metadata": {},
32 | "outputs": [],
33 | "source": [
34 | "\n",
35 | "sdk.gcp.init(\"\", \"\",\"\")\n",
36 | "repo = sdk.DataRepo(\"gs://\", \"gs://\")\n",
37 | "raw = repo.get_data(\"insurance.csv\")\n",
38 | "data = pandas.read_csv(io.StringIO(raw))"
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": null,
44 | "id": "d7d34424",
45 | "metadata": {},
46 | "outputs": [],
47 | "source": [
48 | "data['sex'] = data['sex'].apply({'male':0,'female':1}.get) \n",
49 | "data['smoker'] = data['smoker'].apply({'yes':1, 'no':0}.get)\n",
50 | "data['region'] = data['region'].apply({'southwest':1, 'southeast':2, 'northwest':3, 'northeast':4}.get)"
51 | ]
52 | },
53 | {
54 | "cell_type": "code",
55 | "execution_count": null,
56 | "id": "43f5f736-f987-48dc-977f-d75b9d683038",
57 | "metadata": {},
58 | "outputs": [],
59 | "source": [
60 | "X = data[['age','bmi', 'smoker']]\n",
61 | "y = data[['charges']]"
62 | ]
63 | },
64 | {
65 | "cell_type": "code",
66 | "execution_count": null,
67 | "id": "340ca175-db00-40fe-b2ae-0962f6fe6708",
68 | "metadata": {},
69 | "outputs": [],
70 | "source": [
71 | "# Assume 'X' contains features and 'y' is the target variable (charges)\n",
72 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)"
73 | ]
74 | },
75 | {
76 | "cell_type": "code",
77 | "execution_count": null,
78 | "id": "34276ee8-bc8b-4ec5-9be2-d2e817fe86ff",
79 | "metadata": {},
80 | "outputs": [],
81 | "source": [
82 | "model = XGBRegressor()\n",
83 | "model.fit(X_train, y_train)"
84 | ]
85 | },
86 | {
87 | "cell_type": "code",
88 | "execution_count": null,
89 | "id": "98468cf0-244d-49f1-814d-2dfd17f1eeba",
90 | "metadata": {},
91 | "outputs": [],
92 | "source": [
93 | "predictions = model.predict(X_test)"
94 | ]
95 | },
96 | {
97 | "cell_type": "code",
98 | "execution_count": null,
99 | "id": "ac29bb1e-944f-4672-b9e0-38c2f0218680",
100 | "metadata": {},
101 | "outputs": [],
102 | "source": [
103 | "rmse = root_mean_squared_error(y_test, predictions)\n",
104 | "print(f'Root Mean Squared Error: {rmse}')"
105 | ]
106 | }
107 | ],
108 | "metadata": {
109 | "kernelspec": {
110 | "display_name": "Python 3 (ipykernel)",
111 | "language": "python",
112 | "name": "python3"
113 | },
114 | "language_info": {
115 | "codemirror_mode": {
116 | "name": "ipython",
117 | "version": 3
118 | },
119 | "file_extension": ".py",
120 | "mimetype": "text/x-python",
121 | "name": "python",
122 | "nbconvert_exporter": "python",
123 | "pygments_lexer": "ipython3",
124 | "version": "3.11.9"
125 | }
126 | },
127 | "nbformat": 4,
128 | "nbformat_minor": 5
129 | }
130 |
--------------------------------------------------------------------------------
/tutorials/code/sdk/.gitignore:
--------------------------------------------------------------------------------
1 | .ipynb_checkpoints/
2 |
--------------------------------------------------------------------------------
/tutorials/code/sdk/__init__.py:
--------------------------------------------------------------------------------
1 | from .data import *
2 |
3 | __all__ = ["DataRepo", "gcp"]
--------------------------------------------------------------------------------
/tutorials/code/sdk/__version__.py:
--------------------------------------------------------------------------------
# Version number as a tuple of integer components.
VERSION = (0, 0, 1)

# Dotted string form of VERSION ("0.0.1" for the tuple above).
__version__ = ".".join(str(component) for component in VERSION)
4 |
--------------------------------------------------------------------------------
/tutorials/code/sdk/data.py:
--------------------------------------------------------------------------------
1 | import google.cloud.storage as gcs
2 | from google.cloud import resourcemanager_v3
3 | from google.auth import load_credentials_from_dict
4 | from enum import Enum
5 | import pandas as pd
6 | from urllib.parse import urlparse
7 | import os
8 | import logging
9 | import io
10 |
11 | logger = logging.getLogger(__name__)
12 |
class Gcp():
    """Container for the GCP settings used by the SDK.

    Every field starts as an empty string and is populated by ``init``.
    """

    def __init__(self):
        # Placeholders until init() is called.
        self.project_id = self.pool_name = ""
        self.project_number = self.service_account = ""

    def init(self, project_id, pool_name, service_account):
        """Store the given settings and look up the project number.

        The project number is obtained through ``get_project_number``, which
        calls the Resource Manager API.
        """
        self.project_id, self.pool_name, self.service_account = (
            project_id,
            pool_name,
            service_account,
        )
        self.project_number = self.get_project_number(project_id)

    def get_project_number(self, project_id):
        """Return the second "/"-separated segment of the project's resource name."""
        projects_client = resourcemanager_v3.ProjectsClient()
        resource = projects_client.get_project(name=f"projects/{project_id}")
        name_segments = resource.name.split('/')
        return name_segments[1]


# Module-level instance; RemoteStorageGCS reads its fields when building
# stage-2 credentials.
gcp = Gcp()
32 |
class Stage(Enum):
    """Stage tag passed to ``RemoteStorage.init`` when a backend is created."""

    # No stage selected.
    UNKNOWN = 0
    # RemoteStorageGCS builds a default ``gcs.Client()`` for this stage.
    STAGE1 = 1
    # RemoteStorageGCS builds an external-account (attestation token) client
    # for this stage.
    STAGE2 = 2
37 |
class DataRepo():
    """Reads files from the bucket that matches the current execution stage.

    One storage backend is created per stage at construction time; the stage
    used for a read is taken from the ``EXECUTION_STAGE`` environment variable
    at call time.
    """

    def __init__(self, stage_1_bucket, stage_2_bucket):
        """Create the stage-1 and stage-2 backends from their bucket URLs.

        Raises whatever ``RemoteStorage.init`` raises for an unsupported or
        invalid URL.
        """
        self.stage1 = RemoteStorage.init(Stage.STAGE1, stage_1_bucket)
        self.stage2 = RemoteStorage.init(Stage.STAGE2, stage_2_bucket)

    def get_data(self, filename):
        """Return the contents of ``filename`` from the current stage's bucket.

        When the stage is neither 1 nor 2 a warning is logged and ``filename``
        itself is returned unchanged.
        """
        # Read the environment once so both comparisons see the same value.
        stage = self.get_stage()
        if stage == 1:
            return self.stage1.get_data(filename)
        if stage == 2:
            return self.stage2.get_data(filename)
        logger.warning("Unknown stage")
        return filename

    def get_stage(self):
        """Return ``EXECUTION_STAGE`` as an int, or 0 when unset or non-numeric.

        Surrounding single or double quotes are stripped before parsing.
        0 is the value of ``Stage.UNKNOWN``, so a missing or malformed variable
        reaches the "Unknown stage" fallback in ``get_data`` instead of raising
        ``ValueError`` from ``int('')``.
        """
        raw_stage = os.getenv('EXECUTION_STAGE', '').strip('\'"')
        try:
            return int(raw_stage)
        except ValueError:
            return 0
55 |
class RemoteStorage():
    """Base class for storage backends, plus a factory keyed on URL scheme."""

    def __init__(self):
        pass

    def get_data(self, filename):
        """Placeholder overridden by subclasses; this base version returns None."""
        pass

    @staticmethod
    def init(stage, url):
        """Build the backend that matches the scheme of ``url``.

        Args:
            stage: Passed through unchanged to the backend constructor.
            url: Storage URL; only the ``gs://bucket/path`` form is supported.

        Returns:
            A ``RemoteStorageGCS`` for ``gs`` URLs.

        Raises:
            ValueError: ``url`` cannot be parsed or its scheme is not recognised.
            NotImplementedError: the scheme is ``s3`` or ``https``.
        """
        try:
            parsed = urlparse(url, allow_fragments=False)
        except Exception as e:
            # Chain the parser's error so the root cause stays in the traceback;
            # the f-string keeps this a ValueError even when url is not a str.
            raise ValueError(f"Invalid URL: {url}") from e

        if parsed.scheme == "gs":
            return RemoteStorageGCS(stage, parsed.netloc, parsed.path)
        elif parsed.scheme == "s3":
            raise NotImplementedError("S3 storage not implemented")
        elif parsed.scheme == "https":
            raise NotImplementedError("HTTPS storage not implemented")
        else:
            raise ValueError(f"Invalid scheme: {parsed.scheme}")
78 |
79 |
class RemoteStorageGCS(RemoteStorage):
    """Google Cloud Storage backend.

    The client is chosen by stage: ``Stage.STAGE1`` uses a default
    ``gcs.Client()``; ``Stage.STAGE2`` uses external-account credentials built
    from the module-level ``gcp`` settings and the attestation token file.
    For any other stage no client is created.
    """

    def __init__(self, stage, bucket_name, path):
        """Store the bucket and path and build the client for ``stage``.

        Args:
            stage: ``Stage`` member selecting how the client authenticates.
            bucket_name: Bucket name (the netloc of the ``gs://`` URL).
            path: Path part of the ``gs://`` URL; may be empty or start with "/".
        """
        super().__init__()
        self.bucket = bucket_name
        self.path = path

        if stage == Stage.STAGE1:
            self.client = gcs.Client()
        elif stage == Stage.STAGE2:
            # The gcp fields are read here, at construction time, so gcp.init()
            # has to be called before this object is created.
            credentials_dict = {
                "type": "external_account",
                "audience": "//iam.googleapis.com/projects/%s/locations/global/workloadIdentityPools/%s/providers/attestation-verifier"%(gcp.project_number, gcp.pool_name),
                "subject_token_type": "urn:ietf:params:oauth:token-type:jwt",
                "token_url": "https://sts.googleapis.com/v1/token",
                "credential_source": {
                    "file": "/run/container_launcher/attestation_verifier_claims_token"
                },
                "service_account_impersonation_url": "https://iamcredentials.googleapis.com/v1/projects/-/serviceAccounts/%s@%s.iam.gserviceaccount.com:generateAccessToken"%(gcp.service_account, gcp.project_id),
            }
            credentials, _ = load_credentials_from_dict(credentials_dict)
            self.client = gcs.Client(credentials=credentials)

    def get_data(self, filename):
        """Download ``filename`` from under the configured path and return it as text."""
        # urlparse yields the path as "/dir", but GCS object names are not
        # rooted: "/dir/file" and "dir/file" are different objects. Drop the
        # surrounding slashes and join with "/" explicitly, because
        # os.path.join keeps the leading slash and is OS-dependent.
        prefix = self.path.strip("/")
        object_name = f"{prefix}/{filename}" if prefix else filename
        # bucket() only builds a handle; get_bucket() would fetch the bucket's
        # metadata, which needs storage.buckets.get -- a permission that
        # object-level roles such as roles/storage.objectViewer do not grant.
        blob = self.client.bucket(self.bucket).blob(object_name)
        return blob.download_as_text()
108 |
--------------------------------------------------------------------------------
/tutorials/tutorial.sh:
--------------------------------------------------------------------------------
#!/bin/sh
#
# Provisions the GCP resources used by the tutorial: two buckets holding
# insurance.csv, read access for the Jupyter pod and TEE service accounts, and
# a workload identity pool with an attestation-verifier OIDC provider.
#
# The paths ../env.bzl and data/... are relative to the current directory.

#### Please fill the following variables before running ####
STAGE_1_BUCKET=
STAGE_2_BUCKET=
WORKLOAD_IDENTITY_POOL_NAME=
TEE_SERVICE_ACCOUNT=
############################################################

VAR_FILE="../env.bzl"
if [ ! -f "$VAR_FILE" ]; then
    echo "Error: Variables file does not exist."
    exit 1
fi

VAR_FILE=$(realpath "$VAR_FILE")
# "." is the POSIX spelling of "source"; plain sh implementations such as dash
# do not provide "source".
. "$VAR_FILE"

# require_value NAME VALUE: abort before touching any cloud resource when a
# required setting is empty.
require_value() {
    if [ -z "$2" ]; then
        echo "Error: $1 is not set." >&2
        exit 1
    fi
}

require_value STAGE_1_BUCKET "$STAGE_1_BUCKET"
require_value STAGE_2_BUCKET "$STAGE_2_BUCKET"
require_value WORKLOAD_IDENTITY_POOL_NAME "$WORKLOAD_IDENTITY_POOL_NAME"
require_value TEE_SERVICE_ACCOUNT "$TEE_SERVICE_ACCOUNT"
# project_id is not assigned in this script; it is expected from the sourced file.
require_value project_id "$project_id"

# data provisioning

gcloud storage buckets create "gs://${STAGE_1_BUCKET}"
gcloud storage buckets create "gs://${STAGE_2_BUCKET}"

gcloud storage cp data/stage1/insurance.csv "gs://${STAGE_1_BUCKET}"
gcloud storage cp data/stage2/insurance.csv "gs://${STAGE_2_BUCKET}"

# data permissions: stage 1

gcloud storage buckets add-iam-policy-binding "gs://${STAGE_1_BUCKET}" \
    --member="serviceAccount:jupyter-${env}-pod-sa@${project_id}.iam.gserviceaccount.com" \
    --role=roles/storage.objectViewer

# data permissions: stage 2

gcloud iam service-accounts create "$TEE_SERVICE_ACCOUNT"

gcloud storage buckets add-iam-policy-binding "gs://${STAGE_2_BUCKET}" \
    --member="serviceAccount:${TEE_SERVICE_ACCOUNT}@${project_id}.iam.gserviceaccount.com" \
    --role=roles/storage.objectViewer

gcloud iam workload-identity-pools create "$WORKLOAD_IDENTITY_POOL_NAME" \
    --location=global

# The principalSet member is addressed by project number, not project id.
PROJECT_NUMBER=$(gcloud projects describe "$project_id" --format="value(projectNumber)")

gcloud iam service-accounts add-iam-policy-binding \
    "${TEE_SERVICE_ACCOUNT}@${project_id}.iam.gserviceaccount.com" \
    --member="principalSet://iam.googleapis.com/projects/${PROJECT_NUMBER}/locations/global/workloadIdentityPools/${WORKLOAD_IDENTITY_POOL_NAME}/*" \
    --role=roles/iam.workloadIdentityUser

gcloud iam workload-identity-pools providers create-oidc attestation-verifier \
    --location=global \
    --workload-identity-pool="$WORKLOAD_IDENTITY_POOL_NAME" \
    --issuer-uri="https://confidentialcomputing.googleapis.com/" \
    --allowed-audiences="https://sts.googleapis.com" \
    --attribute-mapping="google.subject=\"gcpcs::\"+assertion.submods.container.image_digest+\"::\"+assertion.submods.gce.project_number+\"::\"+assertion.submods.gce.instance_id" \
    --attribute-condition="assertion.swname == 'CONFIDENTIAL_SPACE' && 'STABLE' in assertion.submods.confidential_space.support_attributes"
57 |
--------------------------------------------------------------------------------