├── .bazelrc ├── .env.example ├── .github └── workflows │ ├── docs.yml │ ├── lint.yaml │ └── pr.yml ├── .gitignore ├── BUILD ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── MODULE.bazel ├── Manatee_technical_charter_final_9-26-2024.pdf ├── README.md ├── app ├── api │ ├── .gitignore │ ├── .hz │ ├── BUILD.bazel │ ├── biz │ │ ├── dal │ │ │ ├── BUILD.bazel │ │ │ ├── db │ │ │ │ ├── BUILD.bazel │ │ │ │ ├── init.go │ │ │ │ └── job.go │ │ │ └── init.go │ │ ├── handler │ │ │ ├── BUILD.bazel │ │ │ ├── health.go │ │ │ └── job │ │ │ │ ├── BUILD.bazel │ │ │ │ └── job_handler.go │ │ ├── model │ │ │ └── job │ │ │ │ ├── BUILD.bazel │ │ │ │ └── job.go │ │ ├── pkg │ │ │ ├── errno │ │ │ │ ├── BUILD.bazel │ │ │ │ └── errno.go │ │ │ ├── storage │ │ │ │ ├── BUILD.bazel │ │ │ │ ├── gcs.go │ │ │ │ ├── minio.go │ │ │ │ ├── mock.go │ │ │ │ └── storage.go │ │ │ └── utils │ │ │ │ ├── BUILD.bazel │ │ │ │ └── resp.go │ │ ├── router │ │ │ ├── BUILD.bazel │ │ │ ├── job │ │ │ │ ├── BUILD.bazel │ │ │ │ ├── job.go │ │ │ │ └── middleware.go │ │ │ └── register.go │ │ └── service │ │ │ ├── BUILD.bazel │ │ │ ├── job_service.go │ │ │ └── job_service_test.go │ ├── idl │ │ └── job.thrift │ ├── main.go │ ├── router.go │ ├── router_gen.go │ └── script │ │ └── bootstrap.sh ├── executor │ ├── .gitignore │ ├── BUILD.bazel │ └── attestation │ │ ├── BUILD.bazel │ │ └── main.go ├── jupyterlab_manatee │ ├── .dockerignore │ ├── .gitignore │ ├── .yarnrc.yml │ ├── 20custom-hook.sh │ ├── BUILD.bazel │ ├── LICENSE │ ├── README.md │ ├── RELEASE.md │ ├── babel.config.js │ ├── install.json │ ├── jest.config.js │ ├── jupyter-config │ │ └── jupyter_server_config.d │ │ │ └── jupyterlab_manatee.json │ ├── jupyterlab_manatee │ │ ├── __init__.py │ │ └── handlers.py │ ├── noble.lock.json │ ├── noble.yaml │ ├── package.json │ ├── pyproject.toml │ ├── requirements.in │ ├── requirements.txt │ ├── requirements_linux.txt │ ├── setup.py │ ├── src │ │ ├── __tests__ │ │ │ └── jupyterlab_manatee.spec.ts │ │ ├── index.ts │ │ ├── 
jobs.tsx │ │ ├── sidebar.ts │ │ └── sources.ts │ ├── style │ │ ├── base.css │ │ ├── index.css │ │ └── index.js │ ├── tsconfig.json │ ├── tsconfig.test.json │ ├── ui-tests │ │ ├── README.md │ │ ├── jupyter_server_test_config.py │ │ ├── package.json │ │ ├── playwright.config.js │ │ ├── tests │ │ │ └── jupyterlab_manatee.spec.ts │ │ └── yarn.lock │ └── yarn.lock └── reconciler │ ├── BUILD.bazel │ ├── imagebuilder │ ├── BUILD.bazel │ ├── kaniko.go │ └── kaniko_test.go │ ├── main.go │ ├── reconciler.go │ ├── reconciler_test.go │ ├── registry │ ├── BUILD.bazel │ └── registry.go │ └── tee_backend │ ├── BUILD.bazel │ ├── confidential_space.go │ └── mock_teebackend.go ├── deployment ├── deploy.sh ├── jupyterhub │ ├── config.yaml │ └── deploy.sh ├── manatee │ ├── .helmignore │ ├── Chart.yaml │ ├── config.yaml │ ├── deploy.sh │ ├── templates │ │ ├── NOTES.txt │ │ ├── _helpers.tpl │ │ ├── configmap.yaml │ │ ├── deployment.yaml │ │ ├── ingress.yaml │ │ ├── reconciler.yaml │ │ ├── service.yaml │ │ └── tests │ │ │ └── test-connection.yaml │ └── values.yaml └── minikube │ ├── deploy.sh │ ├── minio-dev.yaml │ ├── mysql-deployment.yaml │ └── mysql-service.yaml ├── docs ├── assets │ └── img │ │ ├── arch.png │ │ ├── jobs.png │ │ ├── logo.png │ │ ├── manatee-architecture.png │ │ ├── manatee-white.png │ │ ├── manatee.png │ │ ├── plugin.png │ │ ├── stage-1.png │ │ ├── two-stage.png │ │ └── unzip.png ├── blog │ ├── index.md │ └── posts │ │ └── 2025-01-community-release.md ├── developer │ └── architecture.md ├── getting-started │ ├── building.md │ ├── deployment.md │ ├── llm-model-evaluation.md │ ├── minikube.md │ └── tutorials.md ├── index.md ├── project-status.md └── stylesheets │ └── extra.css ├── go.mod ├── go.sum ├── mkdocs.yml ├── resources ├── .gitignore ├── deployment │ ├── apply.sh │ ├── backend.tf │ ├── cluster_rolebinding.tf │ ├── db_account.tf │ ├── namespace.tf │ ├── providers.tf │ ├── repositories.tf │ ├── role.tf │ ├── secret.tf │ ├── service_accounts.tf │ └── variables.tf 
├── global │ ├── apply.sh │ ├── backend.tf │ ├── buckets.tf │ ├── cluster.tf │ ├── database.tf │ ├── iam.tf │ ├── network.tf │ ├── providers.tf │ ├── repositories.tf │ ├── service_accounts.tf │ └── variables.tf └── minikube │ ├── apply.sh │ ├── namespace.tf │ ├── providers.tf │ ├── role.tf │ ├── secret.tf │ ├── service_accounts.tf │ └── variables.tf └── tutorials ├── code ├── insurance.ipynb ├── regression.ipynb └── sdk │ ├── .gitignore │ ├── __init__.py │ ├── __version__.py │ └── data.py ├── data ├── stage1 │ └── insurance.csv └── stage2 │ └── insurance.csv └── tutorial.sh /.bazelrc: -------------------------------------------------------------------------------- 1 | common --enable_bzlmod 2 | build --host_macos_minimum_os=13.3 3 | build --macos_minimum_os=13.3 4 | build --apple_platform_type=macos 5 | -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | env="" 2 | project_id="" 3 | region="" 4 | zone="" 5 | -------------------------------------------------------------------------------- /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: docs 2 | on: 3 | push: 4 | branches: [main] 5 | permissions: 6 | contents: write 7 | jobs: 8 | deploy: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v4 12 | - name: Configure Git Credentials 13 | run: | 14 | git config user.name github-actions[bot] 15 | git config user.email 41898282+github-actions[bot]@users.noreply.github.com 16 | - uses: actions/setup-python@v5 17 | with: 18 | python-version: 3.x 19 | - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV 20 | - uses: actions/cache@v4 21 | with: 22 | key: mkdocs-material-${{ env.cache_id }} 23 | path: .cache 24 | restore-keys: | 25 | mkdocs-material- 26 | - run: pip install mkdocs-material 27 | - run: mkdocs gh-deploy --force 28 | 
-------------------------------------------------------------------------------- /.github/workflows/lint.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | pull_request: 3 | branches: [main] 4 | jobs: 5 | format: 6 | name: Enforce Code Format 7 | runs-on: ubuntu-latest 8 | steps: 9 | - name: Check out code 10 | uses: actions/checkout@v4 11 | - name: Set up Bazel 12 | uses: bazel-contrib/setup-bazel@0.9.0 13 | with: 14 | bazelisk-cache: true 15 | disk-cache: ${{ github.workflow }} 16 | repository-cache: true 17 | - name: Check code format 18 | run: | 19 | bazelisk run @go_sdk//:bin/gofmt -- -l . > gofmt_output.txt || true 20 | if [ -s gofmt_output.txt ]; then 21 | echo "Following files are not properly formatted:" 22 | cat gofmt_output.txt 23 | echo "Please run: bazelisk run @go_sdk//:bin/gofmt -- -w ." 24 | exit 1 25 | else 26 | echo "All files are properly formatted!" 27 | fi 28 | 29 | -------------------------------------------------------------------------------- /.github/workflows/pr.yml: -------------------------------------------------------------------------------- 1 | on: 2 | pull_request: 3 | branches: [main] 4 | jobs: 5 | build: 6 | name: Build & Test Everything 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v4 10 | - uses: bazel-contrib/setup-bazel@0.9.0 11 | with: 12 | # Avoid downloading Bazel every time. 13 | bazelisk-cache: true 14 | # Store build cache per workflow. 15 | disk-cache: ${{ github.workflow }} 16 | # Share repository cache between workflows. 17 | repository-cache: true 18 | - run: cp .env.example env.bzl 19 | - run: bazel build //... 20 | - run: bazel test //... 
load("@gazelle//:def.bzl", "gazelle")
load("@rules_multirun//:defs.bzl", "multirun")
load("@rules_oci//oci:defs.bzl", "oci_push")

# NOTE(review): `region` and `zone` are loaded but not referenced anywhere in
# this file.
load("//:env.bzl", "env", "project_id", "region", "zone")

# gazelle:prefix github.com/manatee-project/manatee
gazelle(name = "gazelle")

# Prefix shared by every image repository: registry host, project and environment.
_REPO_PREFIX = "us-docker.pkg.dev/{}/dcr-{}-".format(project_id, env)

# Maps each directory under //app to the repository its image is pushed to.
# `$$namespace` is left in the string for `envsubst` to fill in inside the
# genrule below (presumably `$$` is the genrule escape for a literal `$` --
# confirm against the Bazel genrule documentation).
REPOS = {
    "api": _REPO_PREFIX + "$$namespace-images/manatee-api",
    "reconciler": _REPO_PREFIX + "$$namespace-images/manatee-reconciler",
    "jupyterlab_manatee": _REPO_PREFIX + "$$namespace-images/manatee-jupyterlab-singleuser",
    "executor": _REPO_PREFIX + "user-images/manatee-executor-base",
}

# One `<app>_repo` target per image; it writes the resolved repository path to
# `<app>_repo.txt`.
[
    genrule(
        name = app + "_repo",
        outs = [app + "_repo.txt"],
        cmd = "echo '" + repo + "' | envsubst > $@",
    )
    for app, repo in REPOS.items()
]

# One `push_<app>_image` target per image, reading its destination from the
# matching `<app>_repo` file.
[
    oci_push(
        name = "push_" + app + "_image",
        image = "//app/" + app + ":image",
        remote_tags = ["latest"],
        repository_file = ":" + app + "_repo",
    )
    for app in REPOS
]

# Runs every push target.
# NOTE(review): `jobs = 0` presumably means "no concurrency limit" in
# rules_multirun -- confirm.
multirun(
    name = "push_all_images",
    commands = ["push_" + app + "_image" for app in REPOS],
    jobs = 0,
)

# Runs every //app/<app>:load_image target.
multirun(
    name = "load_all_images",
    commands = ["//app/" + app + ":load_image" for app in REPOS],
)
-------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to ManaTEE 2 | 3 | We happily welcome contributions to ManaTEE. We use [GitHub Issues](https://github.com/manatee-project/manatee/issues) to track community-reported issues and [GitHub Pull Requests](https://github.com/manatee-project/manatee/pulls) for accepting changes. 4 | 5 | 6 | Read our [Code of Conduct](./CODE_OF_CONDUCT.md) to keep our community approachable and respectable. 7 | 8 | This guide details how to use issues and pull requests to improve the project. 9 | 10 | ## General Guidelines 11 | 12 | ### Pull Requests 13 | 14 | Make sure to keep Pull Requests small and functional to make them easier to review, understand, and look up in commit history. 15 | 16 | Adding the appropriate documentation, unit tests and e2e tests as part of a feature is the responsibility of the feature owner, whether it is done in the same Pull Request or not. 17 | 18 | Pull Requests should follow the "Title: Description" format, where the Description describes what part of the code is being modified. 19 | 20 | ### Design Docs 21 | 22 | A contributor proposes a design with a PR on the repository to allow for revisions and discussions. If a design needs to be discussed before formulating a document for it, use a Google Doc and a [GitHub issue](https://github.com/manatee-project/manatee/issues) to involve the community in the discussion. 23 | 24 | ### GitHub Issues 25 | 26 | GitHub Issues are used to file bugs, work items, and feature requests with actionable items/issues (please refer to the "Reporting Bugs/Feature Requests" section below for more information). 
27 | 28 | ### Reporting Bugs/Feature Requests 29 | 30 | We welcome you to use the GitHub issue tracker to report bugs or suggest features that have actionable items/issues (as opposed to introducing a feature request on GitHub Discussions). 31 | 32 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 33 | 34 | - A reproducible test case or series of steps 35 | - The version of the code being used 36 | - Any modifications you've made relevant to the bug 37 | - Anything unusual about your environment or deployment 38 | 39 | ## Contributing via Pull Requests 40 | 41 | ### Find an interesting issue 42 | 43 | If you spot a problem, [search if an issue already exists](https://github.com/manatee-project/manatee/issues). If a related issue doesn't exist, you can open a new issue by clicking [New issue](https://github.com/manatee-project/manatee/issues/new). 44 | 45 | 46 | ### Open a Pull Request 47 | 48 | When you're done making the changes, open a [Pull Request](https://github.com/manatee-project/manatee/pulls) and fill in the PR template so we can better review your PR. The [template](https://github.com/manatee-project/manatee/issues/new) helps reviewers understand your changes and the purpose of your pull request. 49 | 50 | Don't forget to link the PR to the issue if you are solving one. 51 | 52 | If you run into any merge issues, check out this [GitHub tutorial](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/addressing-merge-conflicts) to help you resolve merge conflicts and other issues. 53 | 54 | 55 | ## Finding contributions to work on 56 | 57 | Looking at the existing issues is a great way to find something to contribute to. 
As our project, by default, uses the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' or 'good first issue' issues is a great place to start. 58 | -------------------------------------------------------------------------------- /MODULE.bazel: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Bazel now uses Bzlmod by default to manage external dependencies. 3 | # Please consider migrating your external dependencies from WORKSPACE to MODULE.bazel. 4 | # 5 | # For more details, please check https://github.com/bazelbuild/bazel/issues/18958 6 | ############################################################################### 7 | 8 | # rules_proto 9 | bazel_dep(name = "rules_proto", version = "7.1.0") 10 | 11 | # rules_python 12 | bazel_dep(name = "rules_python", version = "0.40.0") 13 | 14 | pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip") 15 | pip.parse( 16 | hub_name = "pydeps", 17 | python_version = "3.11", 18 | requirements_linux = "//app/jupyterlab_manatee:requirements_linux.txt", 19 | requirements_lock = "//app/jupyterlab_manatee:requirements.txt", 20 | ) 21 | use_repo(pip, "pydeps") 22 | 23 | # rules nodejs 24 | bazel_dep(name = "aspect_rules_js", version = "2.0.2") 25 | bazel_dep(name = "rules_nodejs", version = "6.2.0") 26 | 27 | node = use_extension("@rules_nodejs//nodejs:extensions.bzl", "node", dev_dependency = True) 28 | node.toolchain( 29 | name = "nodejs", 30 | node_version = "18.17.1", 31 | ) 32 | use_repo(node, "nodejs", "nodejs_toolchains") 33 | 34 | # rules_oci 35 | bazel_dep(name = "rules_oci", version = "2.0.0") 36 | 37 | oci = use_extension("@rules_oci//oci:extensions.bzl", "oci") 38 | 39 | # pull base image for scipy-notebook 40 | oci.pull( 41 | name = "scipy-notebook", 42 | digest = 
"sha256:dc0f8efb6f288d5fc67a94715963282f8066cb3b93324131585edaa0a7a46780", 43 | image = "quay.io/jupyter/scipy-notebook", 44 | platforms = [ 45 | "linux/amd64", 46 | ], 47 | ) 48 | oci.pull( 49 | name = "distroless_base", 50 | digest = "sha256:ccaef5ee2f1850270d453fdf700a5392534f8d1a8ca2acda391fbb6a06b81c86", 51 | image = "gcr.io/distroless/base", 52 | platforms = [ 53 | "linux/amd64", 54 | "linux/arm64", 55 | ], 56 | ) 57 | use_repo( 58 | oci, 59 | "distroless_base", 60 | "distroless_base_linux_amd64", 61 | "distroless_base_linux_arm64", 62 | "scipy-notebook", 63 | "scipy-notebook_linux_amd64", 64 | ) 65 | 66 | # A multi-arch base image with variants, note that it won't work with just "linux/arm64" 67 | 68 | # rules_pkg 69 | bazel_dep(name = "rules_pkg", version = "1.0.1") 70 | bazel_dep(name = "rules_go", version = "0.50.1") 71 | bazel_dep(name = "gazelle", version = "0.43.0") 72 | 73 | go_sdk = use_extension("@rules_go//go:extensions.bzl", "go_sdk") 74 | go_sdk.download( 75 | name = "go_sdk", 76 | version = "1.23.8", 77 | ) 78 | use_repo(go_sdk, "go_sdk") 79 | 80 | register_toolchains("@go_sdk//:all") 81 | 82 | # gazelle:proto disable_global 83 | go_deps = use_extension("@gazelle//:extensions.bzl", "go_deps") 84 | go_deps.from_file(go_mod = "//:go.mod") 85 | go_deps.gazelle_default_attributes( 86 | build_extra_args = [ 87 | "-go_naming_convention_external=go_default_library", 88 | ], 89 | build_file_generation = "on", 90 | directives = [ 91 | "gazelle:proto disable", 92 | ], 93 | ) 94 | go_deps.gazelle_override( 95 | build_file_generation = "clean", 96 | directives = [ 97 | "gazelle:build_file_name BUILD.bazel", 98 | ], 99 | path = "github.com/envoyproxy/protoc-gen-validate", 100 | ) 101 | use_repo( 102 | go_deps, 103 | "com_github_apache_thrift", 104 | "com_github_cloudwego_hertz", 105 | "com_github_gin_gonic_gin", 106 | "com_github_google_uuid", 107 | "com_github_minio_minio_go_v7", 108 | "com_github_pkg_errors", 109 | "com_google_cloud_go_compute", 110 | 
"com_google_cloud_go_iam", 111 | "com_google_cloud_go_storage", 112 | "io_gorm_driver_mysql", 113 | "io_gorm_gorm", 114 | "io_k8s_api", 115 | "io_k8s_apimachinery", 116 | "io_k8s_client_go", 117 | "org_golang_google_protobuf", 118 | ) 119 | 120 | bazel_dep(name = "rules_multirun", version = "0.10.0") 121 | bazel_dep(name = "rules_distroless", version = "0.5.1") 122 | 123 | apt = use_extension( 124 | "@rules_distroless//apt:extensions.bzl", 125 | "apt", 126 | dev_dependency = True, 127 | ) 128 | apt.install( 129 | name = "noble", 130 | lock = "//app/jupyterlab_manatee:noble.lock.json", 131 | manifest = "//app/jupyterlab_manatee:noble.yaml", 132 | ) 133 | use_repo(apt, "noble") 134 | -------------------------------------------------------------------------------- /Manatee_technical_charter_final_9-26-2024.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manatee-project/manatee/07d4c7d5f6fb4f1d5e91252d30cc1f133467abdc/Manatee_technical_charter_final_9-26-2024.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # ManaTEE Project 4 | 5 | ManaTEE is an open-source project for easily building and deploying a data collaboration framework to the cloud using trusted execution environments (TEEs). 6 | It allows users to easily collaborate on private datasets without compromising the privacy of individual data. 7 | ManaTEE achieves this by combining different privacy-enhancing technologies (PETs) in different stages. 8 | 9 | # What does it offer? 10 | 11 | ManaTEE allows organizations to quickly customize and deploy a data collaboration framework in the cloud. 12 | The organizations can provide a programming environment to external data scientists to conduct research, while protecting data privacy with a custom policy. 
13 | 14 | > Note: ManaTEE is under active development, and it is not production-ready. We are looking forward to your feedback and contributions. 15 | 16 | # Quick Start 17 | 18 | Install Bazel with [Bazelisk](https://github.com/bazelbuild/bazelisk): 19 | ```sh 20 | brew install bazelisk # on MacOS 21 | choco install bazelisk # on Windows 22 | ``` 23 | On Ubuntu, download the latest Bazelisk binary via [Releases](https://github.com/bazelbuild/bazelisk/releases) 24 | 25 | Build all images 26 | ``` 27 | bazelisk build //... 28 | ``` 29 | 30 | Run all tests 31 | ``` 32 | bazelisk test //... 33 | ``` 34 | 35 | See [documents](https://manatee-project.github.io/manatee) for more details including cloud deployment. 36 | # License 37 | 38 | ManaTEE is licensed under the Apache License 2.0. 39 | See [LICENSE](LICENSE) for details. -------------------------------------------------------------------------------- /app/api/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.a 3 | *.so 4 | _obj 5 | _test 6 | *.[568vq] 7 | [568vq].out 8 | *.cgo1.go 9 | *.cgo2.c 10 | _cgo_defun.c 11 | _cgo_gotypes.go 12 | _cgo_export.* 13 | _testmain.go 14 | *.exe 15 | *.exe~ 16 | *.test 17 | *.prof 18 | *.rar 19 | *.zip 20 | *.gz 21 | *.psd 22 | *.bmd 23 | *.cfg 24 | *.pptx 25 | *.log 26 | *nohup.out 27 | *settings.pyc 28 | *.sublime-project 29 | *.sublime-workspace 30 | !.gitkeep 31 | .DS_Store 32 | /.idea 33 | /.vscode 34 | /output 35 | *.local.yml 36 | dumped_hertz_remote_config.json 37 | conf 38 | github.com 39 | api 40 | -------------------------------------------------------------------------------- /app/api/.hz: -------------------------------------------------------------------------------- 1 | // Code generated by hz. DO NOT EDIT. 
2 | 3 | hz version: v0.9.1 4 | handlerDir: "" 5 | modelDir: biz/model 6 | routerDir: "" 7 | -------------------------------------------------------------------------------- /app/api/BUILD.bazel: -------------------------------------------------------------------------------- 1 | load("@rules_go//go:def.bzl", "go_binary", "go_library") 2 | load("@rules_oci//oci:defs.bzl", "oci_image", "oci_load") 3 | load("@rules_pkg//pkg:tar.bzl", "pkg_tar") 4 | 5 | go_binary( 6 | name = "api", 7 | embed = [":api_lib"], 8 | goarch = "amd64", 9 | goos = "linux", 10 | visibility = ["//visibility:public"], 11 | ) 12 | 13 | go_library( 14 | name = "api_lib", 15 | srcs = [ 16 | "main.go", 17 | "router.go", 18 | "router_gen.go", 19 | ], 20 | importpath = "github.com/manatee-project/manatee/app/api", 21 | visibility = ["//visibility:private"], 22 | deps = [ 23 | "//app/api/biz/dal", 24 | "//app/api/biz/handler", 25 | "//app/api/biz/router", 26 | "@com_github_cloudwego_hertz//pkg/app/server", 27 | ], 28 | ) 29 | 30 | pkg_tar( 31 | name = "tar", 32 | srcs = [":api"], 33 | ) 34 | 35 | oci_image( 36 | name = "image", 37 | base = "@distroless_base_linux_amd64", 38 | entrypoint = ["/api"], 39 | tars = [ 40 | ":tar", 41 | ], 42 | visibility = ["//visibility:public"], 43 | ) 44 | 45 | oci_load( 46 | name = "load_image", 47 | image = ":image", 48 | repo_tags = ["api:latest"], 49 | visibility = ["//visibility:public"], 50 | ) 51 | -------------------------------------------------------------------------------- /app/api/biz/dal/BUILD.bazel: -------------------------------------------------------------------------------- 1 | load("@rules_go//go:def.bzl", "go_library") 2 | 3 | go_library( 4 | name = "dal", 5 | srcs = ["init.go"], 6 | importpath = "github.com/manatee-project/manatee/app/api/biz/dal", 7 | visibility = ["//visibility:public"], 8 | deps = ["//app/api/biz/dal/db"], 9 | ) 10 | -------------------------------------------------------------------------------- /app/api/biz/dal/db/BUILD.bazel: 
-------------------------------------------------------------------------------- 1 | load("@rules_go//go:def.bzl", "go_library") 2 | 3 | go_library( 4 | name = "db", 5 | srcs = [ 6 | "init.go", 7 | "job.go", 8 | ], 9 | importpath = "github.com/manatee-project/manatee/app/api/biz/dal/db", 10 | visibility = ["//visibility:public"], 11 | deps = [ 12 | "@com_github_pkg_errors//:errors", 13 | "@io_gorm_driver_mysql//:mysql", 14 | "@io_gorm_gorm//:gorm", 15 | "@io_gorm_gorm//logger", 16 | ], 17 | ) 18 | -------------------------------------------------------------------------------- /app/api/biz/dal/db/init.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 TikTok Pte. Ltd. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package db 16 | 17 | import ( 18 | "fmt" 19 | "os" 20 | 21 | "gorm.io/driver/mysql" 22 | "gorm.io/gorm" 23 | "gorm.io/gorm/logger" 24 | ) 25 | 26 | var DB *gorm.DB 27 | 28 | func Init() { 29 | var err error 30 | mysqlDsn := fmt.Sprintf("%s:%s@tcp(%s:%s)/%s?charset=utf8&parseTime=True&loc=Local", os.Getenv("MYSQL_USERNAME"), os.Getenv("MYSQL_PASSWORD"), os.Getenv("MYSQL_HOST"), os.Getenv("MYSQL_PORT"), os.Getenv("MYSQL_DATABASE")) 31 | DB, err = gorm.Open(mysql.Open(mysqlDsn), &gorm.Config{ 32 | SkipDefaultTransaction: true, 33 | PrepareStmt: true, 34 | Logger: logger.Default.LogMode(logger.Info), 35 | }) 36 | if err != nil { 37 | panic(err) 38 | } 39 | 40 | // Auto database schema migration 41 | // This has caveat: see https://gorm.io/docs/migration.html 42 | err = DB.AutoMigrate(&Job{}) 43 | if err != nil { 44 | panic(err) 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /app/api/biz/dal/init.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 TikTok Pte. Ltd. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
// Init initializes the data access layer. At present this consists solely of
// calling db.Init.
func Init() {
	db.Init()
}
28 | func Health(ctx context.Context, c *app.RequestContext) { 29 | c.JSON(consts.StatusOK, utils.H{ 30 | "message": "pong", 31 | }) 32 | } 33 | -------------------------------------------------------------------------------- /app/api/biz/handler/job/BUILD.bazel: -------------------------------------------------------------------------------- 1 | load("@rules_go//go:def.bzl", "go_library") 2 | 3 | go_library( 4 | name = "job", 5 | srcs = ["job_handler.go"], 6 | importpath = "github.com/manatee-project/manatee/app/api/biz/handler/job", 7 | visibility = ["//visibility:public"], 8 | deps = [ 9 | "//app/api/biz/model/job", 10 | "//app/api/biz/pkg/errno", 11 | "//app/api/biz/pkg/utils", 12 | "//app/api/biz/service", 13 | "@com_github_cloudwego_hertz//pkg/app", 14 | "@com_github_cloudwego_hertz//pkg/common/hlog", 15 | "@com_github_cloudwego_hertz//pkg/protocol/consts", 16 | ], 17 | ) 18 | -------------------------------------------------------------------------------- /app/api/biz/model/job/BUILD.bazel: -------------------------------------------------------------------------------- 1 | load("@rules_go//go:def.bzl", "go_library") 2 | 3 | go_library( 4 | name = "job", 5 | srcs = ["job.go"], 6 | importpath = "github.com/manatee-project/manatee/app/api/biz/model/job", 7 | visibility = ["//visibility:public"], 8 | deps = ["@com_github_apache_thrift//lib/go/thrift"], 9 | ) 10 | -------------------------------------------------------------------------------- /app/api/biz/pkg/errno/BUILD.bazel: -------------------------------------------------------------------------------- 1 | load("@rules_go//go:def.bzl", "go_library") 2 | 3 | go_library( 4 | name = "errno", 5 | srcs = ["errno.go"], 6 | importpath = "github.com/manatee-project/manatee/app/api/biz/pkg/errno", 7 | visibility = ["//visibility:public"], 8 | ) 9 | -------------------------------------------------------------------------------- /app/api/biz/pkg/errno/errno.go: 
// Numeric error codes. The non-zero codes are spelled out explicitly; the
// values are part of the API surface and must not shift if entries are
// reordered.
const (
	SuccessCode          = 0
	ServiceErrCode       = 10001
	ReachJobLimitErrCode = 10002
)

// Human-readable default messages, one per code above.
const (
	SuccessMsg          = "Success"
	ServiceErrMsg       = "Service internal error"
	ReachJobLimitErrMsg = "The number of in progress jobs has reached the limit"
)

// ErrNo pairs a numeric error code with a message and implements error.
type ErrNo struct {
	ErrCode int32
	ErrMsg  string
}

// Error renders the value as "err_code=<code>, err_msg=<message>".
func (e ErrNo) Error() string {
	const layout = "err_code=%d, err_msg=%s"
	return fmt.Sprintf(layout, e.ErrCode, e.ErrMsg)
}

// NewErrNo builds an ErrNo from a code and a message.
func NewErrNo(code int32, msg string) ErrNo {
	return ErrNo{ErrCode: code, ErrMsg: msg}
}

// WithMessage returns a copy of e carrying msg in place of the original
// message. The receiver is passed by value, so e itself is not modified.
func (e ErrNo) WithMessage(msg string) ErrNo {
	return ErrNo{ErrCode: e.ErrCode, ErrMsg: msg}
}

// Predefined values built from the codes and messages above.
var (
	Success          = NewErrNo(SuccessCode, SuccessMsg)
	ServiceErr       = NewErrNo(ServiceErrCode, ServiceErrMsg)
	ReachJobLimitErr = NewErrNo(ReachJobLimitErrCode, ReachJobLimitErrMsg)
)
"minio.go", 8 | "mock.go", 9 | "storage.go", 10 | ], 11 | importpath = "github.com/manatee-project/manatee/app/api/biz/pkg/storage", 12 | visibility = ["//visibility:public"], 13 | deps = [ 14 | "@com_github_minio_minio_go_v7//:minio-go", 15 | "@com_github_minio_minio_go_v7//pkg/credentials", 16 | "@com_github_pkg_errors//:errors", 17 | "@com_google_cloud_go_iam//credentials/apiv1", 18 | "@com_google_cloud_go_iam//credentials/apiv1/credentialspb", 19 | "@com_google_cloud_go_storage//:storage", 20 | ], 21 | ) 22 | -------------------------------------------------------------------------------- /app/api/biz/pkg/storage/gcs.go: -------------------------------------------------------------------------------- 1 | package storage 2 | 3 | import ( 4 | "compress/gzip" 5 | "context" 6 | "fmt" 7 | "io" 8 | "net/http" 9 | "time" 10 | 11 | "cloud.google.com/go/storage" 12 | "github.com/pkg/errors" 13 | 14 | credentials "cloud.google.com/go/iam/credentials/apiv1" 15 | credentialspb "cloud.google.com/go/iam/credentials/apiv1/credentialspb" 16 | ) 17 | 18 | type GoogleCloudStorage struct { 19 | ctx context.Context 20 | bucket string 21 | client *storage.Client 22 | iamClient *credentials.IamCredentialsClient 23 | googleAccessId string 24 | } 25 | 26 | func NewGoogleCloudStorage(ctx context.Context, bucket string) (*GoogleCloudStorage, error) { 27 | client, err := storage.NewClient(ctx) 28 | if err != nil { 29 | return nil, errors.Wrap(err, "failed to create storage client") 30 | } 31 | serviceAccount, err := getGoogleServiceAccount() 32 | if err != nil { 33 | return nil, errors.Wrap(err, "failed to get google service account") 34 | } 35 | iamClient, err := credentials.NewIamCredentialsClient(ctx) 36 | if err != nil { 37 | return nil, errors.Wrap(err, "failed to create iam client") 38 | } 39 | return &GoogleCloudStorage{ 40 | ctx: ctx, 41 | bucket: bucket, 42 | iamClient: iamClient, 43 | client: client, 44 | googleAccessId: serviceAccount, 45 | }, nil 46 | } 47 | 48 | func (g 
*GoogleCloudStorage) Close() { 49 | g.client.Close() 50 | g.iamClient.Close() // also release the IAM credentials client created in NewGoogleCloudStorage 51 | } 52 | func (g *GoogleCloudStorage) BucketPath() string { 53 | return fmt.Sprintf("gs://%s", g.bucket) 54 | } 55 | // UploadFile streams reader to remotePath in the bucket, gzip-compressing the content when compress is true; copy, flush and finalize errors are all returned. 56 | func (g *GoogleCloudStorage) UploadFile(reader io.Reader, remotePath string, compress bool) error { 57 | ctx, cancel := context.WithCancel(g.ctx) 58 | defer cancel() // on an early error return this aborts the upload instead of committing a partial object 59 | writer := g.client.Bucket(g.bucket).Object(remotePath).NewWriter(ctx) 60 | if compress { 61 | gzipWriter := gzip.NewWriter(writer) 62 | if _, err := io.Copy(gzipWriter, reader); err != nil { 63 | return errors.Wrap(err, "failed to copy content to gzip writer") 64 | } 65 | if err := gzipWriter.Close(); err != nil { 66 | return errors.Wrap(err, "failed to flush gzip writer") 67 | } 68 | } else if _, err := io.Copy(writer, reader); err != nil { 69 | return errors.Wrap(err, "failed to copy content to writer") 70 | } 71 | return errors.Wrap(writer.Close(), "failed to finalize upload") // Close completes the write; Wrap yields nil when Close succeeds 72 | } 73 | func (g *GoogleCloudStorage) IssueSignedUrl(remotePath string, method string, expires time.Duration) (string, error) { 74 | if method != "GET" && method != "PUT" { 75 | return "", errors.Wrap(fmt.Errorf("unkown method for signed url, supported are GET and PUT"), "") 76 | } 77 | 78 | opts := &storage.SignedURLOptions{ 79 | Scheme: storage.SigningSchemeV4, 80 | Method: method, 81 | Expires: time.Now().Add(expires), 82 | GoogleAccessID: g.googleAccessId, 83 | SignBytes: func(b []byte) ([]byte, error) { 84 | req := &credentialspb.SignBlobRequest{ 85 | Payload: b, 86 | Name: g.googleAccessId, 87 | } 88 | resp, err := g.iamClient.SignBlob(g.ctx, req) 89 | if err != nil { 90 | return nil, errors.Wrap(err, "failed to sign blocb") 91 | } 92 | return resp.SignedBlob, err 93 | }, 94 | } 95 | url, err := storage.SignedURL(g.bucket, remotePath, opts) 96 | if err != nil { 97 | return "", errors.Wrap(err, "failed to sign url") 98 | } 99 | return url, nil 100 | } 101 | 102 | func getGoogleServiceAccount() (string, error) { 103 | url := "http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/email" 104 | req, err := http.NewRequest("GET", url, nil) 105 | if err != nil {
106 | return "", errors.Wrap(err, "failed to create http request") 107 | } 108 | req.Header.Add("Metadata-Flavor", "Google") 109 | client := &http.Client{Timeout: 10 * time.Second} // bound the call so it cannot block forever when the metadata server does not answer 110 | resp, err := client.Do(req) 111 | if err != nil { 112 | return "", errors.Wrap(err, "failed to request google meta service account") 113 | } 114 | defer resp.Body.Close() 115 | account, err := io.ReadAll(resp.Body) 116 | if err != nil { 117 | return "", errors.Wrap(err, "failed to read google meta service account response") 118 | } 119 | if resp.StatusCode != http.StatusOK { // the body of a non-200 response is not a service account e-mail 120 | return "", errors.Errorf("google metadata server returned %s: %s", resp.Status, account) 121 | } 122 | return string(account), nil 123 | } 124 | -------------------------------------------------------------------------------- /app/api/biz/pkg/storage/minio.go: -------------------------------------------------------------------------------- 1 | package storage 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "io" 7 | "net/url" 8 | "os" 9 | "time" 10 | 11 | "github.com/minio/minio-go/v7" 12 | "github.com/minio/minio-go/v7/pkg/credentials" 13 | "github.com/pkg/errors" 14 | ) 15 | 16 | type MinioStorage struct { 17 | ctx context.Context 18 | bucket string 19 | minioClient minio.Client 20 | } 21 | 22 | func NewMinioStorage(ctx context.Context, bucket string) (*MinioStorage, error) { 23 | accessKeyID := os.Getenv("AWS_ACCESS_KEY_ID") 24 | if accessKeyID == "" { 25 | return nil, fmt.Errorf("AWS_ACCESS_KEY_ID environment variable is not present") 26 | } 27 | secretAccessKey := os.Getenv("AWS_SECRET_ACCESS_KEY") 28 | if secretAccessKey == "" { 29 | return nil, fmt.Errorf("AWS_SECRET_ACCESS_KEY environment variable is not present") 30 | } 31 | endpoint := os.Getenv("S3_ENDPOINT") 32 | if endpoint == "" { 33 | return nil, fmt.Errorf("S3_ENDPOINT environment variable is not present") 34 | } 35 | minioClient, err := minio.New(endpoint, &minio.Options{ 36 | Creds: credentials.NewStaticV4(accessKeyID, secretAccessKey, ""), 37 | Secure: false, 38 | }) 39 | if err != nil { 40 | return nil, err 41 | } 42 | 43 | exist, err := minioClient.BucketExists(ctx, bucket) 44 | if err != nil { 45 | return nil,
err 46 | } 47 | 48 | if !exist { 49 | err = minioClient.MakeBucket(ctx, bucket, minio.MakeBucketOptions{Region: "us"}) 50 | if err != nil { 51 | return nil, err 52 | } 53 | } 54 | 55 | return &MinioStorage{ 56 | ctx: ctx, 57 | bucket: bucket, 58 | minioClient: *minioClient, 59 | }, nil 60 | } 61 | 62 | func (m *MinioStorage) Close() { 63 | } 64 | 65 | func (m *MinioStorage) BucketPath() string { 66 | return fmt.Sprintf("s3://%s", m.bucket) 67 | } 68 | 69 | // compress parameter hasn't been implemented for minio client 70 | func (m *MinioStorage) UploadFile(reader io.Reader, remotePath string, compress bool) error { 71 | _, err := m.minioClient.PutObject(m.ctx, m.bucket, remotePath, reader, -1, minio.PutObjectOptions{ContentType: "application/octet-stream"}) 72 | if err != nil { 73 | return errors.Wrap(err, "failed to upload to minio") 74 | } 75 | return nil 76 | } 77 | 78 | func (m *MinioStorage) IssueSignedUrl(remotePath string, method string, expires time.Duration) (string, error) { 79 | reqParams := make(url.Values) 80 | var url *url.URL 81 | var err error 82 | if method == "GET" { 83 | url, err = m.minioClient.PresignedGetObject(m.ctx, m.bucket, remotePath, expires, reqParams) 84 | if err != nil { 85 | return "", err 86 | } 87 | } else if method == "PUT" { 88 | url, err = m.minioClient.PresignedPutObject(m.ctx, m.bucket, remotePath, expires) 89 | if err != nil { 90 | return "", err 91 | } 92 | } else { 93 | return "", errors.Wrap(fmt.Errorf("unkown method for signed url, supported are GET and PUT"), "") 94 | } 95 | return url.String(), nil 96 | } 97 | -------------------------------------------------------------------------------- /app/api/biz/pkg/storage/mock.go: -------------------------------------------------------------------------------- 1 | package storage 2 | 3 | import ( 4 | "context" 5 | "io" 6 | "time" 7 | ) 8 | 9 | // A mock storage only used for job service testing 10 | type MockStorage struct { 11 | ctx context.Context 12 | } 13 | 14 | func 
NewMockStorage(ctx context.Context) *MockStorage { 15 | return &MockStorage{ 16 | ctx: ctx, 17 | } 18 | } 19 | 20 | func (m *MockStorage) Close() { 21 | } 22 | 23 | func (m *MockStorage) BucketPath() string { 24 | return "" 25 | } 26 | 27 | func (m *MockStorage) UploadFile(reader io.Reader, remotePath string, compress bool) error { 28 | return nil 29 | } 30 | 31 | func (m *MockStorage) IssueSignedUrl(remotePath string, method string, expires time.Duration) (string, error) { 32 | return "", nil 33 | } 34 | -------------------------------------------------------------------------------- /app/api/biz/pkg/storage/storage.go: -------------------------------------------------------------------------------- 1 | package storage 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "io" 7 | "os" 8 | "time" 9 | 10 | "github.com/pkg/errors" 11 | ) 12 | // Storage is the object-store abstraction implemented by the GCS, MinIO and mock backends of this package. 13 | type Storage interface { 14 | BucketPath() string 15 | UploadFile(reader io.Reader, remotePath string, compress bool) error 16 | IssueSignedUrl(remotePath string, method string, expiry time.Duration) (string, error) 17 | Close() 18 | } 19 | // getBucket derives the bucket name "dcr-<ENV>-hub" from the ENV environment variable and fails when ENV is unset. 20 | func getBucket() (string, error) { 21 | env := os.Getenv("ENV") 22 | if env == "" { 23 | return "", errors.Wrap(fmt.Errorf("ENV environment variable is not present"), "") 24 | } 25 | return fmt.Sprintf("dcr-%s-hub", env), nil 26 | } 27 | // GetStorage builds the backend selected by STORAGE_TYPE (GCP, MINIO or MOCK; MOCK when unset) and returns an error for any other value. 28 | func GetStorage(ctx context.Context) (Storage, error) { 29 | storageType := os.Getenv("STORAGE_TYPE") 30 | if storageType == "" { 31 | storageType = "MOCK" 32 | } 33 | bucket, err := getBucket() 34 | if err != nil { 35 | return nil, err 36 | } 37 | switch storageType { 38 | case "GCP": 39 | return NewGoogleCloudStorage(ctx, bucket) 40 | case "MINIO": 41 | return NewMinioStorage(ctx, bucket) 42 | case "MOCK": 43 | return NewMockStorage(ctx), nil 44 | } 45 | return nil, errors.Errorf("unsupported STORAGE_TYPE %q, supported are GCP, MINIO and MOCK", storageType) // previously a nil Storage with a nil error 46 | } 47 | --------------------------------------------------------------------------------
/app/api/biz/pkg/utils/BUILD.bazel: -------------------------------------------------------------------------------- 1 | load("@rules_go//go:def.bzl", "go_library") 2 | 3 | go_library( 4 | name = "utils", 5 | srcs = ["resp.go"], 6 | importpath = "github.com/manatee-project/manatee/app/api/biz/pkg/utils", 7 | visibility = ["//visibility:public"], 8 | deps = [ 9 | "//app/api/biz/pkg/errno", 10 | "@com_github_cloudwego_hertz//pkg/app", 11 | "@com_github_gin_gonic_gin//:gin", 12 | ], 13 | ) 14 | -------------------------------------------------------------------------------- /app/api/biz/pkg/utils/resp.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 TikTok Pte. Ltd. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package utils 16 | 17 | import ( 18 | "errors" 19 | "net/http" 20 | 21 | "github.com/cloudwego/hertz/pkg/app" 22 | "github.com/gin-gonic/gin" 23 | 24 | "github.com/manatee-project/manatee/app/api/biz/pkg/errno" 25 | ) 26 | 27 | type BaseResp struct { 28 | StatusCode int32 29 | StatusMsg string 30 | } 31 | 32 | // BuildBaseResp convert error and build BaseResp 33 | func BuildBaseResp(err error) *BaseResp { 34 | if err == nil { 35 | return baseResp(errno.Success) 36 | } 37 | 38 | e := errno.ErrNo{} 39 | if errors.As(err, &e) { 40 | return baseResp(e) 41 | } 42 | 43 | s := errno.ServiceErr.WithMessage(err.Error()) 44 | return baseResp(s) 45 | } 46 | 47 | // baseResp build BaseResp from error 48 | func baseResp(err errno.ErrNo) *BaseResp { 49 | return &BaseResp{ 50 | StatusCode: err.ErrCode, 51 | StatusMsg: err.ErrMsg, 52 | } 53 | } 54 | 55 | func ReturnsJSONError(c *app.RequestContext, err error) { 56 | resp := BuildBaseResp(err) 57 | c.JSON(http.StatusOK, gin.H{"code": resp.StatusCode, "msg": resp.StatusMsg}) 58 | c.Abort() 59 | } 60 | -------------------------------------------------------------------------------- /app/api/biz/router/BUILD.bazel: -------------------------------------------------------------------------------- 1 | load("@rules_go//go:def.bzl", "go_library") 2 | 3 | go_library( 4 | name = "router", 5 | srcs = ["register.go"], 6 | importpath = "github.com/manatee-project/manatee/app/api/biz/router", 7 | visibility = ["//visibility:public"], 8 | deps = [ 9 | "//app/api/biz/router/job", 10 | "@com_github_cloudwego_hertz//pkg/app/server", 11 | ], 12 | ) 13 | -------------------------------------------------------------------------------- /app/api/biz/router/job/BUILD.bazel: -------------------------------------------------------------------------------- 1 | load("@rules_go//go:def.bzl", "go_library") 2 | 3 | go_library( 4 | name = "job", 5 | srcs = [ 6 | "job.go", 7 | "middleware.go", 8 | ], 9 | importpath = 
"github.com/manatee-project/manatee/app/api/biz/router/job", 10 | visibility = ["//visibility:public"], 11 | deps = [ 12 | "//app/api/biz/handler/job", 13 | "@com_github_cloudwego_hertz//pkg/app", 14 | "@com_github_cloudwego_hertz//pkg/app/server", 15 | ], 16 | ) 17 | -------------------------------------------------------------------------------- /app/api/biz/router/job/job.go: -------------------------------------------------------------------------------- 1 | // Code generated by hertz generator. DO NOT EDIT. 2 | 3 | package job 4 | 5 | import ( 6 | "github.com/cloudwego/hertz/pkg/app/server" 7 | job "github.com/manatee-project/manatee/app/api/biz/handler/job" 8 | ) 9 | 10 | /* 11 | This file will register all the routes of the services in the master idl. 12 | And it will update automatically when you use the "update" command for the idl. 13 | So don't modify the contents of the file, or your code will be deleted when it is updated. 14 | */ 15 | 16 | // Register register routes based on the IDL 'api.${HTTP Method}' annotation. 17 | func Register(r *server.Hertz) { 18 | 19 | root := r.Group("/", rootMw()...) 20 | { 21 | _v1 := root.Group("/v1", _v1Mw()...) 22 | { 23 | _job := _v1.Group("/job", _jobMw()...) 24 | { 25 | _attestation := _job.Group("/attestation", _attestationMw()...) 26 | _attestation.POST("/", append(_queryjobattestationreportMw(), job.QueryJobAttestationReport)...) 27 | } 28 | { 29 | _delete := _job.Group("/delete", _deleteMw()...) 30 | _delete.POST("/", append(_deletejobMw(), job.DeleteJob)...) 31 | } 32 | { 33 | _output := _job.Group("/output", _outputMw()...) 34 | { 35 | _download := _output.Group("/download", _downloadMw()...) 36 | _download.POST("/", append(_downloadjoboutputMw(), job.DownloadJobOutput)...) 37 | } 38 | } 39 | { 40 | _query := _job.Group("/query", _queryMw()...) 41 | _query.POST("/", append(_queryjobMw(), job.QueryJob)...) 42 | } 43 | { 44 | _submit := _job.Group("/submit", _submitMw()...) 
45 | _submit.POST("/", append(_submitjobMw(), job.SubmitJob)...) 46 | } 47 | } 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /app/api/biz/router/job/middleware.go: -------------------------------------------------------------------------------- 1 | // Code generated by hertz generator. 2 | 3 | package job 4 | 5 | import ( 6 | "github.com/cloudwego/hertz/pkg/app" 7 | ) 8 | 9 | func rootMw() []app.HandlerFunc { 10 | // your code... 11 | return nil 12 | } 13 | 14 | func _v1Mw() []app.HandlerFunc { 15 | // your code... 16 | return nil 17 | } 18 | 19 | func _jobMw() []app.HandlerFunc { 20 | // your code... 21 | return nil 22 | } 23 | 24 | func _deleteMw() []app.HandlerFunc { 25 | // your code... 26 | return nil 27 | } 28 | 29 | func _deletejobMw() []app.HandlerFunc { 30 | // your code... 31 | return nil 32 | } 33 | 34 | func _queryMw() []app.HandlerFunc { 35 | // your code... 36 | return nil 37 | } 38 | 39 | func _queryjobMw() []app.HandlerFunc { 40 | // your code... 41 | return nil 42 | } 43 | 44 | func _submitMw() []app.HandlerFunc { 45 | // your code... 46 | return nil 47 | } 48 | 49 | func _createjobMw() []app.HandlerFunc { 50 | // your code... 51 | return nil 52 | } 53 | 54 | func _updateMw() []app.HandlerFunc { 55 | // your code... 56 | return nil 57 | } 58 | 59 | func _fileMw() []app.HandlerFunc { 60 | // your code... 61 | return nil 62 | } 63 | 64 | func _attrsMw() []app.HandlerFunc { 65 | // your code... 66 | return nil 67 | } 68 | 69 | func _queryjoboutputattrMw() []app.HandlerFunc { 70 | // your code... 71 | return nil 72 | } 73 | 74 | func _downloadMw() []app.HandlerFunc { 75 | // your code... 76 | return nil 77 | } 78 | 79 | func _downloadjoboutputMw() []app.HandlerFunc { 80 | // your code... 81 | return nil 82 | } 83 | 84 | func _attestationMw() []app.HandlerFunc { 85 | // your code... 86 | return nil 87 | } 88 | 89 | func _queryjobattestationreportMw() []app.HandlerFunc { 90 | // your code... 
91 | return nil 92 | } 93 | 94 | func _outputMw() []app.HandlerFunc { 95 | // your code... 96 | return nil 97 | } 98 | 99 | func _submitjobMw() []app.HandlerFunc { 100 | // your code... 101 | return nil 102 | } 103 | -------------------------------------------------------------------------------- /app/api/biz/router/register.go: -------------------------------------------------------------------------------- 1 | // Code generated by hertz generator. DO NOT EDIT. 2 | 3 | package router 4 | 5 | import ( 6 | "github.com/cloudwego/hertz/pkg/app/server" 7 | job "github.com/manatee-project/manatee/app/api/biz/router/job" 8 | ) 9 | 10 | // GeneratedRegister registers routers generated by IDL. 11 | func GeneratedRegister(r *server.Hertz) { 12 | //INSERT_POINT: DO NOT DELETE THIS LINE! 13 | job.Register(r) 14 | } 15 | -------------------------------------------------------------------------------- /app/api/biz/service/BUILD.bazel: -------------------------------------------------------------------------------- 1 | load("@rules_go//go:def.bzl", "go_library", "go_test") 2 | 3 | go_library( 4 | name = "service", 5 | srcs = ["job_service.go"], 6 | importpath = "github.com/manatee-project/manatee/app/api/biz/service", 7 | visibility = ["//visibility:public"], 8 | deps = [ 9 | "//app/api/biz/dal/db", 10 | "//app/api/biz/model/job", 11 | "//app/api/biz/pkg/errno", 12 | "//app/api/biz/pkg/storage", 13 | "@com_github_cloudwego_hertz//pkg/common/hlog", 14 | "@com_github_google_uuid//:uuid", 15 | "@com_github_pkg_errors//:errors", 16 | ], 17 | ) 18 | 19 | go_test( 20 | name = "service_test", 21 | srcs = ["job_service_test.go"], 22 | embed = [":service"], 23 | ) 24 | -------------------------------------------------------------------------------- /app/api/biz/service/job_service_test.go: -------------------------------------------------------------------------------- 1 | package service 2 | 3 | import ( 4 | "context" 5 | "os" 6 | "strings" 7 | "testing" 8 | ) 9 | 10 | var 
expectedDockerfile1 string = `ARG BASE_IMAGE 11 | FROM $BASE_IMAGE 12 | ARG OUTPUTPATH 13 | ARG JUPYTER_FILENAME 14 | ARG USER_WORKSPACE 15 | ARG CUSTOMTOKEN_CLOUDSTORAGE_PATH 16 | 17 | ENV OUTPUTPATH=$OUTPUTPATH 18 | ENV JUPYTER_FILENAME=$JUPYTER_FILENAME 19 | ENV CUSTOMTOKEN_CLOUDSTORAGE_PATH=$CUSTOMTOKEN_CLOUDSTORAGE_PATH 20 | 21 | WORKDIR /home/jovyan 22 | COPY $USER_WORKSPACE/* ./ 23 | 24 | 25 | ENTRYPOINT jupyter nbconvert --execute --to notebook --inplace $JUPYTER_FILENAME --ExecutePreprocessor.timeout=-1 --allow-errors \ 26 | && hash=$(md5sum $JUPYTER_FILENAME | awk '{ print $1 }') \ 27 | && ./gscp $JUPYTER_FILENAME $OUTPUTPATH \ 28 | && ./gen_custom_token --nonce $hash \ 29 | && ./gscp custom_token $CUSTOMTOKEN_CLOUDSTORAGE_PATH 30 | ` 31 | 32 | func TestGenerateDockerfile(t *testing.T) { 33 | os.Setenv("STORAGE_TYPE", "MOCK") 34 | os.Setenv("ENV", "minikube") 35 | js := NewJobService(context.Background()) 36 | 37 | content := js.generateDockerfile([]string{}) 38 | if strings.Contains(content, `LABEL "tee.launch_policy.allow_env_override"`) { 39 | t.Errorf("Dockerfile contains wrong allow_env_override policy") 40 | } 41 | content = js.generateDockerfile([]string{"USER_TOKEN"}) 42 | if !strings.Contains(content, `LABEL "tee.launch_policy.allow_env_override"="USER_TOKEN"`) { 43 | t.Errorf("Dockerfile does not contain correct allow_env_override policy") 44 | } 45 | content = js.generateDockerfile([]string{"USER_TOKEN", "CUSTOM_ENV_VAR", "BREAKPOINT"}) 46 | if !strings.Contains(content, `LABEL "tee.launch_policy.allow_env_override"="USER_TOKEN,CUSTOM_ENV_VAR,BREAKPOINT"`) { 47 | t.Errorf("Dockerfile does not contain correct allow_env_override policy") 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /app/api/idl/job.thrift: -------------------------------------------------------------------------------- 1 | namespace go job 2 | 3 | enum JobStatus { 4 | Created = 0 5 | ImageBuilding = 1 6 | ImageBuildingFailed 
= 2 7 | VMWaiting = 3 8 | VMRunning = 4 9 | VMFinished = 5 10 | VMKilled = 6 11 | VMFailed = 7 12 | VMOther = 8 13 | VMLaunchFailed = 9 14 | } 15 | 16 | struct Job { 17 | 1: i64 id 18 | 2: string uuid 19 | 3: string creator 20 | 4: JobStatus job_status 21 | 5: string jupyter_file_name 22 | 6: string created_at 23 | 7: string updated_at 24 | } 25 | 26 | struct Env { 27 | 1: string key 28 | 2: string value 29 | } 30 | 31 | struct SubmitJobRequest{ 32 | 1: string jupyter_file_name (api.body="filename", api.vd="len($) > 0 && len($) < 128 && regexp('^.*\\.ipynb$') && !regexp('.*\\.\\..*')") 33 | 2: string creator (api.body="creator", api.vd="len($) > 0 && len($) < 32 && !regexp('.*\\.\\..*')") 34 | 3: list<Env> envs (api.body="envs", api.json="envs") 35 | 255: required string access_token (api.header="Authorization") 36 | } 37 | 38 | struct SubmitJobResponse{ 39 | 1: i32 code 40 | 2: string msg 41 | 3: string uuid 42 | } 43 | 44 | struct QueryJobRequest { 45 | 1: i64 page (api.body="page", api.query="page",api.vd="$>0") 46 | 2: i64 page_size (api.body="page_size", api.query="page_size", api.vd="$ > 0 && $ <= 100") 47 | 3: string creator (api.body="creator", api.vd="len($) > 0 && len($) < 32 && !regexp('.*\\.\\..*')") 48 | 255: required string access_token (api.header="Authorization") 49 | } 50 | 51 | struct QueryJobResponse { 52 | 1: i32 code 53 | 2: string msg 54 | 3: list<Job> jobs 55 | 4: i64 total 56 | } 57 | 58 | struct DeleteJobRequest { 59 | 1: string uuid (api.body="uuid", api.query="uuid") 60 | 2: string creator (api.body="creator", api.vd="len($) > 0 && len($) < 32 && !regexp('.*\\.\\..*')") 61 | 255: required string access_token (api.header="Authorization") 62 | } 63 | 64 | struct DeleteJobResponse { 65 | 1: i32 code 66 | 2: string msg 67 | } 68 | 69 | struct DownloadJobOutputRequest { 70 | 1: i64 id (api.body="id", api.query="id", api.vd="$>0") 71 | 2: string creator (api.body="creator", api.vd="len($) > 0 && len($) < 32 && !regexp('.*\\.\\..*')") 72 | 255: required
string access_token (api.header="Authorization") 73 | } 74 | 75 | struct DownloadJobOutputResponse { 76 | 1: i32 code 77 | 2: string msg 78 | 3: string signed_url 79 | 4: string filename 80 | } 81 | 82 | struct QueryJobAttestationRequest { 83 | 1: i64 id (api.body="id", api.query="id", api.vd="$>0") 84 | 2: string creator (api.body="creator", api.vd="len($) > 0 && len($) < 32 && !regexp('.*\\.\\..*')") 85 | } 86 | 87 | struct QueryJobAttestationResponse { 88 | 1: i32 code 89 | 2: string msg 90 | 3: string signed_url 91 | } 92 | 93 | service JobHandler { 94 | SubmitJobResponse SubmitJob(1:SubmitJobRequest req)(api.post="/v1/job/submit/") 95 | QueryJobResponse QueryJob(1:QueryJobRequest req)(api.post="/v1/job/query/") 96 | DeleteJobResponse DeleteJob(1:DeleteJobRequest req)(api.post="/v1/job/delete/") 97 | DownloadJobOutputResponse DownloadJobOutput(1:DownloadJobOutputRequest req) (api.post="/v1/job/output/download/") 98 | QueryJobAttestationResponse QueryJobAttestationReport(1:QueryJobAttestationRequest req) (api.post="/v1/job/attestation/") 99 | } -------------------------------------------------------------------------------- /app/api/main.go: -------------------------------------------------------------------------------- 1 | // Code generated by hertz generator. 2 | 3 | package main 4 | 5 | import ( 6 | "github.com/cloudwego/hertz/pkg/app/server" 7 | 8 | "github.com/manatee-project/manatee/app/api/biz/dal" 9 | ) 10 | 11 | func Init() { 12 | dal.Init() 13 | } 14 | 15 | func main() { 16 | Init() 17 | h := server.Default(server.WithHostPorts(":8080")) 18 | 19 | register(h) 20 | h.Spin() 21 | } 22 | -------------------------------------------------------------------------------- /app/api/router.go: -------------------------------------------------------------------------------- 1 | // Code generated by hertz generator. 
2 | 3 | package main 4 | 5 | import ( 6 | "github.com/cloudwego/hertz/pkg/app/server" 7 | handler "github.com/manatee-project/manatee/app/api/biz/handler" 8 | ) 9 | 10 | // customizedRegister registers customized routers. 11 | func customizedRegister(r *server.Hertz) { 12 | r.GET("/health", handler.Health) 13 | 14 | // your code ... 15 | } 16 | -------------------------------------------------------------------------------- /app/api/router_gen.go: -------------------------------------------------------------------------------- 1 | // Code generated by hertz generator. DO NOT EDIT. 2 | 3 | package main 4 | 5 | import ( 6 | "github.com/cloudwego/hertz/pkg/app/server" 7 | router "github.com/manatee-project/manatee/app/api/biz/router" 8 | ) 9 | 10 | // register registers all routers. 11 | func register(r *server.Hertz) { 12 | 13 | router.GeneratedRegister(r) 14 | 15 | customizedRegister(r) 16 | } 17 | -------------------------------------------------------------------------------- /app/api/script/bootstrap.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | CURDIR=$(cd "$(dirname "$0")" && pwd) 3 | BinaryName=hertz_service 4 | echo "$CURDIR/bin/${BinaryName}" 5 | exec "$CURDIR/bin/${BinaryName}" -------------------------------------------------------------------------------- /app/executor/.gitignore: -------------------------------------------------------------------------------- 1 | conf 2 | github.com -------------------------------------------------------------------------------- /app/executor/BUILD.bazel: -------------------------------------------------------------------------------- 1 | load("@rules_oci//oci:defs.bzl", "oci_image", "oci_load") 2 | load("@rules_pkg//pkg:tar.bzl", "pkg_tar") 3 | 4 | pkg_tar( 5 | name = "gen_custom_token_tar", 6 | srcs = [ 7 | "//app/executor/attestation:gen_custom_token", 8 | ], 9 | package_dir = "/home/jovyan", 10 | ) 11 | 12 | oci_image( 13 | name = "image", 14 | base =
"@scipy-notebook_linux_amd64", 15 | tars = [ 16 | ":gen_custom_token_tar", 17 | ], 18 | # FIXME: for some reason, pkg_tar changes the owner of /home/jovyan. 19 | # run it as root for now, but it will go away once we fully switch to distroless image 20 | # https://github.com/manatee-project/manatee/issues/6 21 | user = "root:root", 22 | visibility = ["//visibility:public"], 23 | ) 24 | 25 | oci_load( 26 | name = "load_image", 27 | image = ":image", 28 | repo_tags = ["executor:latest"], 29 | visibility = ["//visibility:public"], 30 | ) 31 | -------------------------------------------------------------------------------- /app/executor/attestation/BUILD.bazel: -------------------------------------------------------------------------------- 1 | load("@rules_go//go:def.bzl", "go_binary", "go_library") 2 | 3 | go_library( 4 | name = "attestation_lib", 5 | srcs = ["main.go"], 6 | importpath = "github.com/manatee-project/manatee/app/executor/attestation", 7 | visibility = ["//visibility:private"], 8 | deps = ["@com_github_pkg_errors//:errors"], 9 | ) 10 | 11 | go_binary( 12 | name = "gen_custom_token", 13 | embed = [":attestation_lib"], 14 | goarch = "amd64", 15 | goos = "linux", 16 | visibility = ["//visibility:public"], 17 | ) 18 | -------------------------------------------------------------------------------- /app/executor/attestation/main.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 TikTok Pte. Ltd. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package main 16 | 17 | import ( 18 | "context" 19 | "encoding/json" 20 | "flag" 21 | "fmt" 22 | "io" 23 | "log" 24 | "net" 25 | "net/http" 26 | "os" 27 | "strings" 28 | 29 | "github.com/pkg/errors" 30 | ) 31 | 32 | const TikTokAudience = "https://research.tiktok.com/" 33 | const TokenFilename = "custom_token" 34 | 35 | type CustomToken struct { 36 | Audience string `json:"audience"` 37 | Nonces []string `json:"nonces"` // each nonce must be min 64bits 38 | TokenType string `json:"token_type"` 39 | } 40 | // GcsCustomAttestationToken posts an OIDC custom-token request carrying nonce to the TEE server on the container launcher unix socket and returns the response body; a non-200 response is an error. 41 | func GcsCustomAttestationToken(nonce string) ([]byte, error) { 42 | request := CustomToken{ 43 | Audience: TikTokAudience, 44 | Nonces: []string{nonce}, 45 | TokenType: "OIDC", 46 | } 47 | httpClient := http.Client{ 48 | Transport: &http.Transport{ 49 | DialContext: func(_ context.Context, _, _ string) (net.Conn, error) { 50 | return net.Dial("unix", "/run/container_launcher/teeserver.sock") 51 | }, 52 | }, 53 | } 54 | customJSON, err := json.Marshal(request) 55 | if err != nil { 56 | return nil, errors.Wrap(err, "failed to marshal request") 57 | } 58 | url := "http://localhost/v1/token" 59 | resp, err := httpClient.Post(url, "application/json", strings.NewReader(string(customJSON))) 60 | if err != nil { 61 | return nil, errors.Wrap(err, "faile to get custom token") 62 | } 63 | defer resp.Body.Close() 64 | tokenbytes, err := io.ReadAll(resp.Body) 65 | if err != nil { 66 | return nil, errors.Wrap(err, "faile to read from response") 67 | } 68 | if resp.StatusCode != http.StatusOK { // the body of a non-200 response is not a token and must not be written out as one 69 | return nil, errors.Errorf("tee server returned %s: %s", resp.Status, tokenbytes) 70 | } 71 | return tokenbytes, nil 72 | } 73 | func generateCustomAttestationToken(nonce string) ([]byte, error) { 74 | if os.Getenv("TEE_BACKEND") == "MOCK" { 75 | return []byte(fmt.Sprintf("mock tee token with nonce %s", nonce)), nil 76 | } 77 | return GcsCustomAttestationToken(nonce) 78 | } 79 | 80 | func requireParameter(name string, para string) { 81 | if para == "" {
fmt.Printf("ERROR: %s parameter is required \n", name) 83 | flag.PrintDefaults() 84 | os.Exit(1) 85 | } 86 | } 87 | 88 | func main() { 89 | nonce := flag.String("nonce", "", "The nonce to generate custom token") 90 | flag.Parse() 91 | requireParameter("nonce", *nonce) 92 | customToken, err := generateCustomAttestationToken(*nonce) 93 | if err != nil { 94 | fmt.Printf("ERROR: failed to generate custom token %+v \n", err) 95 | panic(err) 96 | } 97 | 98 | err = os.WriteFile(TokenFilename, customToken, 0644) 99 | if err != nil { 100 | fmt.Printf("ERROR: failed to write custom token to file %+v \n", err) 101 | log.Fatal(err) 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /app/jupyterlab_manatee/.dockerignore: -------------------------------------------------------------------------------- 1 | *.bundle.* 2 | lib/ 3 | node_modules/ 4 | *.log 5 | .eslintcache 6 | .stylelintcache 7 | *.egg-info/ 8 | .ipynb_checkpoints 9 | *.tsbuildinfo 10 | labextension 11 | # Version file is handled by hatchling 12 | jupyterlab_manatee/_version.py 13 | 14 | # Integration tests 15 | ui-tests/test-results/ 16 | ui-tests/playwright-report/ 17 | 18 | # Created by https://www.gitignore.io/api/python 19 | # Edit at https://www.gitignore.io/?templates=python 20 | 21 | ### Python ### 22 | # Byte-compiled / optimized / DLL files 23 | __pycache__/ 24 | *.py[cod] 25 | *$py.class 26 | 27 | # C extensions 28 | *.so 29 | 30 | # Distribution / packaging 31 | .Python 32 | build/ 33 | develop-eggs/ 34 | dist/ 35 | downloads/ 36 | eggs/ 37 | .eggs/ 38 | lib/ 39 | lib64/ 40 | parts/ 41 | sdist/ 42 | var/ 43 | wheels/ 44 | pip-wheel-metadata/ 45 | share/python-wheels/ 46 | .installed.cfg 47 | *.egg 48 | MANIFEST 49 | 50 | # PyInstaller 51 | # Usually these files are written by a python script from a template 52 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
53 | *.manifest 54 | *.spec 55 | 56 | # Installer logs 57 | pip-log.txt 58 | pip-delete-this-directory.txt 59 | 60 | # Unit test / coverage reports 61 | htmlcov/ 62 | .tox/ 63 | .nox/ 64 | .coverage 65 | .coverage.* 66 | .cache 67 | nosetests.xml 68 | coverage/ 69 | coverage.xml 70 | *.cover 71 | .hypothesis/ 72 | .pytest_cache/ 73 | 74 | # Translations 75 | *.mo 76 | *.pot 77 | 78 | # Scrapy stuff: 79 | .scrapy 80 | 81 | # Sphinx documentation 82 | docs/_build/ 83 | 84 | # PyBuilder 85 | target/ 86 | 87 | # pyenv 88 | .python-version 89 | 90 | # celery beat schedule file 91 | celerybeat-schedule 92 | 93 | # SageMath parsed files 94 | *.sage.py 95 | 96 | # Spyder project settings 97 | .spyderproject 98 | .spyproject 99 | 100 | # Rope project settings 101 | .ropeproject 102 | 103 | # Mr Developer 104 | .mr.developer.cfg 105 | .project 106 | .pydevproject 107 | 108 | # mkdocs documentation 109 | /site 110 | 111 | # mypy 112 | .mypy_cache/ 113 | .dmypy.json 114 | dmypy.json 115 | 116 | # Pyre type checker 117 | .pyre/ 118 | 119 | # End of https://www.gitignore.io/api/python 120 | 121 | # OSX files 122 | .DS_Store 123 | 124 | # Yarn cache 125 | .yarn/ 126 | 127 | cheat-sheet.md 128 | *.yml 129 | !.yarnrc.yml 130 | build_pkg.sh -------------------------------------------------------------------------------- /app/jupyterlab_manatee/.gitignore: -------------------------------------------------------------------------------- 1 | *.bundle.* 2 | lib/ 3 | node_modules/ 4 | *.log 5 | .eslintcache 6 | .stylelintcache 7 | *.egg-info/ 8 | .ipynb_checkpoints 9 | *.tsbuildinfo 10 | jupyterlab_manatee/labextension 11 | # Version file is handled by hatchling 12 | jupyterlab_manatee/_version.py 13 | 14 | # Integration tests 15 | ui-tests/test-results/ 16 | ui-tests/playwright-report/ 17 | 18 | # Created by https://www.gitignore.io/api/python 19 | # Edit at https://www.gitignore.io/?templates=python 20 | 21 | ### Python ### 22 | # Byte-compiled / optimized / DLL files 23 | __pycache__/ 
24 | *.py[cod] 25 | *$py.class 26 | 27 | # C extensions 28 | *.so 29 | 30 | # Distribution / packaging 31 | .Python 32 | build/ 33 | develop-eggs/ 34 | dist/ 35 | downloads/ 36 | eggs/ 37 | .eggs/ 38 | lib/ 39 | lib64/ 40 | parts/ 41 | sdist/ 42 | var/ 43 | wheels/ 44 | pip-wheel-metadata/ 45 | share/python-wheels/ 46 | .installed.cfg 47 | *.egg 48 | MANIFEST 49 | 50 | # PyInstaller 51 | # Usually these files are written by a python script from a template 52 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 53 | *.manifest 54 | *.spec 55 | 56 | # Installer logs 57 | pip-log.txt 58 | pip-delete-this-directory.txt 59 | 60 | # Unit test / coverage reports 61 | htmlcov/ 62 | .tox/ 63 | .nox/ 64 | .coverage 65 | .coverage.* 66 | .cache 67 | nosetests.xml 68 | coverage/ 69 | coverage.xml 70 | *.cover 71 | .hypothesis/ 72 | .pytest_cache/ 73 | 74 | # Translations 75 | *.mo 76 | *.pot 77 | 78 | # Scrapy stuff: 79 | .scrapy 80 | 81 | # Sphinx documentation 82 | docs/_build/ 83 | 84 | # PyBuilder 85 | target/ 86 | 87 | # pyenv 88 | .python-version 89 | 90 | # celery beat schedule file 91 | celerybeat-schedule 92 | 93 | # SageMath parsed files 94 | *.sage.py 95 | 96 | # Spyder project settings 97 | .spyderproject 98 | .spyproject 99 | 100 | # Rope project settings 101 | .ropeproject 102 | 103 | # Mr Developer 104 | .mr.developer.cfg 105 | .project 106 | .pydevproject 107 | 108 | # mkdocs documentation 109 | /site 110 | 111 | # mypy 112 | .mypy_cache/ 113 | .dmypy.json 114 | dmypy.json 115 | 116 | # Pyre type checker 117 | .pyre/ 118 | 119 | # End of https://www.gitignore.io/api/python 120 | 121 | # OSX files 122 | .DS_Store 123 | 124 | # Yarn cache 125 | .yarn/ 126 | 127 | cheat-sheet.md 128 | *.yml 129 | !.yarnrc.yml 130 | build_pkg.sh -------------------------------------------------------------------------------- /app/jupyterlab_manatee/.yarnrc.yml: -------------------------------------------------------------------------------- 1 | 
nodeLinker: node-modules 2 | 3 | npmRegistryServer: "https://registry.yarnpkg.com/" 4 | -------------------------------------------------------------------------------- /app/jupyterlab_manatee/20custom-hook.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # this hook is executed before notebook starts. 4 | pip install /manatee/jupyterlab_manatee-0.0.0-py3-none-any.whl 5 | jupyter labextension disable @jupyterlab/docmanager-extension:download 6 | jupyter labextension disable @jupyterlab/filebrowser-extension:download -------------------------------------------------------------------------------- /app/jupyterlab_manatee/BUILD.bazel: -------------------------------------------------------------------------------- 1 | load("@bazel_tools//tools/build_defs/pkg:pkg.bzl", "pkg_tar") 2 | load("@pydeps//:requirements.bzl", "all_requirements") 3 | load("@rules_oci//oci:defs.bzl", "oci_image", "oci_load") 4 | load("@rules_python//python:pip.bzl", "compile_pip_requirements") 5 | load("@rules_python//python/entry_points:py_console_script_binary.bzl", "py_console_script_binary") 6 | 7 | compile_pip_requirements( 8 | # base name for generated targets, typically "requirements". 
9 | name = "requirements", 10 | requirements_in = "requirements.in", 11 | requirements_linux = "requirements_linux.txt", 12 | requirements_txt = "requirements.txt", 13 | ) 14 | 15 | py_console_script_binary( 16 | name = "jlpm", 17 | pkg = "@pydeps//jupyterlab", 18 | script = "jlpm", 19 | ) 20 | 21 | # needed for "jupyter labextension" command invoked by jlpm 22 | py_console_script_binary( 23 | name = "jupyter-labextension", 24 | pkg = "@pydeps//jupyterlab", 25 | script = "jupyter-labextension", 26 | ) 27 | 28 | # needed for "jupyter" command invoked by jlpm 29 | py_console_script_binary( 30 | name = "jupyter", 31 | pkg = "@pydeps//jupyter_core", 32 | script = "jupyter", 33 | ) 34 | 35 | py_console_script_binary( 36 | name = "pyproject-build", 37 | pkg = "@pydeps//build", 38 | script = "pyproject-build", 39 | ) 40 | 41 | # always use 0.0.0 for dev wheel 42 | dev_wheel_name = "jupyterlab_manatee-0.0.0-py3-none-any.whl" 43 | 44 | genrule( 45 | name = "build_wheel", 46 | srcs = glob([ 47 | # python server 48 | "jupyterlab_manatee/*.py", 49 | "jupyter-config/**/*.json", 50 | # frontend 51 | "src/*.ts", 52 | "src/*.tsx", 53 | "style/*.css", 54 | "style/*.js", 55 | ]) + [ 56 | # pyproject-build 57 | "pyproject.toml", 58 | "LICENSE", 59 | "README.md", 60 | # tsc project files 61 | "tsconfig.json", 62 | # npm dependencies 63 | "package.json", 64 | "yarn.lock", 65 | # yarnrc to use obsolete `node-modules` directory 66 | ".yarnrc.yml", 67 | ], 68 | # always use 0.0.0 for dev version 69 | outs = [dev_wheel_name], 70 | cmd = "\n".join([ 71 | "export NODE=$$(realpath $(location @nodejs//:node_bin))", 72 | "export JLPM=$$(realpath $(location :jlpm))", 73 | "export JUPYTER=$$(realpath $(location :jupyter))", 74 | "export PATH=$$(dirname $$NODE):$$(dirname $$JLPM):$$(dirname $$JUPYTER):$$PATH", 75 | "export TEMP=$$(mktemp -d)", 76 | "export PYBUILD=$$(realpath $(location :pyproject-build))", 77 | "export NPM_CONFIG_USERCONFIG=$$TEMP/npmrc", 78 | 79 | # copy all source code into 
build directory under execroot 80 | "mkdir -p build", 81 | "cp -L -R $$(dirname $(execpath package.json))/* build", 82 | "cd build", 83 | 84 | # yarn config 85 | "jlpm config set globalFolder $$TEMP/npm", 86 | "jlpm config set nodeLinker node-modules", 87 | "jlpm config set npmRegistryServer \"https://registry.yarnpkg.com/\"", 88 | 89 | # yarn install (print log only on error, then abort so a failed install is not masked by echo's exit status) 90 | "output=$$(jlpm install 2>&1) || { echo \"$$output\"; exit 1; }", 91 | 92 | # build jupyter labextension 93 | "jlpm run build", 94 | 95 | # build python package using pyproject.toml 96 | "$$PYBUILD", 97 | "cd ..", 98 | 99 | # copy output 100 | "cp build/dist/jupyterlab_manatee-*.whl $(location {})".format(dev_wheel_name), 101 | ]), 102 | tools = [ 103 | ":jlpm", 104 | ":jupyter", 105 | ":jupyter-labextension", 106 | ":pyproject-build", 107 | "@nodejs//:node_bin", 108 | ], 109 | # toolchains=["@rules_python//python:current_py_toolchain"], 110 | ) 111 | 112 | pkg_tar( 113 | name = "dev_wheel_tar", 114 | srcs = [ 115 | dev_wheel_name, 116 | ], 117 | package_dir = "/manatee/", 118 | ) 119 | 120 | pkg_tar( 121 | name = "hooks_tar", 122 | srcs = [ 123 | "20custom-hook.sh", 124 | ], 125 | package_dir = "/usr/local/bin/before-notebook.d/", 126 | ) 127 | 128 | oci_image( 129 | name = "image", 130 | base = "@scipy-notebook_linux_amd64", 131 | tars = [ 132 | "@noble//:flat", 133 | ":dev_wheel_tar", 134 | ":hooks_tar", 135 | ], 136 | visibility = ["//visibility:public"], 137 | ) 138 | 139 | oci_load( 140 | name = "load_image", 141 | image = ":image", 142 | repo_tags = ["jupyterlab_manatee:latest"], 143 | visibility = ["//visibility:public"], 144 | ) 145 | -------------------------------------------------------------------------------- /app/jupyterlab_manatee/LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2023, Dayeol Lee 4 | All rights reserved.
5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /app/jupyterlab_manatee/README.md: -------------------------------------------------------------------------------- 1 | # jupyterlab_manatee 2 | 3 | This is an open-source JupyterLab extension for ManaTEE framework 4 | 5 | ## Requirements 6 | 7 | - JupyterLab >= 4.0.0 8 | 9 | ## Contributing 10 | 11 | ### Development install 12 | 13 | Note: You will need NodeJS to build the extension package. 14 | 15 | The `jlpm` command is JupyterLab's pinned version of 16 | [yarn](https://yarnpkg.com/) that is installed with JupyterLab. You may use 17 | `yarn` or `npm` in lieu of `jlpm` below. 18 | 19 | ```bash 20 | # Clone the repo to your local environment 21 | # Change directory to the jupyterlab_manatee directory 22 | # Install package in development mode 23 | pip install -e "." 24 | # Link your development version of the extension with JupyterLab 25 | jupyter labextension develop . --overwrite 26 | # Rebuild extension Typescript source after making changes 27 | jlpm build 28 | ``` 29 | 30 | You can watch the source directory and run JupyterLab at the same time in different terminals to watch for changes in the extension's source and automatically rebuild the extension. 31 | 32 | ```bash 33 | # Watch the source directory in one terminal, automatically rebuilding when needed 34 | jlpm watch 35 | # Run JupyterLab in another terminal 36 | jupyter lab 37 | ``` 38 | 39 | With the watch command running, every saved change will immediately be built locally and available in your running JupyterLab. Refresh JupyterLab to load the change in your browser (you may need to wait several seconds for the extension to be rebuilt). 40 | 41 | By default, the `jlpm build` command generates the source maps for this extension to make it easier to debug using the browser dev tools. 
To also generate source maps for the JupyterLab core extensions, you can run the following command: 42 | 43 | ```bash 44 | jupyter lab build --minimize=False 45 | ``` 46 | 47 | ### Development uninstall 48 | 49 | ```bash 50 | pip uninstall jupyterlab_manatee 51 | ``` 52 | 53 | In development mode, you will also need to remove the symlink created by `jupyter labextension develop` 54 | command. To find its location, you can run `jupyter labextension list` to figure out where the `labextensions` 55 | folder is located. Then you can remove the symlink named `jupyterlab_manatee` within that folder. 56 | 57 | ### Testing the extension 58 | 59 | #### Frontend tests 60 | 61 | This extension uses [Jest](https://jestjs.io/) for JavaScript code testing. 62 | 63 | To run the tests, execute: 64 | 65 | ```sh 66 | jlpm 67 | jlpm test 68 | ``` 69 | 70 | #### Integration tests 71 | 72 | This extension uses [Playwright](https://playwright.dev/docs/intro) for the integration tests (aka user level tests). 73 | More precisely, the JupyterLab helper [Galata](https://github.com/jupyterlab/jupyterlab/tree/master/galata) is used to handle testing the extension in JupyterLab. 74 | 75 | More information is provided within the [ui-tests](./ui-tests/README.md) README. 76 | 77 | ### Packaging the extension 78 | 79 | See [RELEASE](RELEASE.md) -------------------------------------------------------------------------------- /app/jupyterlab_manatee/RELEASE.md: -------------------------------------------------------------------------------- 1 | # Making a new release of jupyterlab_manatee 2 | 3 | The extension can be published to `PyPI` and `npm` manually or using the [Jupyter Releaser](https://github.com/jupyter-server/jupyter_releaser). 4 | 5 | ## Manual release 6 | 7 | ### Python package 8 | 9 | This extension can be distributed as Python packages. All of the Python 10 | packaging instructions are in the `pyproject.toml` file to wrap your extension in a 11 | Python package.
Before generating a package, you first need to install some tools: 12 | 13 | ```bash 14 | pip install build twine hatch 15 | ``` 16 | 17 | Bump the version using `hatch`. By default this will create a tag. 18 | See the docs on [hatch-nodejs-version](https://github.com/agoose77/hatch-nodejs-version#semver) for details. 19 | 20 | ```bash 21 | hatch version 22 | ``` 23 | 24 | Make sure to clean up all the development files before building the package: 25 | 26 | ```bash 27 | jlpm clean:all 28 | ``` 29 | 30 | You could also clean up the local git repository: 31 | 32 | ```bash 33 | git clean -dfX 34 | ``` 35 | 36 | To create a Python source package (`.tar.gz`) and the binary package (`.whl`) in the `dist/` directory, do: 37 | 38 | ```bash 39 | python -m build 40 | ``` 41 | 42 | > `python setup.py sdist bdist_wheel` is deprecated and will not work for this package. 43 | 44 | Then to upload the package to PyPI, do: 45 | 46 | ```bash 47 | twine upload dist/* 48 | ``` 49 | 50 | ### NPM package 51 | 52 | To publish the frontend part of the extension as a NPM package, do: 53 | 54 | ```bash 55 | npm login 56 | npm publish --access public 57 | ``` 58 | 59 | ## Automated releases with the Jupyter Releaser 60 | 61 | The extension repository should already be compatible with the Jupyter Releaser. 62 | 63 | Check out the [workflow documentation](https://jupyter-releaser.readthedocs.io/en/latest/get_started/making_release_from_repo.html) for more information. 
64 | 65 | Here is a summary of the steps to cut a new release: 66 | 67 | - Add tokens to the [Github Secrets](https://docs.github.com/en/actions/security-guides/encrypted-secrets) in the repository: 68 | - `ADMIN_GITHUB_TOKEN` (with "public_repo" and "repo:status" permissions); see the [documentation](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token) 69 | - `NPM_TOKEN` (with "automation" permission); see the [documentation](https://docs.npmjs.com/creating-and-viewing-access-tokens) 70 | - Set up PyPI 71 | 72 |
<details><summary>Using PyPI trusted publisher (modern way)</summary> 73 | 74 | - Set up your PyPI project by [adding a trusted publisher](https://docs.pypi.org/trusted-publishers/adding-a-publisher/) 75 | - The _workflow name_ is `publish-release.yml` and the _environment_ should be left blank. 76 | - Ensure the publish release job has `permissions`: `id-token : write` (see the [documentation](https://docs.pypi.org/trusted-publishers/using-a-publisher/)) 77 | 78 | </details>
79 | 80 |
<details><summary>Using PyPI token (legacy way)</summary> 81 | 82 | - If the repo generates PyPI release(s), create a scoped PyPI [token](https://packaging.python.org/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/#saving-credentials-on-github). We recommend using a scoped token for security reasons. 83 | 84 | - You can store the token as `PYPI_TOKEN` in your fork's `Secrets`. 85 | 86 | - Advanced usage: if you are releasing multiple repos, you can create a secret named `PYPI_TOKEN_MAP` instead of `PYPI_TOKEN` that is formatted as follows: 87 | 88 | ```text 89 | owner1/repo1,token1 90 | owner2/repo2,token2 91 | ``` 92 | 93 | If you have multiple Python packages in the same repository, you can point to them as follows: 94 | 95 | ```text 96 | owner1/repo1/path/to/package1,token1 97 | owner1/repo1/path/to/package2,token2 98 | ``` 99 | 100 | </details>
101 | 102 | - Go to the Actions panel 103 | - Run the "Step 1: Prep Release" workflow 104 | - Check the draft changelog 105 | - Run the "Step 2: Publish Release" workflow 106 | 107 | ## Publishing to `conda-forge` 108 | 109 | If the package is not on conda-forge yet, check the documentation to learn how to add it: https://conda-forge.org/docs/maintainer/adding_pkgs.html 110 | 111 | Otherwise a bot should pick up the new version published to PyPI, and open a new PR on the feedstock repository automatically. 112 | -------------------------------------------------------------------------------- /app/jupyterlab_manatee/babel.config.js: -------------------------------------------------------------------------------- 1 | module.exports = require('@jupyterlab/testutils/lib/babel.config'); 2 | -------------------------------------------------------------------------------- /app/jupyterlab_manatee/install.json: -------------------------------------------------------------------------------- 1 | { 2 | "packageManager": "python", 3 | "packageName": "jupyterlab_manatee", 4 | "uninstallInstructions": "Use your Python package manager (pip, conda, etc.)
to uninstall the package jupyterlab_manatee" 5 | } 6 | -------------------------------------------------------------------------------- /app/jupyterlab_manatee/jest.config.js: -------------------------------------------------------------------------------- 1 | const jestJupyterLab = require('@jupyterlab/testutils/lib/jest-config'); 2 | 3 | const esModules = [ 4 | '@codemirror', 5 | '@jupyter/ydoc', 6 | '@jupyterlab/', 7 | 'lib0', 8 | 'nanoid', 9 | 'vscode-ws-jsonrpc', 10 | 'y-protocols', 11 | 'y-websocket', 12 | 'yjs' 13 | ].join('|'); 14 | 15 | const baseConfig = jestJupyterLab(__dirname); 16 | 17 | module.exports = { 18 | ...baseConfig, 19 | automock: false, 20 | collectCoverageFrom: [ 21 | 'src/**/*.{ts,tsx}', 22 | '!src/**/*.d.ts', 23 | '!src/**/.ipynb_checkpoints/*' 24 | ], 25 | coverageReporters: ['lcov', 'text'], 26 | testRegex: 'src/.*/.*.spec.ts[x]?$', 27 | transformIgnorePatterns: [`/node_modules/(?!${esModules}).+`] 28 | }; 29 | -------------------------------------------------------------------------------- /app/jupyterlab_manatee/jupyter-config/jupyter_server_config.d/jupyterlab_manatee.json: -------------------------------------------------------------------------------- 1 | { 2 | "ServerApp": { 3 | "jpserver_extensions": { 4 | "jupyterlab_manatee": true 5 | } 6 | } 7 | } -------------------------------------------------------------------------------- /app/jupyterlab_manatee/jupyterlab_manatee/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 TikTok Pte. Ltd. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import jupyter_server 16 | from jupyter_server.utils import url_path_join 17 | from ._version import __version__ 18 | from .handlers import * 19 | 20 | def _jupyter_server_extension_points(): 21 | return [{ 22 | 'module': 'jupyterlab_manatee' 23 | }] 24 | 25 | def _jupyter_labextension_paths(): 26 | return [{ 27 | "src": "labextension", 28 | "dest": "jupyterlab_manatee" 29 | }] 30 | 31 | 32 | def _load_jupyter_server_extension(serverapp: jupyter_server.serverapp.ServerApp): 33 | """ 34 | Called when the extension is loaded. 35 | """ 36 | 37 | web_app = serverapp.web_app 38 | base_url = web_app.settings['base_url'] 39 | handlers = [ 40 | (url_path_join(base_url, 'manatee', 'jobs'), DataCleanRoomJobHandler), 41 | (url_path_join(base_url, 'manatee', 'output'), DataCleanRoomOutputHandler), (url_path_join(base_url, 'manatee', 'attestation'), DataCleanRoomAttestationHandler), 42 | ] 43 | web_app.add_handlers('.*$', handlers) -------------------------------------------------------------------------------- /app/jupyterlab_manatee/noble.yaml: -------------------------------------------------------------------------------- 1 | # Packages for examples/debian_snapshot. 2 | # 3 | # Anytime this file is changed, the lockfile needs to be regenerated. 
4 | # 5 | # To generate the noble.lock.json run the following command 6 | # 7 | # bazel run @noble//:lock 8 | # 9 | # See debian_package_index at WORKSPACE.bazel 10 | version: 1 11 | 12 | sources: 13 | - channel: noble main 14 | url: https://snapshot.ubuntu.com/ubuntu/20240301T030400Z 15 | - channel: noble-security main 16 | url: https://snapshot.ubuntu.com/ubuntu/20240301T030400Z 17 | - channel: noble-updates main 18 | url: https://snapshot.ubuntu.com/ubuntu/20240301T030400Z 19 | 20 | archs: 21 | - "amd64" 22 | 23 | packages: 24 | - "libblas-dev" 25 | - "liblapack-dev" -------------------------------------------------------------------------------- /app/jupyterlab_manatee/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling>=1.5.0", "jupyterlab>=4.0.0,<5", "hatch-nodejs-version>=0.3.2", "aiohttp", "aiofiles"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "jupyterlab_manatee" 7 | readme = "README.md" 8 | license = { file = "LICENSE" } 9 | requires-python = ">=3.8" 10 | classifiers = [ 11 | "Framework :: Jupyter", 12 | "Framework :: Jupyter :: JupyterLab", 13 | "Framework :: Jupyter :: JupyterLab :: 4", 14 | "Framework :: Jupyter :: JupyterLab :: Extensions", 15 | "Framework :: Jupyter :: JupyterLab :: Extensions :: Prebuilt", 16 | "License :: OSI Approved :: BSD License", 17 | "Programming Language :: Python", 18 | "Programming Language :: Python :: 3", 19 | "Programming Language :: Python :: 3.8", 20 | "Programming Language :: Python :: 3.9", 21 | "Programming Language :: Python :: 3.10", 22 | "Programming Language :: Python :: 3.11", 23 | "Programming Language :: Python :: 3.12", 24 | ] 25 | dependencies = [ 26 | "aiohttp", 27 | "aiofiles", 28 | "tornado>=6.3", 29 | ] 30 | dynamic = ["version", "description", "authors", "urls", "keywords"] 31 | 32 | [tool.hatch.version] 33 | source = "nodejs" 34 | 35 | [tool.hatch.metadata.hooks.nodejs] 36 | fields
= ["description", "authors", "urls"] 37 | 38 | [tool.hatch.build.targets.sdist] 39 | artifacts = ["jupyterlab_manatee/labextension"] 40 | exclude = [".github", "binder"] 41 | 42 | [tool.hatch.build.targets.wheel.shared-data] 43 | "jupyter-config/jupyter_server_config.d" = "etc/jupyter/jupyter_server_config.d" 44 | "jupyterlab_manatee/labextension" = "share/jupyter/labextensions/jupyterlab_manatee" 45 | "install.json" = "share/jupyter/labextensions/jupyterlab_manatee/install.json" 46 | 47 | [tool.hatch.build.hooks.version] 48 | path = "jupyterlab_manatee/_version.py" 49 | 50 | [tool.hatch.build.hooks.jupyter-builder] 51 | dependencies = ["hatch-jupyter-builder>=0.5"] 52 | build-function = "hatch_jupyter_builder.npm_builder" 53 | ensured-targets = [ 54 | "jupyterlab_manatee/labextension/static/style.js", 55 | "jupyterlab_manatee/labextension/package.json", 56 | ] 57 | skip-if-exists = ["jupyterlab_manatee/labextension/static/style.js"] 58 | 59 | [tool.hatch.build.hooks.jupyter-builder.build-kwargs] 60 | build_cmd = "build:prod" 61 | npm = ["jlpm"] 62 | 63 | [tool.hatch.build.hooks.jupyter-builder.editable-build-kwargs] 64 | build_cmd = "install:extension" 65 | npm = ["jlpm"] 66 | source_dir = "src" 67 | build_dir = "jupyterlab_manatee/labextension" 68 | 69 | [tool.jupyter-releaser.options] 70 | version_cmd = "hatch version" 71 | 72 | [tool.jupyter-releaser.hooks] 73 | before-build-npm = [ 74 | "python -m pip install 'jupyterlab>=4.0.0,<5'", 75 | "jlpm", 76 | "jlpm build:prod" 77 | ] 78 | before-build-python = ["jlpm clean:all"] 79 | 80 | [tool.check-wheel-contents] 81 | ignore = ["W002"] 82 | -------------------------------------------------------------------------------- /app/jupyterlab_manatee/requirements.in: -------------------------------------------------------------------------------- 1 | jupyterhub==4.0.2 2 | jupyterlab 3 | twine 4 | hatch 5 | build 6 | setuptools 7 | -------------------------------------------------------------------------------- 
/app/jupyterlab_manatee/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 TikTok Pte. Ltd. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | __import__("setuptools").setup() 16 | -------------------------------------------------------------------------------- /app/jupyterlab_manatee/src/__tests__/jupyterlab_manatee.spec.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2024 TikTok Pte. Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | /** 18 | * Example of [Jest](https://jestjs.io/docs/getting-started) unit tests 19 | */ 20 | 21 | describe('jupyterlab_manatee', () => { 22 | it('should be tested', () => { 23 | expect(1 + 1).toEqual(2); 24 | }); 25 | }); 26 | -------------------------------------------------------------------------------- /app/jupyterlab_manatee/src/index.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2024 TikTok Pte. Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | import { 18 | JupyterFrontEnd, 19 | JupyterFrontEndPlugin, 20 | ILayoutRestorer, 21 | } from '@jupyterlab/application'; 22 | import { IDocumentManager } from '@jupyterlab/docmanager'; 23 | import { ITranslator } from '@jupyterlab/translation'; 24 | import { DataCleanRoomSidebar } from './sidebar'; 25 | 26 | 27 | async function activate(app: JupyterFrontEnd, docManager: IDocumentManager, translator: ITranslator, restorer: ILayoutRestorer | null) { 28 | console.log("JupyterLab extension jupyterlab_manatee is activated!"); 29 | 30 | const sidebar = new DataCleanRoomSidebar({manager: docManager}); 31 | 32 | app.shell.add(sidebar, 'right', {rank: 0}); 33 | 34 | if (restorer) { 35 | restorer.add(sidebar, "data-clean-room-side-bar"); 36 | } 37 | } 38 | 39 | /** 40 | * Initialization data for the jupyterlab-manatee extension. 
41 | */ 42 | const plugin: JupyterFrontEndPlugin<void> = { 43 | id: 'jupyterlab_manatee:plugin', 44 | description: 'This is an open-source JupyterLab extension for ManaTEE framework', 45 | autoStart: true, 46 | requires: [IDocumentManager, ITranslator], 47 | optional: [ILayoutRestorer], 48 | activate: activate 49 | }; 50 | 51 | export default plugin; 52 | -------------------------------------------------------------------------------- /app/jupyterlab_manatee/src/sidebar.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2024 TikTok Pte. Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | 17 | import { SidePanel, trustedIcon } from '@jupyterlab/ui-components'; 18 | import { ITranslator, nullTranslator } from '@jupyterlab/translation'; 19 | import { IDocumentManager } from '@jupyterlab/docmanager'; 20 | import { DataCleanRoomSources } from './sources'; 21 | // import { DataCleanRoomInputs } from './inputs'; 22 | import { DataCleanRoomJobs } from './jobs'; 23 | 24 | export class DataCleanRoomSidebar extends SidePanel { 25 | constructor(options: DataCleanRoomSidebar.IOptions) { 26 | const { manager } = options; 27 | const translator = options.translator || nullTranslator; 28 | super({ translator }); 29 | 30 | const jobsPanel = new DataCleanRoomJobs({ translator }); 31 | const sourcesPanel = new DataCleanRoomSources({ manager, translator }); 32 | this.addClass("jp-manatee-sidebar-view") 33 | 34 | this.title.icon = trustedIcon; 35 | this.id = "jp-DCRSource-sidebar" 36 | this.addWidget(jobsPanel); 37 | this.addWidget(sourcesPanel); 38 | } 39 | } 40 | 41 | export namespace DataCleanRoomSidebar { 42 | export interface IOptions { 43 | manager: IDocumentManager; 44 | translator?: ITranslator; 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /app/jupyterlab_manatee/src/sources.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2024 TikTok Pte. Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | import { Contents } from '@jupyterlab/services'; 18 | import { IDocumentManager } from '@jupyterlab/docmanager'; 19 | import { PanelWithToolbar, ToolbarButton, fileUploadIcon, } from '@jupyterlab/ui-components'; 20 | import { filter } from '@lumino/algorithm'; 21 | import { ITranslator, nullTranslator } from '@jupyterlab/translation'; 22 | import { FileBrowser, FilterFileBrowserModel } from '@jupyterlab/filebrowser'; 23 | import { showDialog, Dialog } from '@jupyterlab/apputils'; 24 | import { ServerConnection } from '@jupyterlab/services'; 25 | 26 | /* 27 | This class overrides items() to make the filebrowser list only ipynb files. 28 | We're doing this solely for demo purposes, and the actual product may have additional files 29 | (e.g., local python modules) 30 | */ 31 | class NotebookOnlyFilterFileBrowserModel extends FilterFileBrowserModel { 32 | override items(): IterableIterator<Contents.IModel> { 33 | return filter(super.items(), value => { 34 | if (value.type === 'notebook') { 35 | return true; 36 | } else { 37 | return false; 38 | } 39 | }); 40 | } 41 | } 42 | 43 | export class DataCleanRoomSources extends PanelWithToolbar { 44 | constructor(options: DataCleanRoomSources.IOptions) { 45 | super(); 46 | const { manager } = options; 47 | this._manager = manager; 48 | const trans = (options.translator ??
nullTranslator).load('jupyterlab'); 49 | this.title.label = trans.__('Sources'); 50 | 51 | const fbModel = new NotebookOnlyFilterFileBrowserModel({ 52 | manager: manager, 53 | }); 54 | this._browser = new FileBrowser({ 55 | id: 'jupyterlab_manatee:plugin:sources', 56 | model: fbModel 57 | }); 58 | this.toolbar.addItem( 59 | 'submit', 60 | new ToolbarButton({ 61 | icon: fileUploadIcon, 62 | onClick: () => this.sendSelectedFilesToAPI(), 63 | tooltip: trans.__('Submit Job to Data Clean Room') 64 | }) 65 | ); 66 | 67 | this.addWidget(this._browser); 68 | }; 69 | 70 | async sendSelectedFilesToAPI() { 71 | for (const item of this._browser.selectedItems()) { 72 | const result = await showDialog({ 73 | title: "Submitting a Job to Data Clean Room?", 74 | body: 'Path: ' + item.path, 75 | buttons: [Dialog.okButton(), Dialog.cancelButton()] 76 | }); 77 | 78 | if (result.button.accept) { 79 | const file = await this._manager.services.contents.get(item.path); 80 | // Prepare data 81 | const data = JSON.stringify({ 82 | path: item.path, 83 | filename: file.name 84 | }); 85 | 86 | const settings = ServerConnection.makeSettings(); 87 | 88 | console.log("Sending... 
%s", settings.baseUrl); 89 | ServerConnection.makeRequest(settings.baseUrl + "manatee/jobs", { 90 | body: data, method: "POST" 91 | }, settings).then(response => { 92 | if (response.status !== 200) { 93 | console.log("Error has occured!"); 94 | } 95 | response.body?.getReader().read().then(({done, value}) => { 96 | if (done) { 97 | console.log("stream is closed"); 98 | return; 99 | } 100 | let decoder = new TextDecoder('utf-8'); 101 | console.log("value:", decoder.decode(value)); 102 | }); 103 | }); 104 | } 105 | } 106 | } 107 | 108 | protected _manager : IDocumentManager; 109 | protected _browser : FileBrowser; 110 | } 111 | 112 | export namespace DataCleanRoomSources { 113 | export interface IOptions { 114 | manager: IDocumentManager; 115 | translator?: ITranslator; 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /app/jupyterlab_manatee/style/base.css: -------------------------------------------------------------------------------- 1 | /* 2 | See the JupyterLab Developer Guide for useful CSS Patterns: 3 | 4 | https://jupyterlab.readthedocs.io/en/stable/developer/css.html 5 | */ 6 | 7 | .jp-manatee-sidebar-view .lm-SplitPanel-child { 8 | overflow-y: auto; 9 | } 10 | -------------------------------------------------------------------------------- /app/jupyterlab_manatee/style/index.css: -------------------------------------------------------------------------------- 1 | @import url('base.css'); 2 | -------------------------------------------------------------------------------- /app/jupyterlab_manatee/style/index.js: -------------------------------------------------------------------------------- 1 | import './base.css'; 2 | -------------------------------------------------------------------------------- /app/jupyterlab_manatee/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "allowSyntheticDefaultImports": true, 4 | "composite": 
true, 5 | "declaration": true, 6 | "esModuleInterop": true, 7 | "incremental": true, 8 | "jsx": "react", 9 | "module": "esnext", 10 | "moduleResolution": "node", 11 | "noEmitOnError": true, 12 | "noImplicitAny": true, 13 | "noUnusedLocals": true, 14 | "preserveWatchOutput": true, 15 | "resolveJsonModule": true, 16 | "outDir": "lib", 17 | "rootDir": "src", 18 | "strict": true, 19 | "strictNullChecks": true, 20 | "target": "ES2018", 21 | "skipLibCheck": true, 22 | }, 23 | "include": ["src/*"] 24 | } 25 | -------------------------------------------------------------------------------- /app/jupyterlab_manatee/tsconfig.test.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "./tsconfig", 3 | "compilerOptions": { 4 | "types": ["jest"] 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /app/jupyterlab_manatee/ui-tests/README.md: -------------------------------------------------------------------------------- 1 | # Integration Testing 2 | 3 | This folder contains the integration tests of the extension. 4 | 5 | They are defined using [Playwright](https://playwright.dev/docs/intro) test runner 6 | and [Galata](https://github.com/jupyterlab/jupyterlab/tree/main/galata) helper. 7 | 8 | The Playwright configuration is defined in [playwright.config.js](./playwright.config.js). 9 | 10 | The JupyterLab server configuration to use for the integration test is defined 11 | in [jupyter_server_test_config.py](./jupyter_server_test_config.py). 12 | 13 | The default configuration will produce video for failing tests and an HTML report. 14 | 15 | > There is a new experimental UI mode that you may fall in love with; see [that video](https://www.youtube.com/watch?v=jF0yA-JLQW0). 16 | 17 | ## Run the tests 18 | 19 | > All commands are assumed to be executed from the root directory 20 | 21 | To run the tests, you need to: 22 | 23 | 1. 
Compile the extension: 24 | 25 | ```sh 26 | jlpm install 27 | jlpm build:prod 28 | ``` 29 | 30 | > Check the extension is installed in JupyterLab. 31 | 32 | 2. Install test dependencies (needed only once): 33 | 34 | ```sh 35 | cd ./ui-tests 36 | jlpm install 37 | jlpm playwright install 38 | cd .. 39 | ``` 40 | 41 | 3. Execute the [Playwright](https://playwright.dev/docs/intro) tests: 42 | 43 | ```sh 44 | cd ./ui-tests 45 | jlpm playwright test 46 | ``` 47 | 48 | Test results will be shown in the terminal. In case of any test failures, the test report 49 | will be opened in your browser at the end of the test execution; see 50 | [Playwright documentation](https://playwright.dev/docs/test-reporters#html-reporter) 51 | for configuring that behavior. 52 | 53 | ## Update the tests snapshots 54 | 55 | > All commands are assumed to be executed from the root directory 56 | 57 | If you are comparing snapshots to validate your tests, you may need to update 58 | the reference snapshots stored in the repository. To do that, you need to: 59 | 60 | 1. Compile the extension: 61 | 62 | ```sh 63 | jlpm install 64 | jlpm build:prod 65 | ``` 66 | 67 | > Check the extension is installed in JupyterLab. 68 | 69 | 2. Install test dependencies (needed only once): 70 | 71 | ```sh 72 | cd ./ui-tests 73 | jlpm install 74 | jlpm playwright install 75 | cd .. 76 | ``` 77 | 78 | 3. Execute the [Playwright](https://playwright.dev/docs/intro) command: 79 | 80 | ```sh 81 | cd ./ui-tests 82 | jlpm playwright test -u 83 | ``` 84 | 85 | > Some discrepancies may occur between the snapshots generated on your computer and 86 | > those generated on the CI. To ease updating the snapshots on a PR, you can 87 | > type `please update playwright snapshots` to trigger the update by a bot on the CI. 88 | > Once the bot has computed new snapshots, it will commit them to the PR branch.
89 | 90 | ## Create tests 91 | 92 | > All commands are assumed to be executed from the root directory 93 | 94 | To create tests, the easiest way is to use the code generator tool of playwright: 95 | 96 | 1. Compile the extension: 97 | 98 | ```sh 99 | jlpm install 100 | jlpm build:prod 101 | ``` 102 | 103 | > Check the extension is installed in JupyterLab. 104 | 105 | 2. Install test dependencies (needed only once): 106 | 107 | ```sh 108 | cd ./ui-tests 109 | jlpm install 110 | jlpm playwright install 111 | cd .. 112 | ``` 113 | 114 | 3. Start the server: 115 | 116 | ```sh 117 | cd ./ui-tests 118 | jlpm start 119 | ``` 120 | 121 | 4. Execute the [Playwright code generator](https://playwright.dev/docs/codegen) in **another terminal**: 122 | 123 | ```sh 124 | cd ./ui-tests 125 | jlpm playwright codegen localhost:8888 126 | ``` 127 | 128 | ## Debug tests 129 | 130 | > All commands are assumed to be executed from the root directory 131 | 132 | To debug tests, a good way is to use the inspector tool of playwright: 133 | 134 | 1. Compile the extension: 135 | 136 | ```sh 137 | jlpm install 138 | jlpm build:prod 139 | ``` 140 | 141 | > Check the extension is installed in JupyterLab. 142 | 143 | 2. Install test dependencies (needed only once): 144 | 145 | ```sh 146 | cd ./ui-tests 147 | jlpm install 148 | jlpm playwright install 149 | cd .. 150 | ``` 151 | 152 | 3. 
Execute the Playwright tests in [debug mode](https://playwright.dev/docs/debug): 153 | 154 | ```sh 155 | cd ./ui-tests 156 | jlpm playwright test --debug 157 | ``` 158 | 159 | ## Upgrade Playwright and the browsers 160 | 161 | To update the web browser versions, you must update the package `@playwright/test`: 162 | 163 | ```sh 164 | cd ./ui-tests 165 | jlpm up "@playwright/test" 166 | jlpm playwright install 167 | ``` 168 | -------------------------------------------------------------------------------- /app/jupyterlab_manatee/ui-tests/jupyter_server_test_config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 TikTok Pte. Ltd. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Server configuration for integration tests. 16 | 17 | !! Never use this configuration in production because it 18 | opens the server to the world and provide access to JupyterLab 19 | JavaScript objects through the global window variable. 
20 | """ 21 | from jupyterlab.galata import configure_jupyter_server 22 | 23 | configure_jupyter_server(c) 24 | 25 | # Uncomment to set server log level to debug level 26 | # c.ServerApp.log_level = "DEBUG" 27 | -------------------------------------------------------------------------------- /app/jupyterlab_manatee/ui-tests/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "jupyterlab_manatee-ui-tests", 3 | "version": "1.0.0", 4 | "description": "JupyterLab jupyterlab-manatee Integration Tests", 5 | "private": true, 6 | "scripts": { 7 | "start": "jupyter lab --config jupyter_server_test_config.py", 8 | "test": "jlpm playwright test", 9 | "test:update": "jlpm playwright test --update-snapshots" 10 | }, 11 | "devDependencies": { 12 | "@jupyterlab/galata": "^5.0.5", 13 | "@playwright/test": "^1.37.0" 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /app/jupyterlab_manatee/ui-tests/playwright.config.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Configuration for Playwright using default from @jupyterlab/galata 3 | */ 4 | const baseConfig = require('@jupyterlab/galata/lib/playwright-config'); 5 | 6 | module.exports = { 7 | ...baseConfig, 8 | webServer: { 9 | command: 'jlpm start', 10 | url: 'http://localhost:8888/lab', 11 | timeout: 120 * 1000, 12 | reuseExistingServer: !process.env.CI 13 | } 14 | }; 15 | -------------------------------------------------------------------------------- /app/jupyterlab_manatee/ui-tests/tests/jupyterlab_manatee.spec.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2024 TikTok Pte. Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
/**
 * Keep Galata from navigating to JupyterLab on its own: the console listener
 * below has to be attached before the page loads so no message is missed.
 */
test.use({ autoGoto: false });

test('should emit an activation console message', async ({ page }) => {
  const activationMessage =
    'JupyterLab extension jupyterlab-manatee is activated!';
  const seen: string[] = [];

  // Record the text of every console message the page emits.
  page.on('console', message => {
    seen.push(message.text());
  });

  await page.goto();

  // The activation message must have been logged exactly once.
  const matches = seen.filter(text => text === activationMessage);
  expect(matches).toHaveLength(1);
});
["//visibility:private"], 13 | deps = [ 14 | "//app/api/biz/dal/db", 15 | "//app/api/biz/model/job", 16 | "//app/reconciler/imagebuilder", 17 | "//app/reconciler/registry", 18 | "//app/reconciler/tee_backend", 19 | "@com_github_cloudwego_hertz//pkg/common/hlog", 20 | ], 21 | ) 22 | 23 | go_binary( 24 | name = "reconciler", 25 | embed = [":reconciler_lib"], 26 | goarch = "amd64", 27 | goos = "linux", 28 | visibility = ["//visibility:public"], 29 | ) 30 | 31 | pkg_tar( 32 | name = "tar", 33 | srcs = [":reconciler"], 34 | ) 35 | 36 | oci_image( 37 | name = "image", 38 | base = "@distroless_base_linux_amd64", 39 | entrypoint = ["/reconciler"], 40 | tars = [ 41 | ":tar", 42 | ], 43 | visibility = ["//visibility:public"], 44 | ) 45 | 46 | oci_load( 47 | name = "load_image", 48 | image = ":image", 49 | repo_tags = ["reconciler:latest"], 50 | visibility = ["//visibility:public"], 51 | ) 52 | 53 | go_test( 54 | name = "reconciler_test", 55 | srcs = ["reconciler_test.go"], 56 | embed = [":reconciler_lib"], 57 | deps = [ 58 | "//app/api/biz/dal/db", 59 | "//app/api/biz/model/job", 60 | "//app/reconciler/imagebuilder", 61 | "@com_github_cloudwego_hertz//pkg/common/test/assert", 62 | "@io_gorm_gorm//:gorm", 63 | ], 64 | ) 65 | -------------------------------------------------------------------------------- /app/reconciler/imagebuilder/BUILD.bazel: -------------------------------------------------------------------------------- 1 | load("@rules_go//go:def.bzl", "go_library", "go_test") 2 | 3 | go_library( 4 | name = "imagebuilder", 5 | srcs = ["kaniko.go"], 6 | importpath = "github.com/manatee-project/manatee/app/reconciler/imagebuilder", 7 | visibility = ["//visibility:public"], 8 | deps = [ 9 | "//app/api/biz/dal/db", 10 | "@com_github_cloudwego_hertz//pkg/common/hlog", 11 | "@com_github_pkg_errors//:errors", 12 | "@io_k8s_api//batch/v1:batch", 13 | "@io_k8s_api//core/v1:core", 14 | "@io_k8s_apimachinery//pkg/api/resource", 15 | "@io_k8s_apimachinery//pkg/apis/meta/v1:meta", 
16 | "@io_k8s_client_go//kubernetes", 17 | "@io_k8s_client_go//rest", 18 | ], 19 | ) 20 | 21 | go_test( 22 | name = "imagebuilder_test", 23 | srcs = ["kaniko_test.go"], 24 | embed = [":imagebuilder"], 25 | ) 26 | -------------------------------------------------------------------------------- /app/reconciler/imagebuilder/kaniko_test.go: -------------------------------------------------------------------------------- 1 | package imagebuilder 2 | 3 | import ( 4 | "bufio" 5 | "strings" 6 | "testing" 7 | ) 8 | 9 | func TestGetImageAndDigestFromLog(t *testing.T) { 10 | // Sample log line with a typical URL and digest 11 | logLine := "INFO[0242] Pushed example-registry.com/namespace/repository/image@sha256:1253099ce7721d3879373d411fc7938aef80000154c9c0455c2229497ed59336\n" 12 | expectedImage := "example-registry.com/namespace/repository/image@sha256:1253099ce7721d3879373d411fc7938aef80000154c9c0455c2229497ed59336" 13 | expectedDigest := "1253099ce7721d3879373d411fc7938aef80000154c9c0455c2229497ed59336" 14 | 15 | // Simulate a reader with the log line 16 | reader := bufio.NewReader(strings.NewReader(logLine)) 17 | 18 | // Instantiate the struct containing the function if needed 19 | b := KanikoImageBuilder{} 20 | 21 | // Call the function to test 22 | image, digest, err := b.getImageAndDigestFromLog(reader) 23 | if err != nil { 24 | t.Fatalf("Expected no error, but got %v", err) 25 | } 26 | 27 | // Verify the output matches the expected values 28 | if image != expectedImage { 29 | t.Errorf("Expected image %v, but got %v", expectedImage, image) 30 | } 31 | if digest != expectedDigest { 32 | t.Errorf("Expected digest %v, but got %v", expectedDigest, digest) 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /app/reconciler/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "time" 6 | 7 | "github.com/cloudwego/hertz/pkg/common/hlog" 8 | 
"github.com/manatee-project/manatee/app/api/biz/dal/db" 9 | ) 10 | 11 | func main() { 12 | 13 | ctx := context.Background() 14 | 15 | db.Init() 16 | 17 | reconciler := NewReconciler(ctx) 18 | 19 | for { 20 | hlog.Info("Reconciling...") 21 | reconciler.Reconcile(ctx) 22 | 23 | time.Sleep(10 * time.Second) 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /app/reconciler/registry/BUILD.bazel: -------------------------------------------------------------------------------- 1 | load("@rules_go//go:def.bzl", "go_library") 2 | 3 | go_library( 4 | name = "registry", 5 | srcs = ["registry.go"], 6 | importpath = "github.com/manatee-project/manatee/app/reconciler/registry", 7 | visibility = ["//visibility:public"], 8 | ) 9 | -------------------------------------------------------------------------------- /app/reconciler/registry/registry.go: -------------------------------------------------------------------------------- 1 | package registry 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | ) 7 | 8 | type Registry interface { 9 | Url() string 10 | BaseImage() string 11 | } 12 | 13 | type GoogleDockerRegistry struct { 14 | } 15 | 16 | func (g *GoogleDockerRegistry) Url() string { 17 | projectId := os.Getenv("PROJECT_ID") 18 | if projectId == "" { 19 | panic("PROJECT_ID environment variable is not present") 20 | } 21 | env := os.Getenv("ENV") 22 | if env == "" { 23 | panic("ENV environment variable is not present") 24 | } 25 | 26 | return fmt.Sprintf("us-docker.pkg.dev/%s/dcr-%s-user-images", projectId, env) 27 | } 28 | 29 | func (g *GoogleDockerRegistry) BaseImage() string { 30 | return fmt.Sprintf("%s/manatee-executor-base:latest", g.Url()) 31 | } 32 | 33 | type MinikubeDockerRegistry struct { 34 | } 35 | 36 | func (m *MinikubeDockerRegistry) Url() string { 37 | return "registry.kube-system.svc.cluster.local" 38 | } 39 | 40 | func (m *MinikubeDockerRegistry) BaseImage() string { 41 | return fmt.Sprintf("%s/executor:latest", m.Url()) 42 
| } 43 | 44 | func GetRegistry() Registry { 45 | registryType := os.Getenv("REGISTRY_TYPE") 46 | if registryType == "" { 47 | registryType = "GCP" 48 | } 49 | var registry Registry 50 | if registryType == "GCP" { 51 | registry = &GoogleDockerRegistry{} 52 | } else if registryType == "MINIKUBE" { 53 | registry = &MinikubeDockerRegistry{} 54 | } 55 | return registry 56 | } 57 | -------------------------------------------------------------------------------- /app/reconciler/tee_backend/BUILD.bazel: -------------------------------------------------------------------------------- 1 | load("@rules_go//go:def.bzl", "go_library") 2 | 3 | go_library( 4 | name = "tee_backend", 5 | srcs = [ 6 | "confidential_space.go", 7 | "mock_teebackend.go", 8 | ], 9 | importpath = "github.com/manatee-project/manatee/app/reconciler/tee_backend", 10 | visibility = ["//visibility:public"], 11 | deps = [ 12 | "@com_github_cloudwego_hertz//pkg/common/hlog", 13 | "@com_github_pkg_errors//:errors", 14 | "@com_google_cloud_go_compute//apiv1", 15 | "@com_google_cloud_go_compute//apiv1/computepb", 16 | "@io_k8s_api//batch/v1:batch", 17 | "@io_k8s_api//core/v1:core", 18 | "@io_k8s_apimachinery//pkg/apis/meta/v1:meta", 19 | "@io_k8s_client_go//kubernetes", 20 | "@io_k8s_client_go//rest", 21 | "@org_golang_google_protobuf//proto", 22 | ], 23 | ) 24 | -------------------------------------------------------------------------------- /app/reconciler/tee_backend/mock_teebackend.go: -------------------------------------------------------------------------------- 1 | package tee_backend 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "os" 7 | "strings" 8 | 9 | "github.com/cloudwego/hertz/pkg/common/hlog" 10 | "github.com/pkg/errors" 11 | batchv1 "k8s.io/api/batch/v1" 12 | corev1 "k8s.io/api/core/v1" 13 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 14 | "k8s.io/client-go/kubernetes" 15 | "k8s.io/client-go/rest" 16 | ) 17 | 18 | type MockTeeBackend struct { 19 | ctx context.Context 20 | clientSet 
*kubernetes.Clientset 21 | namespace string 22 | } 23 | 24 | func NewMockTeeBackend(ctx context.Context) (*MockTeeBackend, error) { 25 | clusterConfig, err := rest.InClusterConfig() 26 | if err != nil { 27 | return nil, errors.Wrap(err, "failed to init cluster config") 28 | } 29 | 30 | clientSet, err := kubernetes.NewForConfig(clusterConfig) 31 | if err != nil { 32 | return nil, errors.Wrap(err, "failed to create client") 33 | } 34 | 35 | RunningNameSpaceByte, err := os.ReadFile("/var/run/secrets/kubernetes.io/serviceaccount/namespace") 36 | if err != nil { 37 | return nil, errors.Wrap(err, "failed to get namespace") 38 | } 39 | namespace := string(RunningNameSpaceByte) 40 | 41 | return &MockTeeBackend{ 42 | ctx: ctx, 43 | clientSet: clientSet, 44 | namespace: namespace, 45 | }, nil 46 | } 47 | 48 | func (m *MockTeeBackend) LaunchInstance(instanceName string, image string, digest string, extraEnvs map[string]string) error { 49 | ttlSecondsAfterFinished := int32(3600 * 3) 50 | var envs []corev1.EnvVar 51 | for key, value := range extraEnvs { 52 | envs = append(envs, corev1.EnvVar{ 53 | Name: key, 54 | Value: value, 55 | }) 56 | } 57 | envs = append(envs, corev1.EnvVar{ 58 | Name: "TEE_BACKEND", 59 | Value: os.Getenv("TEE_BACKEND"), 60 | }, 61 | ) 62 | mockTeeJob := &batchv1.Job{ 63 | ObjectMeta: metav1.ObjectMeta{ 64 | Name: instanceName, 65 | Namespace: m.namespace, 66 | }, 67 | Spec: batchv1.JobSpec{ 68 | TTLSecondsAfterFinished: &ttlSecondsAfterFinished, 69 | Template: corev1.PodTemplateSpec{ 70 | Spec: corev1.PodSpec{ 71 | ServiceAccountName: "dcr-k8s-pod-sa", 72 | Containers: []corev1.Container{ 73 | { 74 | Name: "mock-tee", 75 | Image: convertImageToLocal(image), 76 | Env: envs, 77 | }, 78 | }, 79 | RestartPolicy: "Never", 80 | }, 81 | }, 82 | }, 83 | } 84 | _, err := m.clientSet.BatchV1().Jobs(m.namespace).Create(m.ctx, mockTeeJob, metav1.CreateOptions{}) 85 | if err != nil { 86 | return errors.Wrap(err, "failed to create kubernetes job") 87 | } 88 | return 
nil 89 | } 90 | 91 | func (m *MockTeeBackend) CleanUpInstance(instanceName string) error { 92 | deletePolicy := metav1.DeletePropagationForeground 93 | if err := m.clientSet.BatchV1().Jobs(m.namespace).Delete(m.ctx, instanceName, metav1.DeleteOptions{ 94 | PropagationPolicy: &deletePolicy, 95 | }); err != nil { 96 | return errors.Wrap(err, "failed to delete job") 97 | } 98 | return nil 99 | } 100 | 101 | func (m *MockTeeBackend) GetInstanceStatus(instanceName string) (string, error) { 102 | teeJob, err := m.clientSet.BatchV1().Jobs(m.namespace).Get(m.ctx, instanceName, metav1.GetOptions{}) 103 | if err != nil { 104 | hlog.Errorf("[MockTeeBackend]failed to get mock tee job: %v", err) 105 | return "", errors.Wrap(err, "failed to get job") 106 | } 107 | hlog.Infof("[MockTeeBackend]mock tee name: %v, status: %v", teeJob.Name, teeJob.Status) 108 | if teeJob.Status.Active > 0 { 109 | return "RUNNING", nil 110 | } else { 111 | return "TERMINATED", nil 112 | } 113 | } 114 | 115 | func convertImageToLocal(imageName string) string { 116 | index := strings.Index(imageName, "/") 117 | if index == -1 { 118 | hlog.Errorf("[MockTeeBackend]failed to find / in image name") 119 | return "" 120 | } 121 | return fmt.Sprintf("localhost:5000/%s", imageName[index+1:]) 122 | } 123 | -------------------------------------------------------------------------------- /deployment/deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2024 TikTok Pte. Ltd. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
# Abort on the first failing command so a failed sub-deployment stops the run.
set -e

# Debug mode is off unless --debug=<value> is given.
debug=false

# Parse --namespace=<value> and --debug=<value>; other arguments are ignored.
for arg in "$@"
do
    case $arg in
        --namespace=*)
            # Keep everything after the first '=' so values that themselves
            # contain '=' are not truncated.
            namespace="${arg#*=}"
            ;;
        --debug=*)
            debug="${arg#*=}"
            ;;
    esac
done

if [ -z "$namespace" ]; then
    echo "Error: the namespace parameter is required, run the script again like ./deploy.sh --namespace=<namespace>"
    exit 1
fi

# deploy_service <dir> [args...]
# Runs <dir>/deploy.sh from inside <dir>, forwarding the remaining arguments.
deploy_service() {
    local app=$1
    shift
    pushd "$app"
    ./deploy.sh "$@"
    popd
}

deploy_service manatee "$namespace" "$debug"
deploy_service jupyterhub "$namespace"
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | singleuser: 16 | image: 17 | name: us-docker.pkg.dev/${project_id}/${artifact_repo_docker}/datascience-notebook-with-dcr 18 | tag: ${tag} 19 | pullPolicy: Always 20 | cmd: null 21 | nodeSelector: { iam.gke.io/gke-metadata-server-enabled: "true"} 22 | serviceAccountName: $single_user_pod_sa 23 | extraEnv: 24 | NOTEBOOK_ARGS: '--NotebookApp.terminals_enabled=False --NotebookApp.allow_root=False' 25 | JUPYTERHUB_SINGLEUSER_APP: "jupyter_server.serverapp.ServerApp" 26 | DATA_CLEAN_ROOM_HOST: "$api" 27 | DEPLOYMENT_ENV: "$env" 28 | PROJECT_ID: "$project_id" 29 | KEY_LOCALTION: "$region" 30 | networkPolicy: 31 | egressAllowRules: 32 | cloudMetadataServer: true 33 | cloudMetadata: 34 | blockWithIptables: false -------------------------------------------------------------------------------- /deployment/jupyterhub/deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2024 TikTok Pte. Ltd. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# Stop at the first failing command; otherwise a failed helm call would still
# print "Deployment Completed." and exit with status 0.
set -e

# env.bzl provides the deployment variables used below (env, project_id, region).
VAR_FILE="../../env.bzl"
if [ ! -f "$VAR_FILE" ]; then
    echo "Error: Variables file does not exist."
    exit 1
fi

VAR_FILE=$(realpath "$VAR_FILE")
source "$VAR_FILE"

# The target Kubernetes namespace is the only (required) argument.
if [ -z "$1" ]
then
    echo "Error: No namespace argument supplied."
    exit 1
fi
namespace=$1

tag="latest"
helm_name="jupyterhub-helm-$namespace"
api="http://manatee.$namespace.svc.cluster.local"

service_account="jupyter-k8s-pod-sa"
docker_repo="dcr-${env}-${namespace}-images"
docker_reference="us-docker.pkg.dev/${project_id}/${docker_repo}/manatee-jupyterlab-singleuser"

helm repo add jupyterhub https://hub.jupyter.org/helm-chart/
helm repo update

# Install or upgrade the JupyterHub release; --set values take precedence over
# the defaults in config.yaml.
helm upgrade --cleanup-on-fail \
    --set singleuser.image.name="${docker_reference}" \
    --set singleuser.image.tag="${tag}" \
    --set singleuser.serviceAccountName="${service_account}" \
    --set singleuser.extraEnv.DATA_CLEAN_ROOM_HOST="${api}" \
    --set singleuser.extraEnv.EXECUTION_STAGE='"1"' \
    --set singleuser.extraEnv.MANATEE_EXTRA_ENV_EXECUTION_STAGE='"2"' \
    --set singleuser.extraEnv.DEPLOYMENT_ENV="${env}" \
    --set singleuser.extraEnv.PROJECT_ID="${project_id}" \
    --set singleuser.extraEnv.KEY_LOCALTION="${region}" \
    --set singleuser.networkPolicy.enabled=false \
    --set singleuser.storage.capacity=20Gi \
    --install "$helm_name" jupyterhub/jupyterhub \
    --namespace "${namespace}" \
    --version=3.0.3 \
    --values config.yaml

echo "Deployment Completed."
echo "Try 'kubectl --namespace=$namespace get service proxy-public' to obtain external IP"
This version number should be 21 | # incremented each time you make changes to the application. Versions are not expected to 22 | # follow Semantic Versioning. They should reflect the version the application is using. 23 | # It is recommended to use it with quotes. 24 | appVersion: "1.16.0" 25 | -------------------------------------------------------------------------------- /deployment/manatee/config.yaml: -------------------------------------------------------------------------------- 1 | cloudSql: 2 | connection_name: $connection_name 3 | serviceAccount: 4 | create: false 5 | name: $service_account 6 | apiImage: 7 | repository: $api_reference 8 | tag: $tag 9 | monitorImage: 10 | repository: $monitor_reference 11 | tag: $tag 12 | nodeSelector: { iam.gke.io/gke-metadata-server-enabled: "true"} 13 | mysql: 14 | host: localhost 15 | port: 9910 16 | namespace: "" -------------------------------------------------------------------------------- /deployment/manatee/deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2024 TikTok Pte. Ltd. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | VAR_FILE="../../env.bzl" 17 | if [ ! -f "$VAR_FILE" ]; then 18 | echo "Error: Variables file does not exist." 
19 | exit 1 20 | fi 21 | 22 | VAR_FILE=$(realpath $VAR_FILE) 23 | source $VAR_FILE 24 | 25 | if [ -z "$1" ] 26 | then 27 | echo "Error: No namespace argument supplied." 28 | exit 1 29 | fi 30 | namespace=$1 31 | debug=$2 32 | tag="latest" 33 | helm_name="manatee-helm" 34 | 35 | connection_name="${project_id}:${region}:dcr-${env}-db-instance" 36 | service_account="dcr-k8s-pod-sa" 37 | docker_repo="dcr-${env}-${namespace}-images" 38 | api_docker_reference="us-docker.pkg.dev/${project_id}/${docker_repo}/manatee-api" 39 | monitor_docker_reference="us-docker.pkg.dev/${project_id}/${docker_repo}/manatee-reconciler" 40 | 41 | helm upgrade --cleanup-on-fail \ 42 | --set apiImage.repository=${api_docker_reference} \ 43 | --set apiImage.tag=${tag} \ 44 | --set monitorImage.repository=${monitor_docker_reference} \ 45 | --set monitorImage.tag=${tag} \ 46 | --set serviceAccount.name=${service_account} \ 47 | --set cloudSql.connection_name=${connection_name} \ 48 | --set namespace=${namespace} \ 49 | --set config.env=${env} \ 50 | --set config.projectId=${project_id} \ 51 | --set config.zone=${zone} \ 52 | --set config.region=${region} \ 53 | --set config.debug=${debug} \ 54 | --install $helm_name ./ \ 55 | --namespace $namespace \ 56 | --values config.yaml 57 | -------------------------------------------------------------------------------- /deployment/manatee/templates/NOTES.txt: -------------------------------------------------------------------------------- 1 | 1. Get the application URL by running these commands: 2 | {{- if .Values.ingress.enabled }} 3 | {{- range $host := .Values.ingress.hosts }} 4 | {{- range .paths }} 5 | http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }} 6 | {{- end }} 7 | {{- end }} 8 | {{- else if contains "NodePort" .Values.service.type }} 9 | export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "manatee-chart.fullname" . 
}}) 10 | export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") 11 | echo http://$NODE_IP:$NODE_PORT 12 | {{- else if contains "LoadBalancer" .Values.service.type }} 13 | NOTE: It may take a few minutes for the LoadBalancer IP to be available. 14 | You can watch the status of by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "manatee-chart.fullname" . }}' 15 | export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "manatee-chart.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}") 16 | echo http://$SERVICE_IP:{{ .Values.service.port }} 17 | {{- else if contains "ClusterIP" .Values.service.type }} 18 | export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "manatee-chart.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") 19 | export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") 20 | echo "Visit http://127.0.0.1:8080 to use your application" 21 | kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT 22 | {{- end }} 23 | -------------------------------------------------------------------------------- /deployment/manatee/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* 2 | Expand the name of the chart. 3 | */}} 4 | {{- define "manatee-chart.name" -}} 5 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} 6 | {{- end }} 7 | 8 | {{/* 9 | Create a default fully qualified app name. 10 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 11 | If release name contains chart name it will be used as a full name. 
12 | */}} 13 | {{- define "manatee-chart.fullname" -}} 14 | {{- if .Values.fullnameOverride }} 15 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} 16 | {{- else }} 17 | {{- $name := default .Chart.Name .Values.nameOverride }} 18 | {{- if contains $name .Release.Name }} 19 | {{- .Release.Name | trunc 63 | trimSuffix "-" }} 20 | {{- else }} 21 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} 22 | {{- end }} 23 | {{- end }} 24 | {{- end }} 25 | 26 | {{/* 27 | Create chart name and version as used by the chart label. 28 | */}} 29 | {{- define "manatee-chart.chart" -}} 30 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} 31 | {{- end }} 32 | 33 | {{/* 34 | Common labels 35 | */}} 36 | {{- define "manatee-chart.labels" -}} 37 | helm.sh/chart: {{ include "manatee-chart.chart" . }} 38 | {{ include "manatee-chart.selectorLabels" . }} 39 | {{- if .Chart.AppVersion }} 40 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} 41 | {{- end }} 42 | app.kubernetes.io/managed-by: {{ .Release.Service }} 43 | {{- end }} 44 | 45 | {{/* 46 | Selector labels 47 | */}} 48 | {{- define "manatee-chart.selectorLabels" -}} 49 | app.kubernetes.io/name: {{ include "manatee-chart.name" . }} 50 | app.kubernetes.io/instance: {{ .Release.Name }} 51 | {{- end }} 52 | 53 | {{/* 54 | Create the name of the service account to use 55 | */}} 56 | {{- define "manatee-chart.serviceAccountName" -}} 57 | {{- if .Values.serviceAccount.create }} 58 | {{- default (include "manatee-chart.fullname" .) 
.Values.serviceAccount.name }} 59 | {{- else }} 60 | {{- default "default" .Values.serviceAccount.name }} 61 | {{- end }} 62 | {{- end }} 63 | -------------------------------------------------------------------------------- /deployment/manatee/templates/configmap.yaml: -------------------------------------------------------------------------------- 1 | # configmap.yaml 2 | apiVersion: v1 3 | kind: ConfigMap 4 | metadata: 5 | name: manatee-configmap 6 | data: 7 | env: {{ .Values.config.env | quote }} 8 | projectId: {{ .Values.config.projectId | quote }} 9 | zone: {{ .Values.config.zone | quote }} 10 | region: {{ .Values.config.region | quote }} 11 | debug: {{ .Values.config.debug | quote }} 12 | teeBackend: {{.Values.config.teeBackend | quote }} 13 | registryType: {{.Values.config.registryType | quote }} 14 | storageType: {{.Values.config.storageType | quote }} 15 | minioEndpoint: {{ .Values.config.minioEndpoint | quote }} 16 | minioAccessKey: {{ .Values.config.minioAccessKey | quote }} 17 | minioSecretKey: {{ .Values.config.minioSecretKey | quote }} 18 | minioRegion: {{ .Values.config.minioRegion | quote }} -------------------------------------------------------------------------------- /deployment/manatee/templates/ingress.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.ingress.enabled -}} 2 | {{- $fullName := include "manatee-chart.fullname" . 
-}} 3 | {{- $svcPort := .Values.service.port -}} 4 | {{- if and .Values.ingress.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }} 5 | {{- if not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class") }} 6 | {{- $_ := set .Values.ingress.annotations "kubernetes.io/ingress.class" .Values.ingress.className}} 7 | {{- end }} 8 | {{- end }} 9 | {{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}} 10 | apiVersion: networking.k8s.io/v1 11 | {{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}} 12 | apiVersion: networking.k8s.io/v1beta1 13 | {{- else -}} 14 | apiVersion: extensions/v1beta1 15 | {{- end }} 16 | kind: Ingress 17 | metadata: 18 | name: {{ $fullName }} 19 | labels: 20 | {{- include "manatee-chart.labels" . | nindent 4 }} 21 | {{- with .Values.ingress.annotations }} 22 | annotations: 23 | {{- toYaml . | nindent 4 }} 24 | {{- end }} 25 | spec: 26 | {{- if and .Values.ingress.className (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion) }} 27 | ingressClassName: {{ .Values.ingress.className }} 28 | {{- end }} 29 | {{- if .Values.ingress.tls }} 30 | tls: 31 | {{- range .Values.ingress.tls }} 32 | - hosts: 33 | {{- range .hosts }} 34 | - {{ . 
| quote }} 35 | {{- end }} 36 | secretName: {{ .secretName }} 37 | {{- end }} 38 | {{- end }} 39 | rules: 40 | {{- range .Values.ingress.hosts }} 41 | - host: {{ .host | quote }} 42 | http: 43 | paths: 44 | {{- range .paths }} 45 | - path: {{ .path }} 46 | {{- if and .pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.GitVersion) }} 47 | pathType: {{ .pathType }} 48 | {{- end }} 49 | backend: 50 | {{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }} 51 | service: 52 | name: {{ $fullName }} 53 | port: 54 | number: {{ $svcPort }} 55 | {{- else }} 56 | serviceName: {{ $fullName }} 57 | servicePort: {{ $svcPort }} 58 | {{- end }} 59 | {{- end }} 60 | {{- end }} 61 | {{- end }} 62 | -------------------------------------------------------------------------------- /deployment/manatee/templates/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: {{ include "manatee-chart.fullname" . }} 5 | labels: 6 | {{- include "manatee-chart.labels" . | nindent 4 }} 7 | spec: 8 | type: {{ .Values.service.type }} 9 | ports: 10 | - port: {{ .Values.service.port }} 11 | targetPort: http 12 | protocol: TCP 13 | selector: 14 | {{- include "manatee-chart.selectorLabels" . | nindent 4 }} 15 | -------------------------------------------------------------------------------- /deployment/manatee/templates/tests/test-connection.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: "{{ include "manatee-chart.fullname" . }}-test-connection" 5 | labels: 6 | {{- include "manatee-chart.labels" . | nindent 4 }} 7 | annotations: 8 | "helm.sh/hook": test 9 | spec: 10 | containers: 11 | - name: wget 12 | image: busybox 13 | command: ['wget'] 14 | args: ['{{ include "manatee-chart.fullname" . 
}}:{{ .Values.service.port }}'] 15 | restartPolicy: Never 16 | -------------------------------------------------------------------------------- /deployment/manatee/values.yaml: -------------------------------------------------------------------------------- 1 | # Default values for manatee-chart. 2 | # This is a YAML-formatted file. 3 | # Declare variables to be passed into your templates. 4 | 5 | replicaCount: 1 6 | 7 | apiImage: 8 | repository: "" 9 | pullPolicy: Always 10 | # Overrides the image tag whose default is the chart appVersion. 11 | tag: "" 12 | 13 | monitorImage: 14 | repository: "" 15 | pullPolicy: Always 16 | # Overrides the image tag whose default is the chart appVersion. 17 | tag: "" 18 | 19 | 20 | imagePullSecrets: [] 21 | nameOverride: "manatee" 22 | fullnameOverride: "manatee" 23 | 24 | serviceAccount: 25 | # Specifies whether a service account should be created 26 | create: false 27 | # Automatically mount a ServiceAccount's API credentials? 28 | automount: true 29 | # Annotations to add to the service account 30 | annotations: {} 31 | # The name of the service account to use. 
32 | # If not set and create is true, a name is generated using the fullname template 33 | name: "" 34 | 35 | podAnnotations: {} 36 | podLabels: {} 37 | 38 | podSecurityContext: {} 39 | # fsGroup: 2000 40 | 41 | securityContext: {} 42 | # capabilities: 43 | # drop: 44 | # - ALL 45 | # readOnlyRootFilesystem: true 46 | # runAsNonRoot: true 47 | # runAsUser: 1000 48 | 49 | service: 50 | type: ClusterIP 51 | port: 80 52 | 53 | api: 54 | port: 8080 55 | 56 | ingress: 57 | enabled: false 58 | className: "" 59 | annotations: {} 60 | # kubernetes.io/ingress.class: nginx 61 | # kubernetes.io/tls-acme: "true" 62 | hosts: 63 | - host: chart-example.local 64 | paths: 65 | - path: / 66 | pathType: ImplementationSpecific 67 | tls: [] 68 | # - secretName: chart-example-tls 69 | # hosts: 70 | # - chart-example.local 71 | 72 | resources: {} 73 | # We usually recommend not to specify default resources and to leave this as a conscious 74 | # choice for the user. This also increases chances charts run on environments with little 75 | # resources, such as Minikube. If you do want to specify resources, uncomment the following 76 | # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 77 | # limits: 78 | # cpu: 100m 79 | # memory: 128Mi 80 | # requests: 81 | # cpu: 100m 82 | # memory: 128Mi 83 | 84 | autoscaling: 85 | enabled: false 86 | minReplicas: 1 87 | maxReplicas: 100 88 | targetCPUUtilizationPercentage: 80 89 | # targetMemoryUtilizationPercentage: 80 90 | 91 | # Additional volumes on the output Deployment definition. 92 | volumes: [] 93 | # - name: foo 94 | # secret: 95 | # secretName: mysecret 96 | # optional: false 97 | 98 | # Additional volumeMounts on the output Deployment definition. 
99 | volumeMounts: [] 100 | # - name: foo 101 | # mountPath: "/etc/foo" 102 | # readOnly: true 103 | 104 | nodeSelector: {} 105 | 106 | tolerations: [] 107 | 108 | affinity: {} 109 | 110 | cloudSql: 111 | connection_name: "" 112 | 113 | mysql: 114 | host: "localhost" 115 | port: "9910" 116 | 117 | useMinikube: false 118 | 119 | # every minute 120 | schedule: "*/1 * * * *" 121 | 122 | namespace: "" 123 | 124 | config: 125 | env: "" 126 | projectId: "" 127 | zone: "" 128 | region: "" 129 | debug: "false" 130 | teeBackend: "GCP" 131 | storageType: "GCP" 132 | registryType: "GCP" 133 | minioEndpoint: "" 134 | minioAccessKey: "" 135 | minioSecretKey: "" 136 | minioRegion: "us" 137 | -------------------------------------------------------------------------------- /deployment/minikube/deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2024 TikTok Pte. Ltd. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | env="minikube" 17 | namespace="manatee" 18 | project_id="mock-gcp-project-id" 19 | region="us-west2" 20 | zone="us-west2-a" 21 | helm_name="manatee-helm" 22 | eval $(minikube docker-env) 23 | kubectl apply -f mysql-deployment.yaml -n $namespace 24 | kubectl apply -f mysql-service.yaml -n $namespace 25 | kubectl apply -f minio-dev.yaml 26 | # deploy dcr api 27 | helm upgrade --cleanup-on-fail \ 28 | --set apiImage.repository=docker.io/library/api \ 29 | --set apiImage.tag=latest \ 30 | --set apiImage.pullPolicy=Never \ 31 | --set monitorImage.repository=docker.io/library/reconciler \ 32 | --set monitorImage.tag=latest \ 33 | --set monitorImage.pullPolicy=Never \ 34 | --set serviceAccount.name=dcr-k8s-pod-sa \ 35 | --set serviceAccount.create=false \ 36 | --set cloudSql.connection_name="" \ 37 | --set namespace=${namespace} \ 38 | --set config.env=${env} \ 39 | --set config.projectId=${project_id} \ 40 | --set config.zone=${zone} \ 41 | --set config.region=${region} \ 42 | --set config.debug=true \ 43 | --set config.teeBackend=MOCK \ 44 | --set config.registryType=MINIKUBE \ 45 | --set config.storageType=MINIO \ 46 | --set config.minioSecretKey=minioadmin \ 47 | --set config.minioAccessKey=minioadmin \ 48 | --set config.minioEndpoint=minio-service:9000 \ 49 | --set mysql.host=mysql-service \ 50 | --set mysql.port=3306 \ 51 | --set useMinikube=true \ 52 | --install $helm_name ../manatee \ 53 | --namespace $namespace 54 | 55 | helm repo add jupyterhub https://hub.jupyter.org/helm-chart/ 56 | helm repo update 57 | 58 | service_account="jupyter-k8s-pod-sa" 59 | helm_name="jupyterhub-helm" 60 | api="http://manatee.$namespace.svc.cluster.local" 61 | 62 | helm upgrade --cleanup-on-fail \ 63 | --set singleuser.image.name=docker.io/library/jupyterlab_manatee \ 64 | --set singleuser.image.tag=latest \ 65 | --set singleuser.image.pullPolicy=Never \ 66 | --set singleuser.serviceAccountName=${service_account} \ 67 | --set singleuser.extraEnv.DATA_CLEAN_ROOM_HOST=${api} 
\ 68 | --set singleuser.extraEnv.DEPLOYMENT_ENV=${env} \ 69 | --set singleuser.extraEnv.PROJECT_ID=${project_id} \ 70 | --set singleuser.extraEnv.KEY_LOCALTION=${region} \ 71 | --set singleuser.networkPolicy.enabled=false \ 72 | --set singleuser.nodeSelector=null \ 73 | --set prePuller.continuous.enabled=false \ 74 | --set prePuller.hook.enabled=false \ 75 | --install $helm_name jupyterhub/jupyterhub \ 76 | --namespace ${namespace} \ 77 | --version=3.0.3 \ 78 | --values ../jupyterhub/config.yaml 79 | 80 | echo "Deployment Completed." 81 | echo "Try 'kubectl --namespace=$namespace get service proxy-public' to obtain external IP" 82 | -------------------------------------------------------------------------------- /deployment/minikube/minio-dev.yaml: -------------------------------------------------------------------------------- 1 | # Deploys a new MinIO Pod into the metadata.namespace Kubernetes namespace 2 | # 3 | # The `spec.containers[0].args` contains the command run on the pod 4 | # The `/data` directory corresponds to the `spec.containers[0].volumeMounts[0].mountPath` 5 | # That mount path corresponds to a Kubernetes HostPath which binds `/data` to a local drive or volume on the worker node where the pod runs 6 | # 7 | apiVersion: v1 8 | kind: Pod 9 | metadata: 10 | labels: 11 | app: minio 12 | name: minio 13 | namespace: manatee # Change this value to match the namespace metadata.name 14 | spec: 15 | containers: 16 | - name: minio 17 | image: quay.io/minio/minio:latest 18 | command: 19 | - /bin/bash 20 | - -c 21 | args: 22 | - minio server /data --console-address :9090 23 | volumeMounts: 24 | - mountPath: /data 25 | name: localvolume # Corresponds to the `spec.volumes` Persistent Volume 26 | volumes: 27 | - name: localvolume 28 | hostPath: # MinIO generally recommends using locally-attached volumes 29 | path: /mnt/disk1/data # Specify a path to a local drive or volume on the Kubernetes worker node 30 | type: DirectoryOrCreate # The path to the last directory 
must exist 31 | --- 32 | # Deploys a new MinIO Service into the metadata.namespace Kubernetes namespace 33 | apiVersion: v1 34 | kind: Service 35 | metadata: 36 | name: minio-service 37 | namespace: manatee 38 | spec: 39 | type: NodePort 40 | ports: 41 | - port: 9000 42 | targetPort: 9000 43 | selector: 44 | app: minio -------------------------------------------------------------------------------- /deployment/minikube/mysql-deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: mysql-deployment 5 | spec: 6 | replicas: 1 7 | selector: 8 | matchLabels: 9 | app: mysql-server 10 | template: 11 | metadata: 12 | labels: 13 | app: mysql-server 14 | spec: 15 | containers: 16 | - name: mysql 17 | image: mysql:8.0 18 | env: 19 | - name: MYSQL_DATABASE 20 | valueFrom: 21 | secretKeyRef: 22 | name: mysql-secret 23 | key: mysql-database 24 | - name: MYSQL_USER 25 | valueFrom: 26 | secretKeyRef: 27 | name: mysql-secret 28 | key: mysql-username 29 | - name: MYSQL_PASSWORD 30 | valueFrom: 31 | secretKeyRef: 32 | name: mysql-secret 33 | key: mysql-password 34 | - name: MYSQL_RANDOM_ROOT_PASSWORD 35 | value: "yes" 36 | ports: 37 | - containerPort: 3306 -------------------------------------------------------------------------------- /deployment/minikube/mysql-service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: mysql-service 5 | spec: 6 | type: NodePort 7 | ports: 8 | - port: 3306 9 | targetPort: 3306 10 | selector: 11 | app: mysql-server -------------------------------------------------------------------------------- /docs/assets/img/arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manatee-project/manatee/07d4c7d5f6fb4f1d5e91252d30cc1f133467abdc/docs/assets/img/arch.png 
-------------------------------------------------------------------------------- /docs/assets/img/jobs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manatee-project/manatee/07d4c7d5f6fb4f1d5e91252d30cc1f133467abdc/docs/assets/img/jobs.png -------------------------------------------------------------------------------- /docs/assets/img/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manatee-project/manatee/07d4c7d5f6fb4f1d5e91252d30cc1f133467abdc/docs/assets/img/logo.png -------------------------------------------------------------------------------- /docs/assets/img/manatee-architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manatee-project/manatee/07d4c7d5f6fb4f1d5e91252d30cc1f133467abdc/docs/assets/img/manatee-architecture.png -------------------------------------------------------------------------------- /docs/assets/img/manatee-white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manatee-project/manatee/07d4c7d5f6fb4f1d5e91252d30cc1f133467abdc/docs/assets/img/manatee-white.png -------------------------------------------------------------------------------- /docs/assets/img/manatee.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manatee-project/manatee/07d4c7d5f6fb4f1d5e91252d30cc1f133467abdc/docs/assets/img/manatee.png -------------------------------------------------------------------------------- /docs/assets/img/plugin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manatee-project/manatee/07d4c7d5f6fb4f1d5e91252d30cc1f133467abdc/docs/assets/img/plugin.png 
-------------------------------------------------------------------------------- /docs/assets/img/stage-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manatee-project/manatee/07d4c7d5f6fb4f1d5e91252d30cc1f133467abdc/docs/assets/img/stage-1.png -------------------------------------------------------------------------------- /docs/assets/img/two-stage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manatee-project/manatee/07d4c7d5f6fb4f1d5e91252d30cc1f133467abdc/docs/assets/img/two-stage.png -------------------------------------------------------------------------------- /docs/assets/img/unzip.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manatee-project/manatee/07d4c7d5f6fb4f1d5e91252d30cc1f133467abdc/docs/assets/img/unzip.png -------------------------------------------------------------------------------- /docs/blog/index.md: -------------------------------------------------------------------------------- 1 | # Blog 2 | 3 | -------------------------------------------------------------------------------- /docs/blog/posts/2025-01-community-release.md: -------------------------------------------------------------------------------- 1 | --- 2 | date: 2025-01-07 3 | --- 4 | 5 | # First Community Release of ManaTEE 6 | 7 | We are thrilled to announce the first public community release of ManaTEE, an open-source framework for private data analytics. 8 | ManaTEE was introduced as a [key privacy solution](https://developers.tiktok.com/blog/privacygo-data-clean-room-open-source) for private data collaboration at TikTok, which built [one of its products](https://developers.tiktok.com/doc/vce-getting-started) on top of this solution. The team decided to improve and consolidate the solution by open-sourcing it. 
9 | To further its momentum as an open-source private data analytics framework, TikTok has [donated the project](https://developers.tiktok.com/blog/tiktok-open-source-project-donation-manatee) to the Confidential Computing Consortium under the Linux Foundation. 10 | After months of development, testing, and refinement, we’re excited to share this project with the broader community. 11 | 12 | ## What is New? 13 | 14 | In the community release, we are providing the following features: 15 | 16 | * [Test deployment in minikube](../../getting-started/minikube.md) without cloud accounts (e.g., GCP) 17 | * Full [tutorial](../../getting-started/tutorials.md) to reproduce the demo 18 | 19 | We also worked hard to refactor the code, to make it much more extensible. It now leverages Bazel for hermetic and reproducible builds, and has a basic CI/CD pipeline setup. The project is now ready for contributions from the community! 20 | 21 | ## What's Next? 22 | 23 | This is just the beginning. There is still much work to be done, such as: 24 | 25 | * **Diverse backend support**: ManaTEE currently only supports Google Confidential Space as the TEE backend, but different use cases may need different backends. For example, some may want to use an on-prem TEE cluster, or a different cloud. Some might even want to deploy the system in multiple clouds. 26 | * **Integrated data pipeline**: One of the big challenges for organizations sharing data is to process or filter the data to protect privacy and maintain data compliance. To ensure end-to-end data privacy, the data management should be closely integrated with the framework that consumes the data. 27 | * **Output privacy**: Although TEE provides data privacy during execution, the outputs of the execution need extra effort to protect data privacy. 28 | * **Support for confidential GPUs**: Data analytics these days often relies on large AI models requiring hardware accelerators such as GPUs.
Now that confidential GPUs are readily available, we are ready to support GPU workloads seamlessly in the ManaTEE framework. 29 | 30 | We are in the process of forming a Technical Steering Committee (TSC) to govern the project and drive its roadmap. Stay tuned for more updates in future posts. 31 | 32 | ## Join Us 33 | 34 | We’d love your feedback to help shape the future of ManaTEE and private data research framework. 35 | Please feel free to open issues, contribute code, or suggest ideas on GitHub. Please subscribe to our [mailing list](https://groups.google.com/u/1/g/manatee-project) for updates, too! -------------------------------------------------------------------------------- /docs/developer/architecture.md: -------------------------------------------------------------------------------- 1 | # Architecture 2 | 3 | -------------------------------------------------------------------------------- /docs/getting-started/building.md: -------------------------------------------------------------------------------- 1 | # Building 2 | 3 | ManaTEE uses [Bazel](https://bazel.build/install) for hermetic builds. 4 | Bazel is aware of all required tools and dependencies, thus building images is as easy as: 5 | 6 | ``` 7 | bazel build //... 8 | ``` 9 | 10 | Find individual rules in the corresponding `BUILD.bazel` files. 11 | 12 | ## Components 13 | 14 | `app` directory contains the source code of the data clean room, which has four components: 15 | 16 | * `executor` contains tools that are used in the base image of stage2, such as a tool that generates a custom attestation report within GCP confidential space. 17 | * `api` is the backend service of the data clean room that processes requests from JupyterLab. 18 | * `reconciler` is a reconciler that monitors in-progress jobs and takes actions. 19 | * `jupyterlab_manatee` is a JupyterLab extension for data clean room that submits a job on the frontend and queries the status of the jobs.
20 | 21 | ## Loading Container Images 22 | 23 | If you'd like to load the images in your local container runtime (e.g., Docker), you can use `oci_load` rules. 24 | 25 | ```shell 26 | bazel query 'kind("oci_load", "//app/...")' | xargs -n1 bazel run 27 | ``` 28 | 29 | # Testing 30 | 31 | To run all tests, run: 32 | 33 | ``` 34 | bazel test //... 35 | ``` -------------------------------------------------------------------------------- /docs/getting-started/deployment.md: -------------------------------------------------------------------------------- 1 | # GCP Deployment 2 | 3 | ## Prerequisites 4 | 5 | Currently, ManaTEE requires Google Cloud Platform (GCP) for deployment, as it requires cloud-provided TEE. 6 | In the future, we will support more cloud backends as well as local test deployment (See [#31](https://github.com/manatee-project/manatee/issues/31)). 7 | 8 | Because of the cloud resource requirement, we recommend a cloud admin to create all the resources by following the steps below. 9 | 10 | ### Cloud Setup 11 | 12 | * A valid GCP account that has ability to create/destroy resources. For a GCP project, please enable the following apis: 13 | - serviceusage.googleapis.com 14 | - compute.googleapis.com 15 | - container.googleapis.com 16 | - cloudkms.googleapis.com 17 | - servicenetworking.googleapis.com 18 | - cloudresourcemanager.googleapis.com 19 | - sqladmin.googleapis.com 20 | - confidentialcomputing.googleapis.com 21 | 22 | ### Tools 23 | * [Gcloud CLI](https://cloud.google.com/sdk/docs/install) Login to the GCP `gcloud auth login && gcloud auth application-default login && gcloud components install gke-gcloud-auth-plugin` 24 | * [Terraform](https://developer.hashicorp.com/terraform/tutorials/aws-get-started/install-cli) Terraform is an infrastructure as code tool that enables you to safely and predictably provision and manage infrastructure in any cloud. 
25 | * [Helm](https://helm.sh/docs/intro/install/) Helm is a package manager for Kubernetes that allows developers and operators to more easily package, configure, and deploy applications and services onto Kubernetes clusters. 26 | * [Hertz](https://github.com/cloudwego/hertz) Hertz is a high-performance, high-usability, extensible HTTP framework for Go. It’s designed to make it easy for developers to build microservices. 27 | 28 | ## Create Resources 29 | 30 | The resources are created and managed by the project administrator who has the `Owner` role in the GCP project. Make sure you have correctly defined environment variables in the `env.bzl`. Only the project administrator should run these commands to create resources. 31 | 32 | `resources/global` directory contains the global resources including: clusters, Cloud SQL instance, database, docker repositories, and service accounts. These resources are global and only created once. 33 | ``` 34 | pushd resources/global 35 | ./apply.sh 36 | popd 37 | ``` 38 | 39 | `resources/deployment` directory includes the resources related to Kubernetes including: kubernetes namespace, role, secret. These resources are created under a specific namespace, so the namespace parameter is required, and you can create different deployments under different namespaces. 40 | ```shell 41 | pushd resources/deployment 42 | ./apply.sh --namespace= 43 | popd 44 | ``` 45 | 46 | ## Pushing Images 47 | 48 | ```shell 49 | gcloud auth configure-docker us-docker.pkg.dev # authenticate to artifact registry 50 | bazel run //:push_all_images --action_env=namespace= 51 | ``` 52 | 53 | > [!IMPORTANT] 54 | > the `--action_env=namespace=` flag is required. 55 | 56 | You can also push images separately by this command. 
Replace `` by the directory name under `/app` (e.g., api) 57 | 58 | ``` 59 | bazel run //:push__image --action_env=namespace= 60 | ``` 61 | 62 | ## Deploying in Google Cloud Platform (GCP) 63 | 64 | ### Defining environment variables 65 | First, copy the example environment variables template to the existing directory. 66 | ``` 67 | cp .env.example env.bzl 68 | ``` 69 | Edit the variables in `env.bzl`. The `env.bzl` file is the one that really takes effect, the other files are just templates. The double quotes around a variable name are needed. For example: 70 | 71 | ``` sh title="env.bzl" 72 | env="dev" # the deployment environment 73 | project_id="you project id" # gcp project id 74 | region="" # the region that the resources created in 75 | zone="" # the zone that the resources created in 76 | ``` 77 | 78 | ### Deploy 79 | 80 | Deploy data clean room and jupyterhub by helm chart. 81 | ```shell 82 | source env.bzl 83 | gcloud container clusters get-credentials dcr-$env-cluster --zone $zone --project $project_id 84 | 85 | pushd deployment 86 | ./deploy.sh --namespace= 87 | popd 88 | ``` 89 | When deployment is complete, you can follow the output of the script to get the public ip of jupyterhub. 90 | ``` 91 | kubectl --namespace= get service proxy-public 92 | ``` 93 | -------------------------------------------------------------------------------- /docs/getting-started/llm-model-evaluation.md: -------------------------------------------------------------------------------- 1 | # Trusted LLM Model Evaluation Example 2 | 3 | This doc demonstrates how to use manatee for trusted evaluation of LLM models. Manatee seamlessly integrates with lm-evaluation-harness, enabling comprehensive testing of LLM models across a wide range of evaluation tasks. 4 | 5 | Scenario: 6 | Suppose a model provider owns a proprietary LLM model. The provider wishes to prove that their model performs as publicly claimed (e.g., in terms of fairness or accuracy). 
This evaluation process is divided into two stages: 7 | - Stage 1: The script runs on a mock (fake) model to illustrate the workflow. 8 | - Stage 2: The script runs on the actual model, producing real evaluation results along with cryptographic attestation. 9 | 10 | The attestation process cryptographically binds the evaluation results to a TEE (Trusted Execution Environment) quote. This quote serves as proof that a specific model (identified by its hash) was executed within a legitimate TEE, and that the reported outputs are authentic and trustworthy. 11 | 12 | 13 | ## Install lm-evaluation-harness 14 | `lm-evaluation-harness` provides a unified framework to test generative language models on a large number of different evaluation tasks. 15 | 16 | ```python 17 | !git clone --depth 1 https://github.com/EleutherAI/lm-evaluation-harness 18 | %pip install -e ./lm-evaluation-harness[wandb] 19 | ``` 20 | 21 | ## Model Selection(HuggingFace for Example) 22 | 23 | ``` 24 | HG_MODEL="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B" 25 | import lm_eval 26 | from lm_eval.utils import setup_logging 27 | lm_obj = lm_eval.models.huggingface.HFLM(pretrained=HG_MODEL, device="cpu") 28 | task_manager = lm_eval.tasks.TaskManager() 29 | lm_obj.get_model_info() 30 | ``` 31 | 32 | ## Tasks Selection 33 | 34 | ### BBQ: A Hand-Built Bias Benchmark for Question Answering 35 | 36 | BBQ measures the bias in the output for the question answering task. The dataset of question-sets constructed by the authors that highlight attested social biases against people belonging to protected classes along nine social dimensions relevant for U.S. English-speaking contexts. BBQ evaluates model responses at two levels: (i) given an under-informative context, how strongly responses reflect social biases (AMBIGUOUS CONTEXT), and (ii) given an adequately informative context, whether the model's biases override a correct answer choice (DISAMBIGUATED CONTEXT). 
37 | 38 | ### CrowS-Pairs: A Challenge Dataset for Measuring Social Biases in Masked Language Models 39 | 40 | CrowS-Pairs is a challenge set for evaluating language models (LMs) on their tendency to generate biased outputs. CrowS-Pairs comes in 2 languages and the English subset has a newer version which fixes some of the issues with the original version. 41 | 42 | ### Simple Cooccurrence Bias 43 | 44 | This bias evaluation relies on simple templates for prompting LMs and tests for bias in the next word prediction. For instance, when given a context such as "The {occupation} was a", masculine gender identifiers are found to be more likely to follow than feminine gender ones. Following Brown et al. (2020), this occupation bias is measured as the average log-likelihood of choosing a female gender identifier (woman, female) minus the log-likelihood of choosing a male gender identifier (man, male). 45 | 46 | ### Winogender: Gender Bias in Coreference Resolution 47 | Winogender is designed to measure gender bias in coreference resolution systems, but has also been used for evaluating language models. The dataset consists of simple sentences with an occupation, participant, and pronoun, where the pronoun refers to either the occupation or participant. Each example consists of three variations, where only the gender of the pronoun is changed, to test how the pronoun affects the prediction. An example of the Winogender schema is "The paramedic performed CPR on the passenger even though he/she/they knew it was too late." This implementation follows the description from the paper "Language Models are Few-Shot Learners", which uses prompts. 
48 | 49 | ```python 50 | import datasets 51 | datasets.config.HF_DATASETS_TRUST_REMOTE_CODE = True 52 | tee_llm_evaluation_result = lm_eval.simple_evaluate( # call simple_evaluate 53 | model=lm_obj, 54 | tasks=["winogender","simple_cooccurrence_bias", "crows_pairs_english"], 55 | num_fewshot=0, 56 | task_manager=task_manager, 57 | log_samples=True, 58 | batch_size=1024, 59 | confirm_run_unsafe_code=True 60 | ) 61 | tee_llm_evaluation_result["results"] 62 | ``` 63 | 64 | ## Get Result and TEE Attestation Report 65 | After the job finishes, download the result along with the attestation report. The `eat_nonce` in the attestation report is the hash of the output file. 66 | -------------------------------------------------------------------------------- /docs/getting-started/minikube.md: -------------------------------------------------------------------------------- 1 | # Test Deployment on Minikube 2 | 3 | We also made it possible to deploy and test ManaTEE without having any cloud account. 4 | Our test deployment uses a local Minikube cluster with a few components that replace cloud resources. 5 | With this, users can quickly test and try the ManaTEE JupyterLab extension and the API without having an actual TEE backend. 6 | 7 | ## Prerequisite 8 | 9 | First, install [Minikube CLI](https://minikube.sigs.k8s.io/docs/start/). 10 | 11 | Then, create a minikube cluster with enough memory. We need larger memory because of the Kaniko jobs. 12 | 13 | ``` 14 | minikube start --memory=12192mb --cpus=8 --disk-size=50g --insecure-registry "10.0.0.0/24" 15 | ``` 16 | 17 | ## Create Cluster Resources 18 | 19 | Once the minikube cluster is up and running, create the resources in the minikube cluster. 20 | 21 | ``` 22 | pushd resources/minikube 23 | ./apply.sh 24 | popd 25 | ``` 26 | 27 | ## Build Images 28 | 29 | Now, build the images and load them into Docker. 30 | Minikube has its own Docker engine running inside the cluster. 
31 | Thus, we first need to point the local Docker client to the Docker engine inside minikube 32 | 33 | ``` 34 | eval $(minikube docker-env) 35 | ``` 36 | 37 | Then, run the following command to load all images 38 | 39 | ``` 40 | bazelisk run //:load_all_images 41 | ``` 42 | 43 | ## Setup Registry 44 | 45 | The API requires artifact registry to store the TEE base image. 46 | Thus, we use minikube's registry addon to host the image. 47 | 48 | Enable the registry 49 | ``` 50 | minikube addons enable registry 51 | ``` 52 | 53 | RUN a proxy to connect to minikube registry and push executor image to minikube registry. 54 | ``` 55 | docker run --rm -it --network=host alpine ash -c "apk add socat && socat TCP-LISTEN:5000,reuseaddr,fork TCP:$(minikube ip):5000" 56 | ``` 57 | 58 | Open another terminal, and run 59 | 60 | ``` 61 | eval $(minikube docker-env) 62 | docker tag executor localhost:5000/executor && docker push localhost:5000/executor 63 | ``` 64 | 65 | You can close the proxy after the docker push. 66 | 67 | ## Deploy 68 | 69 | Now, you can deploy ManaTEE to minikube. 70 | 71 | ``` 72 | pushd deployment/minikube 73 | ./deploy.sh 74 | popd 75 | ``` 76 | 77 | ## Accessing JupyterHub 78 | 79 | You can port-forward traffic to the k8s Service proxy-public with kubectl to access it from your computer. `kubectl --namespace=manatee port-forward service/proxy-public 8080:http`. 80 | 81 | Try insecure HTTP access: http://localhost:8080 82 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # Welcome to ManaTEE Project 2 | 3 | ![ManaTEE Logo](assets/img/logo.png) 4 | 5 | ManaTEE is an open-source framework for secure data analytics in public research. It leverages Privacy Enhancing Technologies, including confidential computing, to protect sensitive data while maintaining usability. 
6 | 7 | ManaTEE Project was initiated in 2024 as a core use case of TikTok. Now part of the Confidential Computing Consortium, ManaTEE addresses the growing challenges of balancing privacy, usability, and accuracy in enterprise data collaboration. 8 | 9 | ## Two-Stage Data Analytics Platform 10 | 11 | ManaTEE introduces a two-stage data clean room model to provide an interactive interface for exploring data while protecting private data during processing. It combines different privacy-enhancing technologies (PETs) across two stages: 12 | 13 | * **Programming Stage**: Data consumers explore datasets using low-risk data, employing different PETs such as pseudonymization or differentially private synthetic data generation. 14 | * **Secure Execution Stage**: Workloads run in a trusted execution environment (TEE), which provides attestable integrity and confidentiality guarantees for the workload in the cloud. 15 | 16 | ![Two-stage data analytics platform](assets/img/two-stage.png) 17 | /// Caption 18 | Two-stage data clean room model 19 | /// 20 | 21 | ## Key Features 22 | 23 | ManaTEE provides following key benefits: 24 | 25 | * **Interactive Programming**: Integrated with Jupyter Notebook, allowing data consumers to work with Python and other popular languages. 26 | * **Cloud-Ready**: ManaTEE can be easily deployed to existing cloud TEE backends such as Google Cloud. We plan to support other backends as well, eliminating the need to build the entire infrastructure from scratch. 27 | * **Flexible PET**: Data providers can control the protection mechanisms at each stage to tailor to specific privacy requirements of the data. 28 | * **Trusted Execution Environment**: By leveraging TEEs, ManaTEE ensures a high level of confidence in data confidentiality and program integrity for both data providers and data consumers. 29 | * **Accuracy and Utility**: ManaTEE employs a two-stage design to ensure that result accuracy is not compromised for the sake of privacy. 
30 | 31 | 32 | ## Use Cases 33 | 34 | Potential use cases for ManaTEE include: 35 | 36 | * **Trusted Research Environments (TREs)**: Secure data analysis for public health, economics, and more, while maintaining data privacy. 37 | * **Advertising & Marketing**: Lookalike segment analysis and private ad tracking without compromising user data. 38 | * **Machine Learning**: Enables private model training without exposing sensitive data or algorithms. 39 | -------------------------------------------------------------------------------- /docs/project-status.md: -------------------------------------------------------------------------------- 1 | # Project Roadmap 2 | 3 | A few necessary components such as data SDK are not included in the open source version. 4 | However, you can still try to reproduce our demo by following [tutorials](getting-started/tutorials.md). 5 | 6 | ## Feature Status 7 | 8 | Many parts of ManaTEE are still under active development. 9 | 10 | | | Current (Alpha) | Future | 11 | |-------------------------|--------------------------|---------------------------| 12 | | **Users** | One-Way Data Sharing | Multi-Way Data Sharing | 13 | | **Backend** | Single Backend (Google Cloud Platform) | Multiple Backends | 14 | | **Data Provisioning** | Manual | Automated | 15 | | **Policy and Attestation** | Manual | Automated | 16 | | **Compute** | CPU | CPU/GPU | 17 | 18 | * **Data Provisioning, Policy, and Attestation**: Currently, the data owner is responsible for manually setting up all the infrastructure, including data and access control. However, future versions will make this easier by including a generic interface for uploading data and configuring the data access policies based on attestation. 19 | 20 | * **Backend**: We only support a single TEE backend called [Confidential Space](https://cloud.google.com/confidential-computing/confidential-space/docs/confidential-space-overview) provided by Google Cloud Platform (GCP). 
In the future, it will be extended to support more TEE backends including other cloud providers or native confidential VMs/containers. 21 | 22 | * **Compute**: ManaTEE currently does not support confidential GPU or any accelerator-based computation. 23 | 24 | ## Roadmap 25 | 26 | We are currently forming Technical Steering Committee (TSC) for governing the project and driving the roadmap. 27 | If you're interested in joining the project, please reach out to the team via our [mailing list](manatee-project@googlegroups.com). 28 | -------------------------------------------------------------------------------- /docs/stylesheets/extra.css: -------------------------------------------------------------------------------- 1 | [data-md-color-scheme="manatee"] { 2 | --md-primary-fg-color: #757575; 3 | --md-primary-fg-color--light: #a8a8a8; 4 | --md-primary-fg-color--dark: #424242; 5 | } -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: ManaTEE 2 | site_url: https://manatee-project.github.io 3 | theme: 4 | name: material 5 | features: 6 | - navigation.sections 7 | - navigation.expand 8 | - navigation.footer 9 | - content.action.edit 10 | - content.code.copy 11 | - content.code.annotate 12 | logo: assets/img/manatee.png 13 | palette: 14 | primary: black 15 | repo_url: https://github.com/manatee-project/manatee 16 | repo_name: manatee-project/manatee 17 | 18 | extra: 19 | social: 20 | - icon: fontawesome/brands/github 21 | link: https://github.com/manatee-project/manatee 22 | 23 | markdown_extensions: 24 | - attr_list 25 | - md_in_html 26 | - pymdownx.blocks.caption 27 | - pymdownx.highlight: 28 | anchor_linenums: true 29 | line_spans: __span 30 | pygments_lang_class: true 31 | - pymdownx.inlinehilite 32 | - pymdownx.snippets 33 | - pymdownx.superfences 34 | 35 | 36 | plugins: 37 | - blog 38 | 39 | # Page tree 40 | nav: 41 | - 
#!/bin/bash
# Copyright 2024 TikTok Pte. Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Creates the per-namespace deployment resources with Terraform.
#
# Usage:
#   ./apply.sh --namespace=<ns> [--database-user=<user>] [--database-password=<pwd>]
#
# --namespace          required; also used as the Terraform state prefix.
# --database-user      optional; defaults to the namespace.
# --database-password  optional; a random 12-character password is generated
#                      (and printed) when omitted.
set -e

for arg in "$@"
do
    case $arg in
        --namespace=*)
        # Strip everything up to and including the FIRST '=' so that a value
        # which itself contains '=' is kept intact.
        namespace="${arg#*=}"
        ;;
        --database-user=*)
        dbuser="${arg#*=}"
        ;;
        --database-password=*)
        dbpwd="${arg#*=}"
        ;;
    esac
done


if [ -z "$namespace" ]; then
    echo -e "Error: the namespace parameter is missing, please run the script like ./apply.sh --namespace=xxx" >&2
    # Exit non-zero so callers and CI see the failure.
    exit 1
fi

if [ -z "$dbuser" ]; then
    dbuser=$namespace
    echo -e "\033[1;33mWarning: the database-user parameters doesn't exist using default database user: ${namespace}\033[0m"

fi

if [ -z "$dbpwd" ]; then
    # LC_ALL=C keeps tr from rejecting random bytes as invalid multibyte input.
    # head closes the pipe after 12 bytes; pipefail is deliberately NOT enabled,
    # otherwise the resulting SIGPIPE on tr would abort the script.
    dbpwd=$(LC_ALL=C tr -dc 'a-zA-Z0-9' < /dev/urandom | head -c 12)
    echo -e "\033[1;33mWarning: the database-password parameters doesn't exist using random database password: ${dbpwd}\033[0m"
fi

# Check if gcloud is installed
if ! [ -x "$(command -v gcloud)" ]; then
  echo "Error: gcloud is not installed." >&2
  exit 1
fi

# Check if gcloud logged in
if ! gcloud auth list | grep -q 'ACTIVE'; then
    echo "Error: No active gcloud account found." >&2
    exit 1
fi

# check whether variables has been set
VAR_FILE="../../env.bzl"
if [ ! -f "$VAR_FILE" ]; then
    echo "Error: Variables file does not exist." >&2
    exit 1
fi
VAR_FILE=$(realpath "$VAR_FILE")
# shellcheck disable=SC1090
source "$VAR_FILE"

# Prefer the zone defined in env.bzl; fall back to "<region>-a" when it is
# unset or empty.
# NOTE(review): assumes the cluster lives in the env.bzl zone, as the
# deployment docs' get-credentials command does; confirm against resources/global.
zone="${zone:-${region}-a}"
# get kubernetes cluster credentials
gcloud container clusters get-credentials "dcr-$env-cluster" --zone "$zone" --project "$project_id"

cp "$VAR_FILE" terraform.tfvars
# The file is about to receive the database password; keep it owner-only.
chmod 600 terraform.tfvars

# printf (not echo -e) so backslashes in the values are written literally.
{
    printf '\nnamespace="%s"\n' "$namespace"
    printf 'mysql_username="%s"\n' "$dbuser"
    printf 'mysql_password="%s"\n' "$dbpwd"
} >> terraform.tfvars
terraform init -reconfigure -backend-config="bucket=dcr-tf-state-$env" -backend-config="prefix=$namespace"

terraform apply
# Binds the built-in "cluster-admin" ClusterRole to the user whose e-mail is
# returned by data.google_client_openid_userinfo.me.
resource "kubernetes_cluster_role_binding" "cluster_admin_binding" {
  metadata {
    # ClusterRoleBindings are cluster-scoped, but this module is applied once
    # per namespace with a separate state prefix. A fixed name would make the
    # second deployment fail with "already exists", so the name is suffixed
    # with the namespace.
    name = "cluster-admin-binding-${var.namespace}"
  }
  role_ref {
    api_group = "rbac.authorization.k8s.io"
    kind      = "ClusterRole"
    name      = "cluster-admin"
  }
  subject {
    kind      = "User"
    name      = data.google_client_openid_userinfo.me.email
    api_group = "rbac.authorization.k8s.io"
  }
}
15 | */ 16 | 17 | resource "google_sql_database" "database" { 18 | name = "dcr-${var.namespace}-database" 19 | project = var.project_id 20 | instance = "dcr-${var.env}-db-instance" 21 | } 22 | 23 | resource "google_sql_user" "dcr_db_user" { 24 | name = var.mysql_username 25 | instance = "dcr-${var.env}-db-instance" 26 | password = var.mysql_password 27 | project = var.project_id 28 | } 29 | -------------------------------------------------------------------------------- /resources/deployment/namespace.tf: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2024 TikTok Pte. Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | resource "kubernetes_namespace" "data_clean_room_k8s_namespace" { 18 | metadata { 19 | name = var.namespace 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /resources/deployment/providers.tf: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2024 TikTok Pte. Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | terraform { 18 | required_providers { 19 | google = { 20 | source = "hashicorp/google" 21 | } 22 | kubernetes = { 23 | source = "hashicorp/kubernetes" 24 | } 25 | } 26 | 27 | } 28 | 29 | provider "kubernetes" { 30 | config_path = "~/.kube/config" 31 | } 32 | 33 | data "google_client_openid_userinfo" "me" {} 34 | -------------------------------------------------------------------------------- /resources/deployment/repositories.tf: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2024 TikTok Pte. Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | resource "google_artifact_registry_repository" "data_clean_room_images" { 18 | project = var.project_id 19 | location = "us" 20 | repository_id = "dcr-${var.env}-${var.namespace}-images" 21 | description = "Data Clean Room Images" 22 | format = "DOCKER" 23 | } 24 | -------------------------------------------------------------------------------- /resources/deployment/role.tf: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2024 TikTok Pte. Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
# Namespaced Role that lets the DCR pod manage Jobs and Pods and read pod logs.
resource "kubernetes_role" "role" {
  metadata {
    name = "dcr-pod-role"
    # Reference the namespace resource (as secret.tf does) instead of
    # var.namespace, so Terraform creates the namespace before the Role.
    namespace = kubernetes_namespace.data_clean_room_k8s_namespace.metadata[0].name
  }

  rule {
    # "batch" covers jobs; "" is the core API group (pods, pods/log).
    api_groups = ["batch", ""]
    resources  = ["jobs", "pods", "pods/log"]
    verbs      = ["get", "list", "watch", "create", "update", "patch", "delete"]
  }
}

# Grants the Role above to the DCR pod's Kubernetes service account.
resource "kubernetes_role_binding" "role_binding" {
  metadata {
    name      = "dcr-pod-role-binding"
    namespace = kubernetes_namespace.data_clean_room_k8s_namespace.metadata[0].name
  }
  role_ref {
    api_group = "rbac.authorization.k8s.io"
    kind      = "Role"
    name      = kubernetes_role.role.metadata[0].name
  }
  subject {
    kind      = "ServiceAccount"
    name      = kubernetes_service_account.k8s_dcr_pod_service_account.metadata[0].name
    namespace = kubernetes_namespace.data_clean_room_k8s_namespace.metadata[0].name
  }
}
15 | */ 16 | 17 | resource "kubernetes_secret" "secret" { 18 | metadata { 19 | name = "mysql-secret" 20 | namespace = kubernetes_namespace.data_clean_room_k8s_namespace.metadata[0].name 21 | } 22 | data = { 23 | mysql-username = var.mysql_username, 24 | mysql-password = var.mysql_password, 25 | mysql-database = local.database 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /resources/deployment/service_accounts.tf: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2024 TikTok Pte. Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | locals { 18 | gcp_dcr_pod_sa_email = "${local.gcp_dcr_pod_sa}@${var.project_id}.iam.gserviceaccount.com" 19 | gcp_jupyter_pod_sa_email = "${local.gcp_jupyter_pod_sa}@${var.project_id}.iam.gserviceaccount.com" 20 | } 21 | 22 | resource "kubernetes_service_account" "k8s_dcr_pod_service_account" { 23 | metadata { 24 | name = "dcr-k8s-pod-sa" 25 | namespace = var.namespace 26 | annotations = { 27 | "iam.gke.io/gcp-service-account" = local.gcp_dcr_pod_sa_email 28 | } 29 | } 30 | automount_service_account_token = true 31 | depends_on = [kubernetes_namespace.data_clean_room_k8s_namespace] 32 | } 33 | 34 | resource "kubernetes_service_account" "k8s_jupyter_pod_service_account" { 35 | metadata { 36 | name = "jupyter-k8s-pod-sa" 37 | namespace = var.namespace 38 | annotations = { 39 | "iam.gke.io/gcp-service-account" = local.gcp_jupyter_pod_sa_email 40 | } 41 | } 42 | automount_service_account_token = true 43 | depends_on = [kubernetes_namespace.data_clean_room_k8s_namespace] 44 | } 45 | 46 | 47 | resource "google_service_account_iam_member" "dcr_pod_sa_iam_member" { 48 | service_account_id = "projects/${var.project_id}/serviceAccounts/${local.gcp_dcr_pod_sa}@${var.project_id}.iam.gserviceaccount.com" 49 | role = "roles/iam.workloadIdentityUser" 50 | member = "serviceAccount:${var.project_id}.svc.id.goog[${var.namespace}/${kubernetes_service_account.k8s_dcr_pod_service_account.metadata[0].name}]" 51 | depends_on = [kubernetes_namespace.data_clean_room_k8s_namespace] 52 | } 53 | 54 | resource "google_service_account_iam_member" "jupyter_pod_sa_iam_member" { 55 | service_account_id = "projects/${var.project_id}/serviceAccounts/${local.gcp_jupyter_pod_sa}@${var.project_id}.iam.gserviceaccount.com" 56 | role = "roles/iam.workloadIdentityUser" 57 | member = "serviceAccount:${var.project_id}.svc.id.goog[${var.namespace}/${kubernetes_service_account.k8s_jupyter_pod_service_account.metadata[0].name}]" 58 | depends_on = 
[kubernetes_namespace.data_clean_room_k8s_namespace] 59 | } 60 | -------------------------------------------------------------------------------- /resources/deployment/variables.tf: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2024 TikTok Pte. Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | variable "namespace" { 18 | type = string 19 | description = "Kubernetes namespaces" 20 | default = "" 21 | } 22 | 23 | variable "env" { 24 | type = string 25 | description = "Deployment environment, e.g., dev, prod, oss" 26 | } 27 | 28 | variable "project_id" { 29 | type = string 30 | description = "The GCP project ID" 31 | } 32 | 33 | variable "mysql_username" { 34 | type = string 35 | description = "Mysql username" 36 | } 37 | 38 | variable "mysql_password" { 39 | type = string 40 | description = "Mysql password" 41 | } 42 | 43 | variable "region" { 44 | type = string 45 | description = "Region to create the gcp resources" 46 | } 47 | 48 | variable "zone" { 49 | type = string 50 | description = "Zone to create the gcp resources" 51 | } 52 | 53 | locals { 54 | gcp_dcr_pod_sa = "dcr-${var.env}-pod-sa" 55 | gcp_jupyter_pod_sa = "jupyter-${var.env}-pod-sa" 56 | database = "dcr-${var.namespace}-database" 57 | } 58 | -------------------------------------------------------------------------------- /resources/global/apply.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2024 TikTok Pte. Ltd. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | echo "You are creating the gcp resources and this should only be done once." 17 | 18 | # Check if gcloud is installed 19 | if ! [ -x "$(command -v gcloud)" ]; then 20 | echo "Error: gcloud is not installed." >&2 21 | exit 1 22 | fi 23 | 24 | # Check if gcloud is logged in 25 | if ! gcloud auth list | grep -q 'ACTIVE'; then 26 | echo "Error: No active gcloud account found." >&2 27 | exit 1 28 | fi 29 | 30 | # Check that the variables file exists before sourcing it 31 | VAR_FILE="../../env.bzl" 32 | if [ ! -f "$VAR_FILE" ]; then 33 | echo "Error: Variables file does not exist." >&2 34 | exit 1 35 | fi 36 | VAR_FILE=$(realpath "$VAR_FILE") 37 | source "$VAR_FILE" 38 | 39 | if ! gsutil ls "gs://dcr-tf-state-$env" > /dev/null 2>&1; then 40 | gsutil mb -l us "gs://dcr-tf-state-$env" 41 | fi 42 | 43 | cp "$VAR_FILE" terraform.tfvars 44 | terraform init -reconfigure -backend-config="bucket=dcr-tf-state-$env" -backend-config="prefix=cloud" 45 | terraform apply 46 | -------------------------------------------------------------------------------- /resources/global/backend.tf: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2024 TikTok Pte. Ltd. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | terraform { 18 | backend "gcs" {} 19 | } 20 | -------------------------------------------------------------------------------- /resources/global/buckets.tf: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2024 TikTok Pte. Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | resource "google_storage_bucket" "data_clean_room_hub" { 18 | name = "dcr-${var.env}-hub" 19 | location = "us" 20 | project = var.project_id 21 | public_access_prevention = "enforced" 22 | uniform_bucket_level_access = true 23 | } 24 | -------------------------------------------------------------------------------- /resources/global/cluster.tf: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2024 TikTok Pte. Ltd. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | locals { 18 | cluster_name = "dcr-${var.env}-cluster" 19 | node_pool_name = "dcr-${var.env}-node-pool" 20 | } 21 | 22 | # GKE Cluster 23 | resource "google_container_cluster" "dcr_cluster" { 24 | project = var.project_id 25 | name = local.cluster_name 26 | # if use region, each zone will create a node 27 | location = var.zone 28 | # We can't create a cluster with no node pool defined, but we want to only use 29 | # separately managed node pools. So we create the smallest possible default 30 | # node pool and immediately delete it. 
31 | deletion_protection = false 32 | remove_default_node_pool = true 33 | enable_l4_ilb_subsetting = true 34 | initial_node_count = 1 35 | workload_identity_config { 36 | workload_pool = "${var.project_id}.svc.id.goog" 37 | } 38 | ip_allocation_policy { 39 | stack_type = "IPV4_IPV6" 40 | } 41 | datapath_provider = "ADVANCED_DATAPATH" 42 | network = google_compute_network.data_clean_room_network.self_link 43 | subnetwork = google_compute_subnetwork.data_clean_room_subnetwork.self_link 44 | } 45 | 46 | 47 | # Node pool for GKE cluster 48 | resource "google_container_node_pool" "dcr_node_pool" { 49 | project = var.project_id 50 | name = local.node_pool_name 51 | location = var.zone 52 | cluster = google_container_cluster.dcr_cluster.name 53 | node_count = var.num_nodes 54 | 55 | node_config { 56 | service_account = google_service_account.gcp_dcr_cluster_sa.email 57 | preemptible = false 58 | machine_type = var.type 59 | } 60 | 61 | depends_on = [ 62 | google_service_account.gcp_dcr_cluster_sa, 63 | ] 64 | autoscaling { 65 | max_node_count = 3 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /resources/global/database.tf: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2024 TikTok Pte. Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | resource "google_sql_database_instance" "dcr_database_instance" { 18 | name = "dcr-${var.env}-db-instance" 19 | database_version = "MYSQL_8_0" 20 | project = var.project_id 21 | region = var.region 22 | settings { 23 | tier = "db-f1-micro" 24 | ip_configuration { 25 | ipv4_enabled = false 26 | private_network = google_compute_network.data_clean_room_network.id 27 | enable_private_path_for_google_cloud_services = true 28 | } 29 | } 30 | lifecycle { 31 | prevent_destroy = false 32 | } 33 | depends_on = [ google_compute_subnetwork.data_clean_room_subnetwork, google_compute_global_address.dcr_private_address, google_service_networking_connection.private_vpc_connection ] 34 | } 35 | -------------------------------------------------------------------------------- /resources/global/network.tf: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2024 TikTok Pte. Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | resource "google_compute_network" "data_clean_room_network" { 18 | name = "dcr-${var.env}-network" 19 | auto_create_subnetworks = false 20 | project = var.project_id 21 | } 22 | 23 | resource "google_compute_global_address" "dcr_private_address" { 24 | name = "dcr-${var.env}-private-address" 25 | project = var.project_id 26 | purpose = "VPC_PEERING" 27 | address_type = "INTERNAL" 28 | prefix_length = 16 29 | network = google_compute_network.data_clean_room_network.self_link 30 | } 31 | 32 | resource "google_service_networking_connection" "private_vpc_connection" { 33 | network = google_compute_network.data_clean_room_network.self_link 34 | service = "servicenetworking.googleapis.com" 35 | reserved_peering_ranges = [google_compute_global_address.dcr_private_address.name] 36 | depends_on = [ google_compute_global_address.dcr_private_address ] 37 | } 38 | 39 | resource "google_compute_subnetwork" "data_clean_room_subnetwork" { 40 | name = "dcr-${var.env}-subnetwork" 41 | project = var.project_id 42 | ip_cidr_range = "10.0.0.0/22" 43 | region = var.region 44 | 45 | stack_type = "IPV4_IPV6" 46 | ipv6_access_type = "EXTERNAL" 47 | 48 | network = google_compute_network.data_clean_room_network.id 49 | depends_on = [ google_service_networking_connection.private_vpc_connection ] 50 | } 51 | -------------------------------------------------------------------------------- /resources/global/providers.tf: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2024 TikTok Pte. Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | terraform { 18 | required_providers { 19 | google = { 20 | source = "hashicorp/google" 21 | } 22 | } 23 | } 24 | 25 | provider "kubernetes" { 26 | config_path = "~/.kube/config" 27 | } 28 | 29 | data "google_client_openid_userinfo" "me" {} 30 | -------------------------------------------------------------------------------- /resources/global/repositories.tf: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2024 TikTok Pte. Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | resource "google_artifact_registry_repository" "dcr_user_images" { 18 | project = var.project_id 19 | location = "us" 20 | repository_id = "dcr-${var.env}-user-images" 21 | description = "The repository stores the images that are built by data clean room API and running in the confidential space." 
22 | format = "DOCKER" 23 | } 24 | -------------------------------------------------------------------------------- /resources/global/service_accounts.tf: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2024 TikTok Pte. Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | # A service account used for data clean room cluster 18 | resource "google_service_account" "gcp_dcr_cluster_sa" { 19 | account_id = "dcr-${var.env}-cluster-sa" 20 | display_name = "A Service account for data clean room cluster" 21 | project = var.project_id 22 | } 23 | 24 | resource "google_service_account" "gcp_cvm_sa" { 25 | account_id = "dcr-${var.env}-cvm-sa" 26 | display_name = "A Service account for confidential vm" 27 | project = var.project_id 28 | } 29 | 30 | resource "google_service_account" "gcp_dcr_pod_sa" { 31 | account_id = "dcr-${var.env}-pod-sa" 32 | display_name = "A Service account for data clean room api pod" 33 | project = var.project_id 34 | } 35 | 36 | resource "google_service_account" "gcp_jupyter_pod_sa" { 37 | account_id = "jupyter-${var.env}-pod-sa" 38 | display_name = "A Service account for jupyterhub single user pod" 39 | project = var.project_id 40 | } 41 | -------------------------------------------------------------------------------- /resources/global/variables.tf: -------------------------------------------------------------------------------- 1 | /** 2 
| * Copyright 2024 TikTok Pte. Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | variable "env" { 18 | type = string 19 | description = "Deployment environment, e.g., dev, prod, oss" 20 | } 21 | 22 | variable "region" { 23 | type = string 24 | description = "Region to create the gcp resources" 25 | } 26 | 27 | variable "zone" { 28 | type = string 29 | description = "Zone to create the gcp resources" 30 | } 31 | 32 | variable "project_id" { 33 | type = string 34 | description = "The GCP project ID" 35 | } 36 | 37 | variable "type" { 38 | type = string 39 | description = "Instance type for the GKE instances" 40 | default = "c3-highcpu-22" 41 | } 42 | 43 | variable "num_nodes" { 44 | type = number 45 | description = "Number of nodes to create in the GKE cluster" 46 | default = 1 47 | } 48 | -------------------------------------------------------------------------------- /resources/minikube/apply.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2024 TikTok Pte. Ltd. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | if ! command -v minikube &> /dev/null 17 | then 18 | echo "Minikube is not installed. Please install it first. https://minikube.sigs.k8s.io/docs/start/" 19 | exit 1 20 | fi 21 | 22 | env="minikube" 23 | namespace="manatee" 24 | dbuser="manatee" 25 | dbpwd=$(LC_ALL=C tr -dc 'a-zA-Z0-9' terraform.tfvars 29 | echo -e "namespace=\"$namespace\"" >> terraform.tfvars 30 | echo -e "mysql_username=\"$dbuser\"" >> terraform.tfvars 31 | echo -e "mysql_password=\"$dbpwd\"" >> terraform.tfvars 32 | 33 | terraform init -reconfigure 34 | terraform apply 35 | 36 | eval $(minikube docker-env) -------------------------------------------------------------------------------- /resources/minikube/namespace.tf: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2024 TikTok Pte. Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | resource "kubernetes_namespace" "data_clean_room_k8s_namespace" { 18 | metadata { 19 | name = var.namespace 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /resources/minikube/providers.tf: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2024 TikTok Pte. Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | terraform { 18 | required_providers { 19 | kubernetes = { 20 | source = "hashicorp/kubernetes" 21 | } 22 | } 23 | 24 | } 25 | 26 | provider "kubernetes" { 27 | config_path = "~/.kube/config" 28 | } 29 | -------------------------------------------------------------------------------- /resources/minikube/role.tf: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2024 TikTok Pte. Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | resource "kubernetes_role" "role" { 18 | metadata { 19 | name = "dcr-pod-role" 20 | namespace = var.namespace 21 | } 22 | 23 | rule { 24 | api_groups = ["batch", ""] 25 | resources = ["jobs", "pods", "pods/log"] 26 | verbs = ["get", "list", "watch", "create", "update", "patch", "delete"] 27 | } 28 | } 29 | 30 | resource "kubernetes_role_binding" "role_binding" { 31 | metadata { 32 | name = "dcr-pod-role-binding" 33 | namespace = var.namespace 34 | } 35 | role_ref { 36 | api_group = "rbac.authorization.k8s.io" 37 | kind = "Role" 38 | name = kubernetes_role.role.metadata[0].name 39 | } 40 | subject { 41 | kind = "ServiceAccount" 42 | name = kubernetes_service_account.k8s_dcr_pod_service_account.metadata[0].name 43 | namespace = var.namespace 44 | } 45 | } 46 | 47 | -------------------------------------------------------------------------------- /resources/minikube/secret.tf: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2024 TikTok Pte. Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | resource "kubernetes_secret" "secret" { 18 | metadata { 19 | name = "mysql-secret" 20 | namespace = kubernetes_namespace.data_clean_room_k8s_namespace.metadata[0].name 21 | } 22 | data = { 23 | mysql-username = var.mysql_username, 24 | mysql-password = var.mysql_password, 25 | mysql-database = local.database 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /resources/minikube/service_accounts.tf: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2024 TikTok Pte. Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | resource "kubernetes_service_account" "k8s_dcr_pod_service_account" { 18 | metadata { 19 | name = "dcr-k8s-pod-sa" 20 | namespace = var.namespace 21 | } 22 | automount_service_account_token = true 23 | depends_on = [kubernetes_namespace.data_clean_room_k8s_namespace] 24 | } 25 | 26 | resource "kubernetes_service_account" "k8s_jupyter_pod_service_account" { 27 | metadata { 28 | name = "jupyter-k8s-pod-sa" 29 | namespace = var.namespace 30 | } 31 | automount_service_account_token = true 32 | depends_on = [kubernetes_namespace.data_clean_room_k8s_namespace] 33 | } 34 | -------------------------------------------------------------------------------- /resources/minikube/variables.tf: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2024 TikTok Pte. Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | variable "namespace" { 18 | type = string 19 | description = "Kubernetes namespaces" 20 | default = "" 21 | } 22 | 23 | variable "env" { 24 | type = string 25 | description = "Deployment environment, e.g., dev, prod, oss" 26 | } 27 | 28 | variable "mysql_username" { 29 | type = string 30 | description = "Mysql username" 31 | } 32 | 33 | variable "mysql_password" { 34 | type = string 35 | description = "Mysql password" 36 | } 37 | 38 | locals { 39 | database = "dcr-${var.namespace}-database" 40 | } 41 | -------------------------------------------------------------------------------- /tutorials/code/insurance.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "fc104190-b96c-4bf8-a7e7-4978c3f11259", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "!pip install google-cloud-resource-manager google-cloud-storage numpy seaborn matplotlib pandas scikit-learn xgboost > /dev/null" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "id": "460f04a1-1f02-441b-94b9-084971624bd6", 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "import sdk\n", 21 | "import pandas\n", 22 | "import io\n", 23 | "import matplotlib.pyplot as plt\n", 24 | "import seaborn as sns" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "id": "28a1e860-278c-4b02-aac6-badd8be70af7", 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "sdk.gcp.init(\"\", \"\",\"\")\n", 35 | "repo = sdk.DataRepo(\"gs://\", \"gs://\")\n", 36 | "raw = repo.get_data(\"insurance.csv\")\n", 37 | "data = pandas.read_csv(io.StringIO(raw))" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "id": "8fc82891-1dc8-4f32-80ac-2d34d481067c", 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "data.info()" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 
| "execution_count": null, 53 | "id": "e5b6fd79-08f1-4d29-9274-b78f6553ef3e", 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "data.head()" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "id": "d18097a2-4da1-4de2-a939-8b59caf4703e", 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "# Distribution Histogram\n", 68 | "fig, axes = plt.subplots(3, 3, figsize=(15, 15))\n", 69 | "for i, column in enumerate(data.columns):\n", 70 | " sns.histplot(data[column], ax=axes[i//3, i%3])\n", 71 | " axes[i//3, i%3].set_title(column)\n", 72 | "plt.tight_layout()\n", 73 | "plt.show()" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "id": "54ee0cc4-8dfd-457e-805b-c336fcc0556e", 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "\n", 84 | "# Correlation Heatmap\n", 85 | "data['sex'] = data['sex'].apply({'male':0,'female':1}.get) \n", 86 | "data['smoker'] = data['smoker'].apply({'yes':1, 'no':0}.get)\n", 87 | "data['region'] = data['region'].apply({'southwest':1, 'southeast':2, 'northwest':3, 'northeast':4}.get)\n", 88 | "plt.figure(figsize=(12, 10))\n", 89 | "sns.heatmap(data.corr(), annot=True, cmap='coolwarm', fmt='.2f', square=True, cbar_kws={\"shrink\": 0.75})\n", 90 | "plt.title('Correlation Heatmap - Strength of Relationships Between Features', fontsize=16)\n", 91 | "plt.xlabel('Features', fontsize=14)\n", 92 | "plt.ylabel('Features', fontsize=14)\n", 93 | "\n", 94 | "# Display the plot\n", 95 | "plt.show()" 96 | ] 97 | } 98 | ], 99 | "metadata": { 100 | "kernelspec": { 101 | "display_name": "Python 3 (ipykernel)", 102 | "language": "python", 103 | "name": "python3" 104 | }, 105 | "language_info": { 106 | "codemirror_mode": { 107 | "name": "ipython", 108 | "version": 3 109 | }, 110 | "file_extension": ".py", 111 | "mimetype": "text/x-python", 112 | "name": "python", 113 | "nbconvert_exporter": "python", 114 | "pygments_lexer": "ipython3", 115 | "version": "3.11.9" 116 
| } 117 | }, 118 | "nbformat": 4, 119 | "nbformat_minor": 5 120 | } 121 | -------------------------------------------------------------------------------- /tutorials/code/regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "37cb4626-0ba4-4a58-a08b-0087d55d286e", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "!pip install google-cloud-resource-manager google-cloud-storage numpy seaborn matplotlib pandas scikit-learn xgboost > /dev/null" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "id": "680d25da-5522-4234-ad98-2cf13f80bae1", 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "from sklearn.model_selection import train_test_split\n", 21 | "from sklearn.metrics import root_mean_squared_error\n", 22 | "from xgboost import XGBRegressor\n", 23 | "import sdk\n", 24 | "import io\n", "import pandas" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "id": "204ecd06-7d0b-41ae-af2b-e4ca1277b409", 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "\n", 35 | "sdk.gcp.init(\"\", \"\",\"\")\n", 36 | "repo = sdk.DataRepo(\"gs://\", \"gs://\")\n", 37 | "raw = repo.get_data(\"insurance.csv\")\n", 38 | "data = pandas.read_csv(io.StringIO(raw))" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "id": "d7d34424", 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "data['sex'] = data['sex'].apply({'male':0,'female':1}.get) \n", 49 | "data['smoker'] = data['smoker'].apply({'yes':1, 'no':0}.get)\n", 50 | "data['region'] = data['region'].apply({'southwest':1, 'southeast':2, 'northwest':3, 'northeast':4}.get)" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "id": "43f5f736-f987-48dc-977f-d75b9d683038", 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "X = data[['age','bmi', 
'smoker']]\n", 61 | "y = data[['charges']]" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "id": "340ca175-db00-40fe-b2ae-0962f6fe6708", 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "# Assume 'X' contains features and 'y' is the target variable (charges)\n", 72 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "id": "34276ee8-bc8b-4ec5-9be2-d2e817fe86ff", 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "model = XGBRegressor()\n", 83 | "model.fit(X_train, y_train)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "id": "98468cf0-244d-49f1-814d-2dfd17f1eeba", 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [ 93 | "predictions = model.predict(X_test)" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "id": "ac29bb1e-944f-4672-b9e0-38c2f0218680", 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "rmse = root_mean_squared_error(y_test, predictions)\n", 104 | "print(f'Root Mean Squared Error: {rmse}')" 105 | ] 106 | } 107 | ], 108 | "metadata": { 109 | "kernelspec": { 110 | "display_name": "Python 3 (ipykernel)", 111 | "language": "python", 112 | "name": "python3" 113 | }, 114 | "language_info": { 115 | "codemirror_mode": { 116 | "name": "ipython", 117 | "version": 3 118 | }, 119 | "file_extension": ".py", 120 | "mimetype": "text/x-python", 121 | "name": "python", 122 | "nbconvert_exporter": "python", 123 | "pygments_lexer": "ipython3", 124 | "version": "3.11.9" 125 | } 126 | }, 127 | "nbformat": 4, 128 | "nbformat_minor": 5 129 | } 130 | -------------------------------------------------------------------------------- /tutorials/code/sdk/.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints/ 2 | 
logger = logging.getLogger(__name__)


class Gcp():
    """Process-wide holder for the GCP settings used when building credentials.

    All fields start as empty strings and are filled in by ``init``.
    """

    def __init__(self):
        self.project_id = ""
        self.pool_name = ""
        self.project_number = ""
        self.service_account = ""

    def init(self, project_id, pool_name, service_account):
        """Store the given settings and resolve the project number.

        Args:
            project_id: GCP project id.
            pool_name: workload identity pool name.
            service_account: service account name (the part before ``@``).

        Note:
            Calls the Resource Manager API through ``get_project_number``.
        """
        self.project_id = project_id
        self.pool_name = pool_name
        self.service_account = service_account
        self.project_number = self.get_project_number(project_id)

    def get_project_number(self, project_id):
        """Look up ``project_id`` via Resource Manager and return the second
        ``/``-separated segment of the returned project's ``name``.
        """
        client = resourcemanager_v3.ProjectsClient()
        project = client.get_project(name=f"projects/{project_id}")
        # NOTE(review): relies on `name` being "projects/<number>" as described
        # in the Resource Manager v3 reference -- confirm if the API changes.
        return project.name.split('/')[1]


# Shared module-level instance; configure it with ``gcp.init(...)``.
gcp = Gcp()


class Stage(Enum):
    """Execution stage identifiers; values match ``DataRepo.get_stage``."""
    UNKNOWN = 0
    STAGE1 = 1
    STAGE2 = 2


class DataRepo():
    """Reads a file from the stage-1 or stage-2 storage location, chosen by
    the ``EXECUTION_STAGE`` environment variable.
    """

    def __init__(self, stage_1_bucket, stage_2_bucket):
        """Create one storage backend per stage from the two URLs.

        Raises:
            ValueError / NotImplementedError: propagated from
                ``RemoteStorage.init`` for unsupported URLs.
        """
        self.stage1 = RemoteStorage.init(Stage.STAGE1, stage_1_bucket)
        self.stage2 = RemoteStorage.init(Stage.STAGE2, stage_2_bucket)

    def get_data(self, filename):
        """Return the content of ``filename`` from the current stage's storage.

        When the stage is neither 1 nor 2 a warning is logged and
        ``filename`` itself is returned unchanged.
        """
        # Read the environment once so both comparisons see the same value.
        stage = self.get_stage()
        if stage == 1:
            return self.stage1.get_data(filename)
        if stage == 2:
            return self.stage2.get_data(filename)
        logger.warning("Unknown stage")
        return filename

    def get_stage(self):
        """Return the integer in ``EXECUTION_STAGE`` (surrounding quotes are
        stripped), or ``Stage.UNKNOWN.value`` when it is unset or not a number.
        """
        raw = os.getenv('EXECUTION_STAGE', '').strip().strip('\'"')
        try:
            return int(raw)
        except ValueError:
            # int('') and int('abc') both land here; previously this escaped
            # as an exception and the "Unknown stage" fallback never ran.
            return Stage.UNKNOWN.value


class RemoteStorage():
    """Base class for storage backends; ``init`` is the factory."""

    def __init__(self):
        pass

    def get_data(self, filename):
        """Overridden by concrete backends; the base implementation returns None."""
        pass

    @staticmethod
    def init(stage, url):
        """Build the backend matching the scheme of ``url``.

        Args:
            stage: ``Stage`` member forwarded to the backend.
            url: storage URL; only the ``gs`` scheme is implemented.

        Raises:
            ValueError: ``url`` cannot be parsed or has an unknown scheme.
            NotImplementedError: scheme is ``s3`` or ``https``.
        """
        try:
            o = urlparse(url, allow_fragments=False)
        except Exception as e:
            # Chain the cause so the original parse failure stays visible.
            raise ValueError(f"Invalid URL: {url}") from e

        if o.scheme == "gs":
            return RemoteStorageGCS(stage, o.netloc, o.path)
        elif o.scheme == "s3":
            raise NotImplementedError("S3 storage not implemented")
        elif o.scheme == "https":
            raise NotImplementedError("HTTPS storage not implemented")
        else:
            raise ValueError("Invalid scheme: " + o.scheme)
#!/bin/sh
#
# Provisions the tutorial's GCP resources: two buckets with the sample data,
# read access for the stage-1 and stage-2 identities, and a workload identity
# pool with an OIDC provider named "attestation-verifier".
# Paths are relative to the current directory.

#### Please fill the following variables before running ####
STAGE_1_BUCKET=
STAGE_2_BUCKET=
WORKLOAD_IDENTITY_POOL_NAME=
TEE_SERVICE_ACCOUNT=
############################################################

# require_var NAME VALUE -- stop before any gcloud call if VALUE is empty.
require_var() {
    if [ -z "$2" ]; then
        echo "Error: $1 is not set." >&2
        exit 1
    fi
}

require_var STAGE_1_BUCKET "$STAGE_1_BUCKET"
require_var STAGE_2_BUCKET "$STAGE_2_BUCKET"
require_var WORKLOAD_IDENTITY_POOL_NAME "$WORKLOAD_IDENTITY_POOL_NAME"
require_var TEE_SERVICE_ACCOUNT "$TEE_SERVICE_ACCOUNT"

VAR_FILE="../env.bzl"
if [ ! -f "$VAR_FILE" ]; then
    echo "Error: Variables file does not exist." >&2
    exit 1
fi

VAR_FILE=$(realpath "$VAR_FILE")
# `source` is a bash extension; `.` is the POSIX spelling and works under
# /bin/sh implementations such as dash.
. "$VAR_FILE"

# The commands below interpolate $env and $project_id, which this script never
# assigns itself -- presumably they come from the variables file.
require_var env "${env:-}"
require_var project_id "${project_id:-}"

# data provisioning

gcloud storage buckets create "gs://$STAGE_1_BUCKET"
gcloud storage buckets create "gs://$STAGE_2_BUCKET"

gcloud storage cp data/stage1/insurance.csv "gs://$STAGE_1_BUCKET"
gcloud storage cp data/stage2/insurance.csv "gs://$STAGE_2_BUCKET"

# data permissions: stage 1

gcloud storage buckets add-iam-policy-binding "gs://$STAGE_1_BUCKET" \
    --member="serviceAccount:jupyter-$env-pod-sa@$project_id.iam.gserviceaccount.com" \
    --role=roles/storage.objectViewer

# data permissions: stage 2

gcloud iam service-accounts create "$TEE_SERVICE_ACCOUNT"

gcloud storage buckets add-iam-policy-binding "gs://$STAGE_2_BUCKET" \
    --member="serviceAccount:$TEE_SERVICE_ACCOUNT@$project_id.iam.gserviceaccount.com" \
    --role=roles/storage.objectViewer

gcloud iam workload-identity-pools create "$WORKLOAD_IDENTITY_POOL_NAME" \
    --location=global

# Resolve the numeric project number once; the principalSet URI needs it.
PROJECT_NUMBER=$(gcloud projects describe "$project_id" \
    --format="value(projectNumber)")

gcloud iam service-accounts add-iam-policy-binding \
    "$TEE_SERVICE_ACCOUNT@$project_id.iam.gserviceaccount.com" \
    --member="principalSet://iam.googleapis.com/projects/$PROJECT_NUMBER/locations/global/workloadIdentityPools/$WORKLOAD_IDENTITY_POOL_NAME/*" \
    --role=roles/iam.workloadIdentityUser

gcloud iam workload-identity-pools providers create-oidc attestation-verifier \
    --location=global \
    --workload-identity-pool="$WORKLOAD_IDENTITY_POOL_NAME" \
    --issuer-uri="https://confidentialcomputing.googleapis.com/" \
    --allowed-audiences="https://sts.googleapis.com" \
    --attribute-mapping="google.subject=\"gcpcs::\"+assertion.submods.container.image_digest+\"::\"+assertion.submods.gce.project_number+\"::\"+assertion.submods.gce.instance_id" \
    --attribute-condition="assertion.swname == 'CONFIDENTIAL_SPACE' && 'STABLE' in assertion.submods.confidential_space.support_attributes"