├── sdk-py ├── tests │ └── __init__.py ├── setup.py ├── modelbox │ ├── __init__.py │ ├── resource_usage.py │ ├── notebook_widgets.py │ ├── lightning_logger.py │ ├── client.py │ ├── admin_pb2_grpc.py │ └── admin_pb2.py ├── pyproject.toml ├── setup.cfg └── README.md ├── website ├── static │ ├── .nojekyll │ └── img │ │ ├── favicon.ico │ │ ├── ModelBox3.png │ │ ├── docusaurus.png │ │ ├── logo.svg │ │ └── undraw_docusaurus_tree.svg ├── docs │ ├── guides │ │ ├── tensorboard.md │ │ ├── model-distribution.md │ │ ├── events-and-audit.md │ │ ├── img │ │ │ ├── API_Concepts.png │ │ │ ├── localeDropdown.png │ │ │ ├── ModelBox_HighLevel.png │ │ │ ├── docsVersionDropdown.png │ │ │ └── modelbox_high_level_picture.png │ │ ├── service-observability.md │ │ ├── _category_.json │ │ ├── cli.md │ │ ├── python-sdk.md │ │ ├── metrics.md │ │ ├── develop.md │ │ ├── concepts.md │ │ ├── configuration.md │ │ ├── experiments.md │ │ └── models.md │ ├── tutorials │ │ ├── log-metadata-pytorch.md │ │ ├── _category_.json │ │ ├── deploy-your-site.md │ │ ├── create-a-blog-post.md │ │ ├── create-a-page.md │ │ └── create-a-document.md │ ├── compare.md │ ├── getting_started.md │ ├── intro.md │ └── install.md ├── babel.config.js ├── blog │ ├── 2021-08-26-welcome │ │ ├── docusaurus-plushie-banner.jpeg │ │ └── index.md │ └── authors.yml ├── src │ ├── pages │ │ ├── markdown-page.md │ │ ├── index.module.css │ │ └── index.js │ ├── components │ │ └── HomepageFeatures │ │ │ ├── styles.module.css │ │ │ └── index.js │ └── css │ │ └── custom.css ├── .gitignore ├── README.md ├── package.json ├── sidebars.js └── docusaurus.config.js ├── src ├── lib.rs ├── main.rs ├── agent.rs └── server_config.rs ├── docs └── images │ ├── ModelBox1.png │ ├── API_Concepts.png │ └── ModelBox_HighLevel.png ├── tutorials └── artifacts │ └── mnist_cnn.pt ├── .gitpod.yml ├── migration ├── src │ ├── main.rs │ └── lib.rs ├── Cargo.toml └── README.md ├── sdk-rs ├── build.rs ├── Cargo.toml └── src │ └── lib.rs ├── deploy ├── scripts │ ├── 
gitpod_init.sh │ ├── compose_start.sh │ └── jupyter_server_start.sh └── kubernetes │ └── manifests │ └── modelbox.yaml ├── Dockerfile ├── entity ├── src │ ├── lib.rs │ ├── prelude.rs │ ├── events.rs │ ├── metadata.rs │ ├── models.rs │ ├── experiments.rs │ ├── metrics.rs │ ├── files.rs │ ├── mutations.rs │ └── model_versions.rs └── Cargo.toml ├── Makefile.toml ├── .gitignore ├── sdk-go ├── config_test.go ├── config.go └── client.go ├── python-rpc-client ├── build.rs ├── pyproject.toml ├── Cargo.toml ├── .gitignore ├── .github │ └── workflows │ │ └── CI.yml └── src │ ├── mock.rs │ ├── api_structs.rs │ └── lib.rs ├── CONTRIBUTING.md ├── proto ├── gen_proto.sh ├── admin.proto └── service.proto ├── agents ├── basic-pytorch-inspector │ └── inspect.py └── py │ ├── client.py │ └── agent.py ├── tests ├── setup.rs └── integration_test.rs ├── .circleci └── config.yml ├── Cargo.toml ├── .goreleaser.yaml ├── BUILD.md ├── Makefile ├── docker-compose.yaml ├── go.mod └── CODE_OF_CONDUCT.md /sdk-py/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /website/static/.nojekyll: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod repository; -------------------------------------------------------------------------------- /sdk-py/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | if __name__ == '__main__': 4 | setup() -------------------------------------------------------------------------------- /docs/images/ModelBox1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tensorland/modelbox/HEAD/docs/images/ModelBox1.png -------------------------------------------------------------------------------- /docs/images/API_Concepts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorland/modelbox/HEAD/docs/images/API_Concepts.png -------------------------------------------------------------------------------- /sdk-py/modelbox/__init__.py: -------------------------------------------------------------------------------- 1 | from modelbox import * 2 | from . import service_pb2 3 | from . import service_pb2_grpc -------------------------------------------------------------------------------- /website/static/img/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorland/modelbox/HEAD/website/static/img/favicon.ico -------------------------------------------------------------------------------- /docs/images/ModelBox_HighLevel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorland/modelbox/HEAD/docs/images/ModelBox_HighLevel.png -------------------------------------------------------------------------------- /sdk-py/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=42", "wheel"] 3 | build-backend = "setuptools.build_meta" -------------------------------------------------------------------------------- /tutorials/artifacts/mnist_cnn.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorland/modelbox/HEAD/tutorials/artifacts/mnist_cnn.pt -------------------------------------------------------------------------------- /website/docs/guides/tensorboard.md: 
-------------------------------------------------------------------------------- 1 | --- 2 | sidebar_position: 4 3 | --- 4 | 5 | # Exporting metadata to TensorBoard 6 | -------------------------------------------------------------------------------- /website/static/img/ModelBox3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorland/modelbox/HEAD/website/static/img/ModelBox3.png -------------------------------------------------------------------------------- /website/static/img/docusaurus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorland/modelbox/HEAD/website/static/img/docusaurus.png -------------------------------------------------------------------------------- /website/babel.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | presets: [require.resolve('@docusaurus/core/lib/babel/preset')], 3 | }; 4 | -------------------------------------------------------------------------------- /.gitpod.yml: -------------------------------------------------------------------------------- 1 | tasks: 2 | - init: ./deploy/scripts/gitpod_init.sh 3 | command: docker compose --profile local up 4 | 5 | 6 | -------------------------------------------------------------------------------- /website/docs/guides/model-distribution.md: -------------------------------------------------------------------------------- 1 | --- 2 | sidebar_position: 6 3 | --- 4 | 5 | # Distributing Models to Inference Services -------------------------------------------------------------------------------- /website/docs/guides/events-and-audit.md: -------------------------------------------------------------------------------- 1 | --- 2 | sidebar_position: 7 3 | --- 4 | 5 | # Model and Experiment Change and Diagnostics Events 
-------------------------------------------------------------------------------- /website/docs/guides/img/API_Concepts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorland/modelbox/HEAD/website/docs/guides/img/API_Concepts.png -------------------------------------------------------------------------------- /website/docs/tutorials/log-metadata-pytorch.md: -------------------------------------------------------------------------------- 1 | --- 2 | sidebar_position: 7 3 | --- 4 | 5 | # Log Experiment Metadata from Pytorch 6 | -------------------------------------------------------------------------------- /website/docs/guides/img/localeDropdown.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorland/modelbox/HEAD/website/docs/guides/img/localeDropdown.png -------------------------------------------------------------------------------- /website/docs/guides/img/ModelBox_HighLevel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorland/modelbox/HEAD/website/docs/guides/img/ModelBox_HighLevel.png -------------------------------------------------------------------------------- /website/docs/guides/img/docsVersionDropdown.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorland/modelbox/HEAD/website/docs/guides/img/docsVersionDropdown.png -------------------------------------------------------------------------------- /website/docs/guides/service-observability.md: -------------------------------------------------------------------------------- 1 | --- 2 | sidebar_position: 9 3 | --- 4 | 5 | # Troubleshooting of the ModelBox Control Plane Service 6 | 7 | -------------------------------------------------------------------------------- /sdk-py/modelbox/resource_usage.py: 
-------------------------------------------------------------------------------- 1 | class HostProfiler: 2 | 3 | def __init__(self) -> None: 4 | pass 5 | 6 | def profile(self): 7 | pass -------------------------------------------------------------------------------- /migration/src/main.rs: -------------------------------------------------------------------------------- 1 | use sea_orm_migration::prelude::*; 2 | 3 | #[async_std::main] 4 | async fn main() { 5 | cli::run_cli(migration::Migrator).await; 6 | } 7 | -------------------------------------------------------------------------------- /website/docs/guides/img/modelbox_high_level_picture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorland/modelbox/HEAD/website/docs/guides/img/modelbox_high_level_picture.png -------------------------------------------------------------------------------- /sdk-rs/build.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | tonic_build::configure() 3 | .format(false) 4 | .compile(&["service.proto"], &["../proto"]) 5 | .unwrap(); 6 | } 7 | -------------------------------------------------------------------------------- /website/blog/2021-08-26-welcome/docusaurus-plushie-banner.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorland/modelbox/HEAD/website/blog/2021-08-26-welcome/docusaurus-plushie-banner.jpeg -------------------------------------------------------------------------------- /website/src/pages/markdown-page.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Markdown page example 3 | --- 4 | 5 | # Markdown page example 6 | 7 | You don't need React to write simple standalone pages. 
8 | -------------------------------------------------------------------------------- /deploy/scripts/gitpod_init.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Install goreleaser 4 | go install github.com/goreleaser/goreleaser@latest 5 | 6 | # Build modelbox 7 | goreleaser release --snapshot --rm-dist -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Dockerfile 2 | #FROM gcr.io/distroless/static-debian11 3 | FROM ubuntu 4 | WORKDIR /app 5 | COPY modelbox /app/modelbox 6 | COPY cmd/modelbox/assets/modelbox_server.yaml /app/modelbox_server.yaml 7 | EXPOSE 8085 8 | -------------------------------------------------------------------------------- /website/docs/tutorials/_category_.json: -------------------------------------------------------------------------------- 1 | { 2 | "label": "Tutorials", 3 | "position": 2, 4 | "link": { 5 | "type": "generated-index", 6 | "description": "Learn how to solve some common use cases with ModelBox" 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /website/blog/authors.yml: -------------------------------------------------------------------------------- 1 | diptanu: 2 | name: Diptanu Gon Choudhury 3 | title: Maintainer of ModelBox, Software Engineer at Meta. 
4 | url: https://www.linkedin.com/in/diptanu/ 5 | image_url: https://avatars.githubusercontent.com/u/21833?v=4 -------------------------------------------------------------------------------- /website/src/components/HomepageFeatures/styles.module.css: -------------------------------------------------------------------------------- 1 | .features { 2 | display: flex; 3 | align-items: center; 4 | padding: 2rem 0; 5 | width: 100%; 6 | } 7 | 8 | .featureSvg { 9 | height: 200px; 10 | width: 200px; 11 | } 12 | -------------------------------------------------------------------------------- /website/docs/guides/_category_.json: -------------------------------------------------------------------------------- 1 | { 2 | "label": "Guides", 3 | "position": 3, 4 | "link": { 5 | "type": "generated-index", 6 | "description": "In-depth guides for using various aspects of ModelBox and Operating the Service" 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /entity/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! `SeaORM` Entity. 
Generated by sea-orm-codegen 0.11.1 2 | 3 | pub mod prelude; 4 | 5 | pub mod events; 6 | pub mod experiments; 7 | pub mod files; 8 | pub mod metadata; 9 | pub mod metrics; 10 | pub mod model_versions; 11 | pub mod models; 12 | pub mod mutations; 13 | -------------------------------------------------------------------------------- /website/docs/compare.md: -------------------------------------------------------------------------------- 1 | --- 2 | sidebar_position: 2 3 | --- 4 | 5 | # Comparison with other Services 6 | 7 | There are various MLOps systems solving similar problems, below are comparisons of ModelBox with other tools and services - 8 | 9 | ## MLFlow 10 | 11 | ## ClearML 12 | 13 | ## KubeFlow 14 | 15 | ## TFX 16 | -------------------------------------------------------------------------------- /Makefile.toml: -------------------------------------------------------------------------------- 1 | [tasks.migration] 2 | install_crate = "sea-orm-cli" 3 | command = "sea-orm-cli" 4 | args = ["migrate", "up"] 5 | 6 | [tasks.entity] 7 | install_crate = "sea-orm-cli" 8 | command = "sea-orm-cli" 9 | args = ["generate", "entity", "--with-serde", "both", "--with-copy-enums","--date-time-crate", "time", "-l", "-o", "entity/src/"] -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | debug 2 | target 3 | .vscode 4 | ve 5 | 6 | data 7 | 8 | MNIST 9 | lightning_logs 10 | modelbox_server.toml 11 | modelbox_client.toml 12 | 13 | build 14 | 15 | __pycache__ 16 | dist 17 | modelbox.egg-info 18 | 19 | .ipynb_checkpoints/ 20 | *.pt 21 | 22 | tutorials/lid_quartznet* 23 | *.yaml 24 | modelbox 25 | .env 26 | 27 | -------------------------------------------------------------------------------- /sdk-go/config_test.go: -------------------------------------------------------------------------------- 1 | package client 2 | 3 | import ( 4 | "testing" 5 | 6 | 
"github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestNewClientConfig(t *testing.T) { 10 | config, err := NewClientConfig("../cmd/modelbox/assets/modelbox_client.yaml") 11 | assert.Nil(t, err) 12 | assert.Equal(t, ":8085", config.ServerAddr) 13 | } 14 | -------------------------------------------------------------------------------- /python-rpc-client/build.rs: -------------------------------------------------------------------------------- 1 | fn main() -> Result<(), Box> { 2 | let out_dir = "./src"; 3 | 4 | tonic_build::configure() 5 | .out_dir(out_dir) 6 | .compile(&["../proto/service.proto"], &["../proto"]) 7 | .unwrap_or_else(|e| panic!("Failed to compile protos {:?}", e)); 8 | 9 | Ok(()) 10 | } 11 | -------------------------------------------------------------------------------- /migration/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub use sea_orm_migration::prelude::*; 2 | 3 | mod m20220101_000001_create_table; 4 | 5 | pub struct Migrator; 6 | 7 | #[async_trait::async_trait] 8 | impl MigratorTrait for Migrator { 9 | fn migrations() -> Vec> { 10 | vec![Box::new(m20220101_000001_create_table::Migration)] 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /website/.gitignore: -------------------------------------------------------------------------------- 1 | # Dependencies 2 | /node_modules 3 | 4 | # Production 5 | /build 6 | 7 | # Generated files 8 | .docusaurus 9 | .cache-loader 10 | 11 | # Misc 12 | .DS_Store 13 | .env.local 14 | .env.development.local 15 | .env.test.local 16 | .env.production.local 17 | 18 | npm-debug.log* 19 | yarn-debug.log* 20 | yarn-error.log* 21 | .vercel 22 | -------------------------------------------------------------------------------- /website/README.md: -------------------------------------------------------------------------------- 1 | # Website 2 | 3 | This website is built using [docusaurus](https://docusaurus.io) 4 | 
5 | ### Local Development 6 | 7 | ``` 8 | $ npm install 9 | $ npx docusaurus start 10 | ``` 11 | 12 | This command starts a local development server and opens up a browser window. Most changes are reflected live without having to restart the server. -------------------------------------------------------------------------------- /deploy/scripts/compose_start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Setup schema 4 | until /app/modelbox server create-schema --schema-dir /app/schemas/ --config-path=/app/config/modelbox_server_compose.yaml 5 | do 6 | echo "Trying to apply schema again in 5 seconds...." 7 | sleep 5s 8 | done 9 | 10 | # Srart server 11 | /app/modelbox server start --config-path=/app/config/modelbox_server_compose.yaml -------------------------------------------------------------------------------- /deploy/scripts/jupyter_server_start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Install Jupyter nbextensions 4 | 5 | pip install jupyter_contrib_nbextensions 6 | 7 | jupyter contrib nbextension install --user 8 | 9 | pip install jupyter_nbextensions_configurator 10 | 11 | jupyter nbextensions_configurator enable --user 12 | 13 | 14 | # Start the nb server 15 | 16 | start-notebook.sh --NotebookApp.token='' --NotebookApp.password='' -------------------------------------------------------------------------------- /python-rpc-client/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["maturin>=0.14,<0.15"] 3 | build-backend = "maturin" 4 | 5 | [project] 6 | name = "modelbox-rpc-client" 7 | requires-python = ">=3.7" 8 | classifiers = [ 9 | "Programming Language :: Rust", 10 | "Programming Language :: Python :: Implementation :: CPython", 11 | "Programming Language :: Python :: Implementation :: PyPy", 12 | ] 13 | 14 | 15 | 
-------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to ModelBox 2 | 3 | Thank you for your interest in ModelBox. We don't have any strict guidelines for contributing to ModelBox yet. 4 | Please create an issue to discuss the problem you are solving and once we discuss the implementation details on the issue, please feel free to open a Pull Request 5 | to merge the changes. Please ensure that the existing unit tests are green before submitting a request. 6 | -------------------------------------------------------------------------------- /entity/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "entity" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | sea-orm = { version = "^0", features=["debug-print", "runtime-tokio-rustls", "sqlx-postgres", "sqlx-sqlite", "with-time", "with-json", "mock"] } 8 | sea-query = "0" 9 | serde = { version = "1.0", features = ["derive"] } 10 | serde_json = "1.0" 11 | thiserror = "1.0.38" 12 | time = { version = "0.3", features = ["macros"] } -------------------------------------------------------------------------------- /sdk-rs/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "modelbox-client-rs" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | futures = { version = "0.3.*" } 10 | tokio = {version = "1.17.0", features = ["full"]} 11 | anyhow = "1.0.56" 12 | tonic = "0.5" 13 | prost = "0.8" 14 | 15 | [build-dependencies] 16 | tonic-build = "0.5" 17 | 18 | 19 | -------------------------------------------------------------------------------- /entity/src/prelude.rs: 
-------------------------------------------------------------------------------- 1 | //! `SeaORM` Entity. Generated by sea-orm-codegen 0.11.1 2 | 3 | pub use super::events::Entity as Events; 4 | pub use super::experiments::Entity as Experiments; 5 | pub use super::files::Entity as Files; 6 | pub use super::metadata::Entity as Metadata; 7 | pub use super::metrics::Entity as Metrics; 8 | pub use super::model_versions::Entity as ModelVersions; 9 | pub use super::models::Entity as Models; 10 | pub use super::mutations::Entity as Mutations; 11 | -------------------------------------------------------------------------------- /website/src/pages/index.module.css: -------------------------------------------------------------------------------- 1 | /** 2 | * CSS files with the .module.css suffix will be treated as CSS modules 3 | * and scoped locally. 4 | */ 5 | 6 | .heroBanner { 7 | padding: 4rem 0; 8 | text-align: center; 9 | position: relative; 10 | overflow: hidden; 11 | } 12 | 13 | @media screen and (max-width: 996px) { 14 | .heroBanner { 15 | padding: 2rem; 16 | } 17 | } 18 | 19 | .buttons { 20 | display: flex; 21 | align-items: center; 22 | justify-content: center; 23 | } 24 | -------------------------------------------------------------------------------- /website/docs/guides/cli.md: -------------------------------------------------------------------------------- 1 | --- 2 | sidebar_position: 2 3 | --- 4 | 5 | # Server 6 | 7 | ## Generate server configuration 8 | ``` 9 | modelbox server init-config 10 | ``` 11 | 12 | ## Start the server 13 | ``` 14 | modelbox server start --config-path /path/to/config.toml 15 | ``` 16 | 17 | # Interacting with the Service with the CLI 18 | 19 | ## Generate config 20 | ``` 21 | modelbox client init-config 22 | ``` 23 | 24 | ## List Experiments 25 | ``` 26 | modelbox client list-experiments --namespace= 27 | -------------------------------------------------------------------------------- /proto/gen_proto.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | protoc --go_out=sdk-go --go_opt=paths=source_relative --go-grpc_out=sdk-go --go-grpc_opt=paths=source_relative proto/service.proto 4 | 5 | python -m grpc_tools.protoc -Iproto/ --python_out=sdk-py/modelbox/ --grpc_python_out=sdk-py/modelbox/ proto/service.proto 6 | 7 | 8 | protoc --go_out=sdk-go/proto -Iproto/ --go_opt=paths=source_relative --go-grpc_out=sdk-go/proto --go-grpc_opt=paths=source_relative proto/admin.proto 9 | 10 | python -m grpc_tools.protoc -Iproto/ --python_out=sdk-py/modelbox/ --grpc_python_out=sdk-py/modelbox/ proto/admin.proto -------------------------------------------------------------------------------- /sdk-rs/src/lib.rs: -------------------------------------------------------------------------------- 1 | mod proto { 2 | tonic::include_proto!("modelbox"); 3 | } 4 | 5 | pub struct ClientConfig { 6 | pub server_addr: String 7 | } 8 | 9 | impl ClientConfig { 10 | pub fn new(_path: String) -> Box { 11 | Box::new(ClientConfig{server_addr: ":8085".to_owned()}) 12 | } 13 | } 14 | 15 | pub struct ModelBoxClient { 16 | 17 | } 18 | 19 | impl ModelBoxClient { 20 | pub fn new() -> Self{ 21 | ModelBoxClient{} 22 | } 23 | } 24 | 25 | #[cfg(test)] 26 | mod tests { 27 | #[test] 28 | fn it_works() { 29 | let result = 2 + 2; 30 | assert_eq!(result, 4); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /python-rpc-client/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "modelbox-rpc-client" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | [lib] 8 | name = "modelbox_rpc_client" 9 | crate-type = ["cdylib"] 10 | 11 | [build-dependencies] 12 | tonic-build = "0.8.4" 13 | 14 | [dependencies] 15 | pyo3 = { version = "0.18.0", features = 
["extension-module", "multiple-pymethods"] } 16 | tonic = "0.8" 17 | prost = "0.11" 18 | prost-types = "0.11.7" 19 | tokio = { version = "1.17", features = ["macros", "rt-multi-thread", "full"] } 20 | tokio-stream = {version = "^0.1", features = ["net"]} -------------------------------------------------------------------------------- /entity/src/events.rs: -------------------------------------------------------------------------------- 1 | //! `SeaORM` Entity. Generated by sea-orm-codegen 0.11.1 2 | 3 | use sea_orm::entity::prelude::*; 4 | use serde::{Deserialize, Serialize}; 5 | 6 | #[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq, Serialize, Deserialize)] 7 | #[sea_orm(table_name = "events")] 8 | pub struct Model { 9 | #[sea_orm(primary_key, auto_increment = false)] 10 | pub id: String, 11 | pub parent_id: String, 12 | pub name: String, 13 | pub source: String, 14 | pub metadata: Json, 15 | pub source_wall_clock: TimeDateTime, 16 | } 17 | 18 | #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] 19 | pub enum Relation {} 20 | 21 | impl ActiveModelBehavior for ActiveModel {} 22 | -------------------------------------------------------------------------------- /entity/src/metadata.rs: -------------------------------------------------------------------------------- 1 | //! `SeaORM` Entity. 
Generated by sea-orm-codegen 0.11.1 2 | 3 | use sea_orm::entity::prelude::*; 4 | use serde::{Deserialize, Serialize}; 5 | 6 | #[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq, Serialize, Deserialize)] 7 | #[sea_orm(table_name = "metadata")] 8 | pub struct Model { 9 | #[sea_orm(primary_key, auto_increment = false)] 10 | pub id: String, 11 | pub parent_id: String, 12 | pub name: String, 13 | pub meta: Json, 14 | pub created_at: TimeDateTime, 15 | pub updated_at: TimeDateTime, 16 | } 17 | 18 | #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] 19 | pub enum Relation {} 20 | 21 | impl ActiveModelBehavior for ActiveModel {} 22 | -------------------------------------------------------------------------------- /sdk-py/setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = modelbox 3 | version = 0.0.5 4 | author = Diptanu Gon Choudhury 5 | author_email = diptanuc@gmail.com 6 | description = Client Library of the ModelBox Service 7 | long_description = file: README.md 8 | long_description_content_type = text/markdown 9 | url = https://github.com/tensorland/modelbox 10 | classifiers = 11 | Programming Language :: Python :: 3.7 12 | License :: OSI Approved :: Apache Software License 13 | Operating System :: OS Independent 14 | 15 | [options] 16 | packages = 17 | modelbox 18 | python_requires = >=3.7 19 | install_requires = 20 | grpcio>=1.46.1 21 | grpcio-tools>=1.46.1 22 | protobuf>=3.20.1 23 | tabulate>=0.8.10 24 | -------------------------------------------------------------------------------- /website/docs/guides/python-sdk.md: -------------------------------------------------------------------------------- 1 | # Using the Python SDK 2 | 3 | The Python SDK can be used to interact with the ModelBox API from model trainers and other MLOps services. 4 | 5 | ## Installation 6 | 7 | The library is available on PyPi [here](https://pypi.org/project/modelbox/). 
Install the library in your environment using pip - 8 | ``` 9 | pip install modelbox 10 | ``` 11 | 12 | 13 | ## Usage 14 | Detailed usage of the Python SDK is demonstrated by the following Jupyter Notebooks - 15 | 16 | 1. [Python SDK Guide](https://github.com/tensorland/modelbox/blob/main/tutorials/Tutorial_Python_SDK.ipynb) 17 | 2. [Pytorch Integration](https://github.com/tensorland/modelbox/blob/main/tutorials/Tutorial_Pytorch.ipynb) -------------------------------------------------------------------------------- /migration/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "migration" 3 | version = "0.1.0" 4 | edition = "2021" 5 | publish = false 6 | 7 | [lib] 8 | name = "migration" 9 | path = "src/lib.rs" 10 | 11 | [dependencies] 12 | async-std = { version = "1", features = ["attributes", "tokio1"] } 13 | 14 | [dependencies.sea-orm-migration] 15 | version = "0.11.0" 16 | features = [ 17 | # Enable at least one `ASYNC_RUNTIME` and `DATABASE_DRIVER` feature if you want to run migration via CLI. 18 | # View the list of supported features at https://www.sea-ql.org/SeaORM/docs/install-and-config/database-and-async-runtime. 19 | # e.g. 20 | "runtime-tokio-rustls", # `ASYNC_RUNTIME` feature 21 | "sqlx-postgres", # `DATABASE_DRIVER` feature 22 | ] 23 | -------------------------------------------------------------------------------- /entity/src/models.rs: -------------------------------------------------------------------------------- 1 | //! `SeaORM` Entity. 
Generated by sea-orm-codegen 0.11.1 2 | 3 | use sea_orm::entity::prelude::*; 4 | use serde::{Deserialize, Serialize}; 5 | 6 | #[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq, Serialize, Deserialize)] 7 | #[sea_orm(table_name = "models")] 8 | pub struct Model { 9 | #[sea_orm(primary_key, auto_increment = false)] 10 | pub id: String, 11 | pub name: String, 12 | pub owner: String, 13 | pub namespace: String, 14 | pub task: String, 15 | pub description: String, 16 | pub created_at: TimeDateTime, 17 | pub updated_at: TimeDateTime, 18 | } 19 | 20 | #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] 21 | pub enum Relation {} 22 | 23 | impl ActiveModelBehavior for ActiveModel {} 24 | -------------------------------------------------------------------------------- /sdk-go/config.go: -------------------------------------------------------------------------------- 1 | package client 2 | 3 | import ( 4 | "net" 5 | "os" 6 | 7 | "gopkg.in/yaml.v3" 8 | ) 9 | 10 | type ClientConfig struct { 11 | ServerAddr string `yaml:"server_addr"` 12 | } 13 | 14 | func NewClientConfig(configPath string) (*ClientConfig, error) { 15 | bytes, err := os.ReadFile(configPath) 16 | if err != nil { 17 | return nil, err 18 | } 19 | var config ClientConfig 20 | if err := yaml.Unmarshal(bytes, &config); err != nil { 21 | return nil, err 22 | } 23 | if err := config.validate(); err != nil { 24 | return nil, err 25 | } 26 | return &config, nil 27 | } 28 | 29 | func (c *ClientConfig) validate() error { 30 | if _, _, err := net.SplitHostPort(c.ServerAddr); err != nil { 31 | return err 32 | } 33 | return nil 34 | } 35 | -------------------------------------------------------------------------------- /agents/basic-pytorch-inspector/inspect.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | from modelbox.modelbox import Event, Model, Experiment 4 | 5 | class ModelBoxWorker(ABC): 6 | 7 | @abstractmethod 8 | def 
handle_event(event: Event): 9 | pass 10 | 11 | def model(self) -> Model: 12 | pass 13 | 14 | def experiment(self) -> Experiment: 15 | pass 16 | 17 | 18 | class PytorchModelInspector(ModelBoxWorker): 19 | 20 | def __init__(self) -> None: 21 | super().__init__() 22 | 23 | def handle_event(event: Event): 24 | pass 25 | 26 | def inspect_model(model): 27 | model.to_onnx() 28 | model.get_num_params() 29 | pass 30 | 31 | if __name__ == "__main__": 32 | print("hello world") -------------------------------------------------------------------------------- /entity/src/experiments.rs: -------------------------------------------------------------------------------- 1 | //! `SeaORM` Entity. Generated by sea-orm-codegen 0.11.1 2 | 3 | use sea_orm::entity::prelude::*; 4 | use serde::{Deserialize, Serialize}; 5 | 6 | #[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq, Serialize, Deserialize)] 7 | #[sea_orm(table_name = "experiments")] 8 | pub struct Model { 9 | #[sea_orm(primary_key, auto_increment = false)] 10 | pub id: String, 11 | pub name: String, 12 | pub external_id: String, 13 | pub owner: String, 14 | pub namespace: String, 15 | pub ml_framework: i32, 16 | pub created_at: TimeDateTime, 17 | pub updated_at: TimeDateTime, 18 | } 19 | 20 | #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] 21 | pub enum Relation {} 22 | 23 | impl ActiveModelBehavior for ActiveModel {} 24 | -------------------------------------------------------------------------------- /entity/src/metrics.rs: -------------------------------------------------------------------------------- 1 | //! `SeaORM` Entity. 
Generated by sea-orm-codegen 0.11.1 2 | 3 | use sea_orm::entity::prelude::*; 4 | use serde::{Deserialize, Serialize}; 5 | 6 | #[derive(Clone, Debug, PartialEq, DeriveEntityModel, Serialize, Deserialize)] 7 | #[sea_orm(table_name = "metrics")] 8 | pub struct Model { 9 | #[sea_orm(primary_key)] 10 | pub id: i32, 11 | pub object_id: String, 12 | pub name: String, 13 | pub tensor: Option, 14 | #[sea_orm(column_type = "Double", nullable)] 15 | pub double_value: Option, 16 | pub step: Option, 17 | pub wall_clock: Option, 18 | pub created_at: TimeDateTime, 19 | } 20 | 21 | #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] 22 | pub enum Relation {} 23 | 24 | impl ActiveModelBehavior for ActiveModel {} 25 | -------------------------------------------------------------------------------- /entity/src/files.rs: -------------------------------------------------------------------------------- 1 | //! `SeaORM` Entity. Generated by sea-orm-codegen 0.11.1 2 | 3 | use sea_orm::entity::prelude::*; 4 | use serde::{Deserialize, Serialize}; 5 | 6 | #[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq, Serialize, Deserialize)] 7 | #[sea_orm(table_name = "files")] 8 | pub struct Model { 9 | #[sea_orm(primary_key, auto_increment = false)] 10 | pub id: String, 11 | pub parent_id: String, 12 | pub src_path: String, 13 | pub upload_path: Option, 14 | pub file_type: String, 15 | pub metadata: Json, 16 | pub artifact_name: String, 17 | pub artifact_id: String, 18 | pub created_at: TimeDateTime, 19 | pub updated_at: TimeDateTime, 20 | } 21 | 22 | #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] 23 | pub enum Relation {} 24 | 25 | impl ActiveModelBehavior for ActiveModel {} 26 | -------------------------------------------------------------------------------- /entity/src/mutations.rs: -------------------------------------------------------------------------------- 1 | //! `SeaORM` Entity. 
Generated by sea-orm-codegen 0.11.1 2 | 3 | use sea_orm::entity::prelude::*; 4 | use serde::{Deserialize, Serialize}; 5 | 6 | #[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq, Serialize, Deserialize)] 7 | #[sea_orm(table_name = "mutations")] 8 | pub struct Model { 9 | #[sea_orm(primary_key)] 10 | pub id: i32, 11 | pub object_id: String, 12 | pub object_type: i16, 13 | pub mutation_type: i16, 14 | pub namespace: String, 15 | pub experiment_payload: Option, 16 | pub model_payload: Option, 17 | pub model_version_payload: Option, 18 | pub created_at: TimeDateTime, 19 | pub processed_at: Option, 20 | } 21 | 22 | #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] 23 | pub enum Relation {} 24 | 25 | impl ActiveModelBehavior for ActiveModel {} 26 | -------------------------------------------------------------------------------- /website/docs/tutorials/deploy-your-site.md: -------------------------------------------------------------------------------- 1 | --- 2 | sidebar_position: 5 3 | --- 4 | 5 | # Deploy your site 6 | 7 | Docusaurus is a **static-site-generator** (also called **[Jamstack](https://jamstack.org/)**). 8 | 9 | It builds your site as simple **static HTML, JavaScript and CSS files**. 10 | 11 | ## Build your site 12 | 13 | Build your site **for production**: 14 | 15 | ```bash 16 | npm run build 17 | ``` 18 | 19 | The static files are generated in the `build` folder. 20 | 21 | ## Deploy your site 22 | 23 | Test your production build locally: 24 | 25 | ```bash 26 | npm run serve 27 | ``` 28 | 29 | The `build` folder is now served at [http://localhost:3000/](http://localhost:3000/). 30 | 31 | You can now deploy the `build` folder **almost anywhere** easily, **for free** or very small cost (read the **[Deployment Guide](https://docusaurus.io/docs/deployment)**). 
32 | -------------------------------------------------------------------------------- /entity/src/model_versions.rs: -------------------------------------------------------------------------------- 1 | //! `SeaORM` Entity. Generated by sea-orm-codegen 0.11.1 2 | 3 | use sea_orm::entity::prelude::*; 4 | use serde::{Deserialize, Serialize}; 5 | 6 | #[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq, Serialize, Deserialize)] 7 | #[sea_orm(table_name = "model_versions")] 8 | pub struct Model { 9 | #[sea_orm(primary_key, auto_increment = false)] 10 | pub id: String, 11 | pub name: String, 12 | pub model_id: String, 13 | pub namespace: String, 14 | pub experiment_id: String, 15 | pub version: String, 16 | pub description: String, 17 | pub ml_framework: i32, 18 | pub unique_tags: Json, 19 | pub created_at: TimeDateTime, 20 | pub updated_at: TimeDateTime, 21 | } 22 | 23 | #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] 24 | pub enum Relation {} 25 | 26 | impl ActiveModelBehavior for ActiveModel {} 27 | -------------------------------------------------------------------------------- /migration/README.md: -------------------------------------------------------------------------------- 1 | # Running Migrator CLI 2 | 3 | - Generate a new migration file 4 | ```sh 5 | cargo run -- migrate generate MIGRATION_NAME 6 | ``` 7 | - Apply all pending migrations 8 | ```sh 9 | cargo run 10 | ``` 11 | ```sh 12 | cargo run -- up 13 | ``` 14 | - Apply first 10 pending migrations 15 | ```sh 16 | cargo run -- up -n 10 17 | ``` 18 | - Rollback last applied migrations 19 | ```sh 20 | cargo run -- down 21 | ``` 22 | - Rollback last 10 applied migrations 23 | ```sh 24 | cargo run -- down -n 10 25 | ``` 26 | - Drop all tables from the database, then reapply all migrations 27 | ```sh 28 | cargo run -- fresh 29 | ``` 30 | - Rollback all applied migrations, then reapply all migrations 31 | ```sh 32 | cargo run -- refresh 33 | ``` 34 | - Rollback all applied migrations 35 | ```sh 36 | 
cargo run -- reset 37 | ``` 38 | - Check the status of all migrations 39 | ```sh 40 | cargo run -- status 41 | ``` 42 | -------------------------------------------------------------------------------- /tests/setup.rs: -------------------------------------------------------------------------------- 1 | use entity::experiments::Entity as ExperimentEntity; 2 | use entity::mutations::Entity as MutationEntity; 3 | use sea_orm::entity::prelude::*; 4 | use sea_orm::{Database, DbBackend, DbErr, Schema}; 5 | use sea_query::table::TableCreateStatement; 6 | 7 | pub async fn create_db() -> Result { 8 | let db = Database::connect("sqlite::memory:").await?; 9 | 10 | setup_schema(&db).await?; 11 | 12 | Ok(db) 13 | } 14 | 15 | async fn setup_schema(db: &DbConn) -> Result<(), DbErr> { 16 | // Setup Schema helper 17 | let schema = Schema::new(DbBackend::Sqlite); 18 | 19 | // Derive from Entity 20 | let stmt1: TableCreateStatement = schema.create_table_from_entity(ExperimentEntity); 21 | let stmt2: TableCreateStatement = schema.create_table_from_entity(MutationEntity); 22 | 23 | // Execute create table statement 24 | db.execute(db.get_database_backend().build(&stmt1)).await?; 25 | db.execute(db.get_database_backend().build(&stmt2)).await?; 26 | Ok(()) 27 | } 28 | -------------------------------------------------------------------------------- /python-rpc-client/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | .pytest_cache/ 6 | *.py[cod] 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | .venv/ 14 | env/ 15 | bin/ 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | include/ 26 | man/ 27 | venv/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | pip-selfcheck.json 36 | 37 | # Unit test 
/ coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | 45 | # Translations 46 | *.mo 47 | 48 | # Mr Developer 49 | .mr.developer.cfg 50 | .project 51 | .pydevproject 52 | 53 | # Rope 54 | .ropeproject 55 | 56 | # Django stuff: 57 | *.log 58 | *.pot 59 | 60 | .DS_Store 61 | 62 | # Sphinx documentation 63 | docs/_build/ 64 | 65 | # PyCharm 66 | .idea/ 67 | 68 | # VSCode 69 | .vscode/ 70 | 71 | # Pyenv 72 | .python-version -------------------------------------------------------------------------------- /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | # Use the latest 2.1 version of CircleCI pipeline process engine. 2 | # See: https://circleci.com/docs/2.0/configuration-reference 3 | version: 2.1 4 | 5 | # Define a job to be invoked later in a workflow. 6 | # See: https://circleci.com/docs/2.0/configuration-reference/#jobs 7 | jobs: 8 | say-hello: 9 | # Specify the execution environment. You can specify an image from Dockerhub or use one of our Convenience Images from CircleCI's Developer Hub. 10 | # See: https://circleci.com/docs/2.0/configuration-reference/#docker-machine-macos-windows-executor 11 | docker: 12 | - image: cimg/base:stable 13 | # Add steps to the job 14 | # See: https://circleci.com/docs/2.0/configuration-reference/#steps 15 | steps: 16 | - checkout 17 | - run: 18 | name: "Say hello" 19 | command: "echo Hello, World!" 
20 | 21 | # Invoke jobs via workflows 22 | # See: https://circleci.com/docs/2.0/configuration-reference/#workflows 23 | workflows: 24 | say-hello-workflow: 25 | jobs: 26 | - say-hello 27 | -------------------------------------------------------------------------------- /website/docs/tutorials/create-a-blog-post.md: -------------------------------------------------------------------------------- 1 | --- 2 | sidebar_position: 3 3 | --- 4 | 5 | # Create a Blog Post 6 | 7 | Docusaurus creates a **page for each blog post**, but also a **blog index page**, a **tag system**, an **RSS** feed... 8 | 9 | ## Create your first Post 10 | 11 | Create a file at `blog/2021-02-28-greetings.md`: 12 | 13 | ```md title="blog/2021-02-28-greetings.md" 14 | --- 15 | slug: greetings 16 | title: Greetings! 17 | authors: 18 | - name: Joel Marcey 19 | title: Co-creator of Docusaurus 1 20 | url: https://github.com/JoelMarcey 21 | image_url: https://github.com/JoelMarcey.png 22 | - name: Sébastien Lorber 23 | title: Docusaurus maintainer 24 | url: https://sebastienlorber.com 25 | image_url: https://github.com/slorber.png 26 | tags: [greetings] 27 | --- 28 | 29 | Congratulations, you have made your first post! 30 | 31 | Feel free to play around and edit this post as much you like. 32 | ``` 33 | 34 | A new blog post is now available at [http://localhost:3000/blog/greetings](http://localhost:3000/blog/greetings). 
35 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "modelbox" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | entity = {path = "entity"} 10 | migration = {path ="migration"} 11 | tokio = { version = "1.17", features = ["macros", "rt-multi-thread", "full"] } 12 | prost = "0.11.8" 13 | prost-types = "0.11.8" 14 | tokio-stream = "0.1.12" 15 | tonic = "^0.8" 16 | tonic-reflection = "0.6.0" 17 | tracing = "0.1" 18 | tracing-subscriber = "0.3" 19 | clap = { version = "4.1.4", features = ["derive"] } 20 | serde = { version = "1.0", features = ["derive"] } 21 | serde_yaml = "0.9" 22 | thiserror = "1.0.38" 23 | time = { version = "0.3", features = ["macros"] } 24 | serde_json = "1.0.93" 25 | object_store = {version = "0.5.5", features = ["aws", "gcp"]} 26 | sea-orm = { version = "^0", features=["debug-print", "runtime-tokio-rustls", "sqlx-postgres", "sqlx-sqlite", "with-time", "with-json", "mock"] } 27 | sea-query = "0" 28 | 29 | [dev-dependencies] 30 | indoc = "2" 31 | 32 | [build-dependencies] 33 | tonic-build = "0.8.4" 34 | -------------------------------------------------------------------------------- /tests/integration_test.rs: -------------------------------------------------------------------------------- 1 | use time::{PrimitiveDateTime, OffsetDateTime}; 2 | 3 | mod setup; 4 | 5 | use modelbox::repository; 6 | 7 | fn now() -> PrimitiveDateTime { 8 | let n = OffsetDateTime::now_utc(); 9 | PrimitiveDateTime::new(n.date(), n.time()) 10 | } 11 | 12 | #[tokio::test] 13 | async fn test_create_example() { 14 | let db = setup::create_db().await.unwrap(); 15 | let repository = repository::Repository::new_with_db(db); 16 | let experiment = entity::experiments::Model { 17 | id: "abcd".into(), 18 | 
name: "gpt2".into(), 19 | external_id: "ext_1".into(), 20 | owner: "diptanu@tensorland.ai".into(), 21 | namespace: "langtech".into(), 22 | ml_framework: 1, 23 | created_at: now(), 24 | updated_at: now(), 25 | }; 26 | 27 | repository 28 | .create_exeperiment(experiment.clone()) 29 | .await 30 | .unwrap(); 31 | 32 | let maybe_experiment_out = repository.get_experiment("abcd".into()).await.unwrap(); 33 | assert!(maybe_experiment_out.is_some()); 34 | let experiment_out = maybe_experiment_out.unwrap(); 35 | assert_eq!(experiment_out, experiment); 36 | } 37 | -------------------------------------------------------------------------------- /website/docs/tutorials/create-a-page.md: -------------------------------------------------------------------------------- 1 | --- 2 | sidebar_position: 1 3 | --- 4 | 5 | # Create a Page 6 | 7 | Add **Markdown or React** files to `src/pages` to create a **standalone page**: 8 | 9 | - `src/pages/index.js` → `localhost:3000/` 10 | - `src/pages/foo.md` → `localhost:3000/foo` 11 | - `src/pages/foo/bar.js` → `localhost:3000/foo/bar` 12 | 13 | ## Create your first React Page 14 | 15 | Create a file at `src/pages/my-react-page.js`: 16 | 17 | ```jsx title="src/pages/my-react-page.js" 18 | import React from 'react'; 19 | import Layout from '@theme/Layout'; 20 | 21 | export default function MyReactPage() { 22 | return ( 23 | 24 |

My React page

25 |

This is a React page

26 |
27 | ); 28 | } 29 | ``` 30 | 31 | A new page is now available at [http://localhost:3000/my-react-page](http://localhost:3000/my-react-page). 32 | 33 | ## Create your first Markdown Page 34 | 35 | Create a file at `src/pages/my-markdown-page.md`: 36 | 37 | ```mdx title="src/pages/my-markdown-page.md" 38 | # My Markdown page 39 | 40 | This is a Markdown page 41 | ``` 42 | 43 | A new page is now available at [http://localhost:3000/my-markdown-page](http://localhost:3000/my-markdown-page). 44 | -------------------------------------------------------------------------------- /website/docs/tutorials/create-a-document.md: -------------------------------------------------------------------------------- 1 | --- 2 | sidebar_position: 2 3 | --- 4 | 5 | # Create a Document 6 | 7 | Documents are **groups of pages** connected through: 8 | 9 | - a **sidebar** 10 | - **previous/next navigation** 11 | - **versioning** 12 | 13 | ## Create your first Doc 14 | 15 | Create a Markdown file at `docs/hello.md`: 16 | 17 | ```md title="docs/hello.md" 18 | # Hello 19 | 20 | This is my **first Docusaurus document**! 21 | ``` 22 | 23 | A new document is now available at [http://localhost:3000/docs/hello](http://localhost:3000/docs/hello). 24 | 25 | ## Configure the Sidebar 26 | 27 | Docusaurus automatically **creates a sidebar** from the `docs` folder. 28 | 29 | Add metadata to customize the sidebar label and position: 30 | 31 | ```md title="docs/hello.md" {1-4} 32 | --- 33 | sidebar_label: 'Hi!' 34 | sidebar_position: 3 35 | --- 36 | 37 | # Hello 38 | 39 | This is my **first Docusaurus document**! 
40 | ``` 41 | 42 | It is also possible to create your sidebar explicitly in `sidebars.js`: 43 | 44 | ```js title="sidebars.js" 45 | module.exports = { 46 | tutorialSidebar: [ 47 | { 48 | type: 'category', 49 | label: 'Tutorial', 50 | // highlight-next-line 51 | items: ['hello'], 52 | }, 53 | ], 54 | }; 55 | ``` 56 | -------------------------------------------------------------------------------- /website/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "website", 3 | "version": "0.0.0", 4 | "private": true, 5 | "scripts": { 6 | "docusaurus": "docusaurus", 7 | "start": "docusaurus start", 8 | "build": "docusaurus build", 9 | "swizzle": "docusaurus swizzle", 10 | "deploy": "docusaurus deploy", 11 | "clear": "docusaurus clear", 12 | "serve": "docusaurus serve", 13 | "write-translations": "docusaurus write-translations", 14 | "write-heading-ids": "docusaurus write-heading-ids" 15 | }, 16 | "dependencies": { 17 | "@docusaurus/core": "2.1.0", 18 | "@docusaurus/plugin-google-analytics": "^2.1.0", 19 | "@docusaurus/preset-classic": "2.1.0", 20 | "@mdx-js/react": "^1.6.22", 21 | "clsx": "^1.2.1", 22 | "prism-react-renderer": "^1.3.5", 23 | "react": "^17.0.2", 24 | "react-dom": "^17.0.2" 25 | }, 26 | "devDependencies": { 27 | "@docusaurus/module-type-aliases": "2.0.1" 28 | }, 29 | "browserslist": { 30 | "production": [ 31 | ">0.5%", 32 | "not dead", 33 | "not op_mini all" 34 | ], 35 | "development": [ 36 | "last 1 chrome version", 37 | "last 1 firefox version", 38 | "last 1 safari version" 39 | ] 40 | }, 41 | "engines": { 42 | "node": ">=16.14" 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /website/blog/2021-08-26-welcome/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | slug: Hello World 3 | title: Hello From ModelBox! 
4 | authors: [diptanu] 5 | tags: [announcement, hello, modelbox] 6 | --- 7 | 8 | ModelBox is a new AI experimentation and model operations service. AI Metadata services are integrated mainly into larger platforms and impose a specific workflow on the users. Most of the tools in this space are also SAAS services, such as HuggingFace, Neptune AI, etc. 9 | 10 | The number of open source services that provide the primitives of logging metadata from experiments, model store/registry are few, and to mention services that are hackable and extendable to suit the needs of an organization with a specific workflow. We try to change that with ModelBox, by building the service with an interface first approach and having implementations that use specific storage technologies, etc. The API of ModelBox uses gRPC, with SDKs built around the API in the most popular languages used by engineers making AI services today. 11 | 12 | In the coming weeks or months, we will write more about how ModelBox can be used to solve common use cases while developing models and help with serving and evaluating trained models. 13 | 14 | Please get in touch with us on the GitHub Discussions or Discord if you want to be involved with the project! -------------------------------------------------------------------------------- /website/src/css/custom.css: -------------------------------------------------------------------------------- 1 | /** 2 | * Any CSS included here will be global. The classic template 3 | * bundles Infima by default. Infima is a CSS framework designed to 4 | * work well for content-centric websites. 5 | */ 6 | 7 | /* You can override the default Infima variables here. 
*/ 8 | :root { 9 | --ifm-color-primary: #382970; 10 | --ifm-color-primary-dark: #322565; 11 | --ifm-color-primary-darker: #30235f; 12 | --ifm-color-primary-darkest: #271d4e; 13 | --ifm-color-primary-light: #3e2d7b; 14 | --ifm-color-primary-lighter: #402f81; 15 | --ifm-color-primary-lightest: #493592; 16 | --ifm-code-font-size: 95%; 17 | --docusaurus-highlighted-code-line-bg: rgba(0, 0, 0, 0.1); 18 | } 19 | 20 | /* For readability concerns, you should choose a lighter palette in dark mode. */ 21 | [data-theme='dark'] { 22 | --ifm-color-primary: #0f101a; 23 | --ifm-color-primary-dark: #0e0e17; 24 | --ifm-color-primary-darker: #0d0e16; 25 | --ifm-color-primary-darkest: #0a0b12; 26 | --ifm-color-primary-light: #11121d; 27 | --ifm-color-primary-lighter: #11121e; 28 | --ifm-color-primary-lightest: #141522; 29 | --docusaurus-highlighted-code-line-bg: rgba(0, 0, 0, 0.3); 30 | } 31 | 32 | .hero__subtitle { 33 | color: white; 34 | } 35 | 36 | .hero__title { 37 | color: white; 38 | } 39 | 40 | .navbar__link--active { 41 | color: black; 42 | } 43 | 44 | .avatar__name { 45 | color: black; 46 | } -------------------------------------------------------------------------------- /website/src/pages/index.js: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import clsx from 'clsx'; 3 | import Link from '@docusaurus/Link'; 4 | import useDocusaurusContext from '@docusaurus/useDocusaurusContext'; 5 | import Layout from '@theme/Layout'; 6 | import HomepageFeatures from '@site/src/components/HomepageFeatures'; 7 | 8 | import styles from './index.module.css'; 9 | 10 | function HomepageHeader() { 11 | const {siteConfig} = useDocusaurusContext(); 12 | return ( 13 |
14 |
15 |

{siteConfig.title}

16 |

{siteConfig.tagline}

17 |
18 | 21 | Introduction and Getting Started️ 22 | 23 |
24 |
25 |
26 | ); 27 | } 28 | 29 | export default function Home() { 30 | const {siteConfig} = useDocusaurusContext(); 31 | return ( 32 | 35 | 36 |
37 | 38 |
39 |
40 | ); 41 | } 42 | -------------------------------------------------------------------------------- /.goreleaser.yaml: -------------------------------------------------------------------------------- 1 | 2 | before: 3 | hooks: 4 | - go mod tidy 5 | builds: 6 | - env: 7 | - CGO_ENABLED=0 8 | goos: 9 | - linux 10 | - windows 11 | - darwin 12 | goarch: 13 | - amd64 14 | - arm64 15 | binary: modelbox 16 | main: ./cmd/modelbox/ 17 | 18 | archives: 19 | - id: archive 20 | name_template: "{{ .Binary }}-{{ .Tag }}-{{ .Os }}-{{ .Arch }}" 21 | checksum: 22 | name_template: 'checksums.txt' 23 | snapshot: 24 | name_template: "{{ incpatch .Version }}-next" 25 | changelog: 26 | sort: asc 27 | filters: 28 | exclude: 29 | - '^docs:' 30 | - '^test:' 31 | dockers: 32 | - image_templates: 33 | - "modelboxdotio/modelbox:{{ .Version }}-amd64" 34 | use: buildx 35 | dockerfile: Dockerfile 36 | build_flag_templates: 37 | - "--platform=linux/amd64" 38 | extra_files: 39 | - cmd/modelbox/assets/modelbox_server.yaml 40 | - image_templates: 41 | - "modelboxdotio/modelbox:{{ .Version }}-arm64v8" 42 | use: buildx 43 | goarch: arm64 44 | dockerfile: Dockerfile 45 | build_flag_templates: 46 | - "--platform=linux/arm64/v8" 47 | extra_files: 48 | - cmd/modelbox/assets/modelbox_server.yaml 49 | docker_manifests: 50 | - name_template: modelboxdotio/modelbox:{{ .Version }} 51 | image_templates: 52 | - modelboxdotio/modelbox:{{ .Version }}-amd64 53 | - modelboxdotio/modelbox:{{ .Version }}-arm64v8 54 | skip_push: true 55 | 56 | -------------------------------------------------------------------------------- /deploy/kubernetes/manifests/modelbox.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: modelbox 5 | --- 6 | apiVersion: v1 7 | data: 8 | modelbox.toml: |- 9 | blob_storage = "filesystem" 10 | metadata_storage = "integrated" 11 | listen_addr = ":8085" 12 | 13 | [blob_storage_filesystem] 14 | base_dir 
= "/tmp/modelboxblobs" 15 | 16 | [metadata_storage_integrated] 17 | path = "/tmp/modelbox.dat" 18 | 19 | [metadata_storage_postgres] 20 | host = "172.17.0.2" 21 | port = 5432 22 | user = "postgres" 23 | kind: ConfigMap 24 | metadata: 25 | name: config 26 | namespace: modelbox 27 | --- 28 | apiVersion: apps/v1 29 | kind: Deployment 30 | metadata: 31 | creationTimestamp: null 32 | labels: 33 | app: modelbox 34 | name: modelbox 35 | namespace: modelbox 36 | spec: 37 | replicas: 1 38 | selector: 39 | matchLabels: 40 | app: modelbox 41 | template: 42 | metadata: 43 | labels: 44 | app: modelbox 45 | spec: 46 | containers: 47 | - image: diptanu/modelbox:0.1 48 | name: modelbox 49 | ports: 50 | - containerPort: 8085 51 | args: 52 | - server 53 | - start 54 | - --config-path=/modelbox/config.toml 55 | volumeMounts: 56 | - name: config 57 | mountPath: /modelbox 58 | volumes: 59 | - name: config 60 | configMap: 61 | name: config 62 | --- 63 | apiVersion: v1 64 | kind: Service 65 | metadata: 66 | name: modelbox 67 | namespace: modelbox 68 | labels: 69 | app: modelbox 70 | spec: 71 | ports: 72 | - port: 8085 73 | protocol: TCP 74 | selector: 75 | app: modelbox 76 | -------------------------------------------------------------------------------- /BUILD.md: -------------------------------------------------------------------------------- 1 | # Building ModelBox 2 | 3 | This guide is intended for users installing Modelbox and developers who wish to contribute to the project to build modelbox from source. 
4 | 5 | ## Build using the go toolchain 6 | 7 | ``` 8 | go build -o build/modelbox ./cmd/modelbox 9 | ``` 10 | 11 | ## Build using goreleaser for development 12 | ``` 13 | goreleaser build --single-target --snapshot 14 | ``` 15 | 16 | ## Build using goreleaser for all supported platforms 17 | 18 | This is the preferred way to build ModelBox as it creates binaries for all the versions we support - 19 | 20 | Install goreleaser first - https://goreleaser.com/install/ 21 | 22 | Once goreleaser is installed, run the following command - 23 | ``` 24 | goreleaser build --snapshot --rm-dist 25 | ``` 26 | 27 | ## Building a Docker Container to serve ModelBox server locally 28 | This will build the modelbox server binary from source and create a Docker container to run the server. This is only possible if goreleaser is invoked in the release mode. 29 | ``` 30 | goreleaser release --snapshot --rm-dist 31 | ``` 32 | 33 | This creates the following docker images - 34 | ``` 35 | (base) diptanuc@firefly:~/Projects/modelbox$ docker images 36 | REPOSITORY TAG IMAGE ID CREATED SIZE 37 | diptanu/modelbox 0.0.1-next-amd64 893ae15a50b7 2 minutes ago 14.9MB 38 | diptanu/modelbox 0.0.1-next-arm64v8 67ff73eb4604 2 minutes ago 14.4MB 39 | ``` 40 | 41 | The Docker image has a copy of the configuration which is generated by `modelbox server init-config` so it stores metadata in ephemeral storage and uses in-memory storage for metrics.
For real usage in production, the config should be modified and bind-mounted into the container at `/app/modelbox.toml` -------------------------------------------------------------------------------- /agents/py/client.py: -------------------------------------------------------------------------------- 1 | import grpc 2 | from modelbox import admin_pb2 3 | from modelbox import admin_pb2_grpc 4 | from google.protobuf import timestamp_pb2 5 | 6 | 7 | class AdminClient: 8 | 9 | def __init__(self, addr): 10 | self._addr = addr 11 | self._channel = grpc.insecure_channel(addr) 12 | self._client = admin_pb2_grpc.ModelBoxAdminStub(self._channel) 13 | 14 | def register_agent(self, node_info: admin_pb2.NodeInfo, name:str) -> admin_pb2.RegisterAgentRequest: 15 | req = admin_pb2.RegisterAgentRequest(node_info=node_info, agent_name=name) 16 | return self._client.RegisterAgent(req) 17 | 18 | def heartbeat(self, node_id: str) -> admin_pb2.HeartbeatResponse: 19 | ts = timestamp_pb2.Timestamp() 20 | req = admin_pb2.HeartbeatRequest(node_id=node_id, at=ts.GetCurrentTime()) 21 | return self._client.Heartbeat(req) 22 | 23 | def get_runnable_actions(self, action: str, arch: str) -> admin_pb2.GetRunnableActionInstancesResponse: 24 | req = admin_pb2.GetRunnableActionInstancesRequest(action_name=action, arch=arch) 25 | return self._client.GetRunnableActionInstances(req) 26 | 27 | def update_action_status(self, action_instance: str, status: int, outcome: int, reason: str, time: int) -> admin_pb2.UpdateActionStatusResponse: 28 | req =admin_pb2.UpdateActionStatusRequest(action_instance_id=action_instance, status=status, outcome=outcome, outcome_reason=reason, update_time=time) 29 | return self._client.UpdateActionStatus(req) 30 | 31 | def close(self): 32 | if self._channel is not None: 33 | self._channel.close() 34 | 35 | def __enter__(self): 36 | return self 37 | 38 | def __exit__(self, exc_type, exc_value, traceback): 39 | return self.close() 
-------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | SHELL := bash 2 | 3 | .PHONY: install-deps 4 | install-deps: 5 | @echo "==> Installing dependencies" 6 | go install github.com/goreleaser/goreleaser@latest 7 | 8 | .PHONY: build 9 | build: 10 | @echo "==> Build Modelbox" 11 | goreleaser release --snapshot --rm-dist 12 | 13 | 14 | .PHONY: sync-docker-hub 15 | sync-docker-hub: 16 | @echo "==> Sync with docker hub" 17 | docker push modelboxdotio/modelbox:0.0.1-next-arm64v8 18 | docker push modelboxdotio/modelbox:0.0.1-next-amd64 19 | docker manifest create modelboxdotio/modelbox:latest modelboxdotio/modelbox:0.0.1-next-arm64v8 modelboxdotio/modelbox:0.0.1-next-amd64 20 | docker manifest push --purge modelboxdotio/modelbox:latest 21 | 22 | .PHONY: test 23 | test-server: 24 | @echo "==> Test Modelbox Server" 25 | go test ./server/storage/... 26 | 27 | .PHONY: install-sdk-py 28 | install-sdk-py: 29 | @echo "==> Installing Python SDK" 30 | cd sdk-py && pip install . 31 | 32 | .PHONY: test-sdk-py 33 | test-sdk-py: 34 | @echo "==> Testing python sdk" 35 | cd sdk-py && pip install . 36 | cd sdk-py && python tests/test_modelbox_api.py 37 | 38 | .PHONY: build-sdk-py 39 | build-sdk-py: 40 | @echo "==> Building modelbox py distribution " 41 | cd sdk-py && python -m build . 42 | 43 | .PHONY: upload-sdk-py-test 44 | upload-sdk-py-test: 45 | @echo "===> Uploading to test.pypi" 46 | pip install twine 47 | cd sdk-py && twine upload --repository testpypi dist/* 48 | 49 | .PHONY: upload-sdk-py 50 | upload-sdk-py: 51 | @echo "===> Uploading to pypi" 52 | pip install twine 53 | cd sdk-py && twine upload dist/* 54 | 55 | 56 | .PHONY: update-protos 57 | update-protos: 58 | @echo "==> Updating protos" 59 | ./proto/gen_proto.sh 60 | 61 | .PHONY: gen-static 62 | gen-static: 63 | @echo "==> Generating static files" 64 | cd cmd/modelbox && go-bindata assets/... 
65 | -------------------------------------------------------------------------------- /website/docs/getting_started.md: -------------------------------------------------------------------------------- 1 | # Getting Started 2 | 3 | This guide aims to get an instance of ModelBox started and have you train a model, and then look at the metadata and metrics collected by ModelBox to analyze the experiment, models and other artifacts. 4 | 5 | There are three ways to get started with ModelBox. The docker-compose method is preferred if you have Docker installed on your machine. If you don't have Docker, GitPod would be the second best alternative. 6 | Lastly, you could download the ModelBox binary and run it locally as well, either with ephemeral storage or use the various storage dependencies. 7 | 8 | ## Docker Compose 9 | 10 | This is the quickest way to get started if you have docker and docker-compose installed. 11 | 12 | ``` 13 | docker compose --profile local up 14 | ``` 15 | 16 | This starts the ModelBox server with all the dependencies and a container with a Jupyter Notebook that demonstrates how to integrate a Pytorch trainer with ModelBox. 17 | 18 | The ModelBox server hosts the API at the address - `172.21.0.2:8085` 19 | 20 | The Jupyter notebook with the tutorials is available at the address - `https://localhost:8888` 21 | 22 | Train a Pytorch Model by following the [notebook](https://github.com/tensorland/modelbox/blob/main/tutorials/Pytorch_Lightning_Integration_Tutorial.ipynb) 23 | 24 | 25 | 26 | 27 | ## Local Server with Ephemeral Metadata Storage 28 | 29 | 1. Install the dependencies 30 | 31 | 2. Build ModelBox 32 | 33 | 3. Generate the Server and client configs. 34 | 35 | 4. Train a Model 36 | 37 | ## Local Server with Local Datastores 38 | 39 | 1. Follow steps 1-3 from the above section which demonstrates how to run modelbox locally. 40 | 41 | 2. Install Postgres/MySQL Server. 42 | 43 | 3. Decide which metrics backend to use. 44 | 45 | 4. 
Decide which blob storage backend to use. 46 | 47 | 5. Start the server and train a model. -------------------------------------------------------------------------------- /website/sidebars.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Creating a sidebar enables you to: 3 | - create an ordered group of docs 4 | - render a sidebar for each doc of that group 5 | - provide next/previous navigation 6 | 7 | The sidebars can be generated from the filesystem, or explicitly defined here. 8 | 9 | Create as many sidebars as you want. 10 | */ 11 | 12 | // @ts-check 13 | 14 | /** @type {import('@docusaurus/plugin-content-docs').SidebarsConfig} */ 15 | const sidebars = { 16 | // By default, Docusaurus generates a sidebar from the docs folder structure 17 | //tutorialSidebar: [{type: 'autogenerated', dirName: '.'}], 18 | 19 | docs:[ 20 | 'intro', 21 | 'getting_started', 22 | 'install', 23 | 'compare', 24 | { 25 | type: 'category', 26 | label: 'Tutorials', 27 | 28 | link: { 29 | type: 'generated-index', 30 | description: "Tutorials for performing common use-cases with ModelBox", 31 | }, 32 | collapsed: false, 33 | items: [ 34 | 'tutorials/log-metadata-pytorch', 35 | ], 36 | }, 37 | { 38 | type: 'category', 39 | label: 'Guides', 40 | link: { 41 | type: 'generated-index', 42 | description: "In-depth guide to using and operating ModelBox", 43 | }, 44 | collapsed: false, 45 | items: [ 46 | 'guides/concepts', 47 | 'guides/configuration', 48 | 'guides/experiments', 49 | 'guides/models', 50 | 'guides/metrics', 51 | 'guides/model-distribution', 52 | 'guides/tensorboard', 53 | 'guides/python-sdk', 54 | 'guides/cli', 55 | 'guides/service-observability', 56 | 'guides/develop', 57 | ], 58 | }, 59 | ], 60 | 61 | /* 62 | tutorialSidebar: [ 63 | { 64 | type: 'category', 65 | label: 'Tutorial', 66 | items: ['hello'], 67 | }, 68 | ], 69 | */ 70 | }; 71 | 72 | module.exports = sidebars; 73 | 
-------------------------------------------------------------------------------- /sdk-py/modelbox/notebook_widgets.py: -------------------------------------------------------------------------------- 1 | from email import header 2 | from importlib.metadata import metadata 3 | from modelbox.modelbox import Experiment 4 | 5 | from tabulate import tabulate 6 | from IPython.display import Markdown 7 | 8 | import matplotlib.pyplot as plt 9 | 10 | 11 | class ExperimentDisplay: 12 | def __init__(self, experiment: Experiment) -> None: 13 | self._experiment = experiment 14 | 15 | def info(self): 16 | experiment_info = [ 17 | ["id", self._experiment.id], 18 | ["name", self._experiment.name], 19 | ["owner", self._experiment.owner], 20 | ["namespace", self._experiment.namespace], 21 | ["creation time", self._experiment.created_at], 22 | ["updated time", self._experiment.updated_at], 23 | ] 24 | return Markdown( 25 | tabulate(experiment_info, headers=["Experiment", ""], tablefmt="github") 26 | + "\n" 27 | + "##### Metadata" 28 | + "\n" 29 | + tabulate( 30 | self._experiment.metadata().metadata.items(), 31 | headers=["", ""], 32 | tablefmt="github", 33 | ) 34 | ) 35 | 36 | def metrics(self): 37 | all_metrics = self._experiment.all_metrics() 38 | fig, axs = plt.subplots(len(all_metrics.keys())) 39 | index = 0 40 | for key, metrics in all_metrics.items(): 41 | m_values = [(mv.step, mv.value) for mv in metrics] 42 | axs[index].plot(*zip(*m_values)) 43 | index = index + 1 44 | plt.show() 45 | 46 | 47 | def events(self): 48 | events = self._experiment.events() 49 | events_table =[] 50 | for event in events: 51 | events_table.append((event.wallclock_time, event.name, event.source.name)) 52 | return Markdown(tabulate(events_table, headers=["wallclock", "event", "source"], tablefmt="github")) 53 | 54 | 55 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | 
use std::path::PathBuf; 2 | 3 | use clap::{Args, Parser, Subcommand}; 4 | use server_config::ServerConfig; 5 | mod agent; 6 | mod grpc_server; 7 | mod model_helper; 8 | mod modelbox; 9 | mod server_config; 10 | mod repository; 11 | 12 | #[tokio::main] 13 | async fn main() { 14 | let cli = Cli::parse(); 15 | let subscriber = tracing_subscriber::FmtSubscriber::new(); 16 | tracing::subscriber::set_global_default(subscriber).unwrap(); 17 | 18 | if cli.config_path.is_none() { 19 | panic!("config path is required") 20 | } 21 | 22 | match cli.command { 23 | Commands::Server(server) => match server.commands { 24 | StartCommands::Start => { 25 | start_agent(cli.config_path.unwrap()).await; 26 | } 27 | StartCommands::InitConfig => { 28 | ServerConfig::generate_config(cli.config_path.unwrap()) 29 | .unwrap_or_else(|e| panic!("unable to write config {}", e)); 30 | } 31 | }, 32 | } 33 | } 34 | 35 | async fn start_agent(config_path: PathBuf) { 36 | let config = ServerConfig::from_path(config_path) 37 | .unwrap_or_else(|e| panic!("unable to read config {}", e)); 38 | let agent = agent::Agent::new(config).await; 39 | tokio::select! 
{ 40 | _ = agent.start() => { 41 | println!("agent has stopped runnning") 42 | } 43 | _ = agent.wait_for_signal() => {} 44 | } 45 | } 46 | 47 | #[derive(Debug, Parser)] 48 | #[command(about = "tensorland cli", long_about = None)] 49 | struct Cli { 50 | #[arg(global = true)] 51 | config_path: Option, 52 | 53 | #[command(subcommand)] 54 | command: Commands, 55 | } 56 | 57 | #[derive(Debug, Subcommand)] 58 | enum Commands { 59 | Server(StartArgs), 60 | } 61 | 62 | #[derive(Debug, Args)] 63 | struct StartArgs { 64 | #[command(subcommand)] 65 | commands: StartCommands, 66 | } 67 | 68 | #[derive(Debug, Subcommand)] 69 | enum StartCommands { 70 | Start, 71 | InitConfig, 72 | } 73 | -------------------------------------------------------------------------------- /website/docs/guides/metrics.md: -------------------------------------------------------------------------------- 1 | --- 2 | sidebar_position: 4 3 | --- 4 | 5 | # Logging Experiment and Model Metrics 6 | ModelBox integrates with metrics storage services to store training hardware, experiment and model metrics. 7 | 8 | ## Python SDK 9 | Metrics can be logged against any object in ModelBox - models, experiments, specific model versions, etc. A `MetricValue` is logged for the object id at a given timestamp. 10 | 11 | ### API 12 | * `MetricValue` 13 | ``` 14 | class MetricValue: 15 | step: int 16 | wallclock_time: int 17 | value: Union[float, str, bytes] 18 | ``` 19 | 20 | The value could be a float to represent a scaler value or bytes or strings to represent serialized tensors. 21 | The `step` is optional and should be a real number if it represents the logical step at a given time of an experiment. 22 | The `wallclock` time is the physical clock time at which the metric was logged. 
23 | 24 | * SDK API 25 | 26 | ``` 27 | log_metrics(self, parent_id: str, key: str, value: MetricValue) 28 | ``` 29 | 30 | ## gRPC API 31 | ``` 32 | // Log Metrics for an experiment, model or checkpoint 33 | rpc LogMetrics(LogMetricsRequest) returns (LogMetricsResponse); 34 | 35 | // Get metrics logged for an experiment, model or checkpoint. 36 | rpc GetMetrics(GetMetricsRequest) returns (GetMetricsResponse); 37 | 38 | // Metrics contain the metric values for a given key 39 | message Metrics { 40 | string key = 1; 41 | 42 | repeated MetricsValue values = 2; 43 | } 44 | 45 | // Metric Value at a given point of time. 46 | message MetricsValue { 47 | uint64 step = 1; 48 | 49 | uint64 wallclock_time = 2; 50 | 51 | oneof value { 52 | float f_val = 5; 53 | 54 | string s_tensor = 6; 55 | 56 | bytes b_tensor = 7; 57 | } 58 | } 59 | 60 | // Message for logging a metric value at a given time 61 | message LogMetricsRequest { 62 | string parent_id = 1; 63 | 64 | string key = 2; 65 | 66 | MetricsValue value = 3; 67 | } 68 | 69 | message LogMetricsResponse {} 70 | 71 | message GetMetricsRequest { 72 | string parent_id = 1; 73 | } 74 | 75 | message GetMetricsResponse { 76 | repeated Metrics metrics = 1; 77 | } 78 | ``` 79 | -------------------------------------------------------------------------------- /website/src/components/HomepageFeatures/index.js: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import clsx from 'clsx'; 3 | import styles from './styles.module.css'; 4 | 5 | const FeatureList = [ 6 | { 7 | title: 'AI Metadata Store', 8 | Svg: require('@site/static/img/undraw_docusaurus_mountain.svg').default, 9 | description: ( 10 | <> 11 | Log, Search and Compare metadata from experiments, models and checkpoints. 12 | Support for logging metrics and events from various services involved in training 13 | and deployment of models. 
14 | 15 | ), 16 | }, 17 | { 18 | title: 'Model Registry', 19 | Svg: require('@site/static/img/undraw_docusaurus_tree.svg').default, 20 | description: ( 21 | <> 22 | Track and store trained models or upload them to a built in artifact server. Automatically 23 | apply recipes to transform models to representations understood by inference platforms 24 | and runtimes. 25 | 26 | ), 27 | }, 28 | { 29 | title: 'Continuous Model Evaluation', 30 | Svg: require('@site/static/img/undraw_docusaurus_react.svg').default, 31 | description: ( 32 | <> 33 | Measure performance metrics such as throughput and accuracy of models. 34 | Write deployment checks based on metrics to automatically tag models to promote to production. 35 | 36 | ), 37 | }, 38 | ]; 39 | 40 | function Feature({Svg, title, description}) { 41 | return ( 42 |
    <div className={clsx('col col--4')}>
43 |       <div className="text--center">
44 |         <Svg className={styles.featureSvg} role="img" />
45 |       </div>
46 |       <div className="text--center padding-horiz--md">
47 |         <h3>{title}</h3>
48 |         <p>{description}</p>
49 |       </div>
50 |     </div>
51 | ); 52 | } 53 | 54 | export default function HomepageFeatures() { 55 | return ( 56 |
    <section className={styles.features}>
57 |       <div className="container">
58 |         <div className="row">
59 |           {FeatureList.map((props, idx) => (
60 |             <Feature key={idx} {...props} />
61 |           ))}
62 |         </div>
63 |       </div>
64 |     </section>
65 | ); 66 | } 67 | -------------------------------------------------------------------------------- /website/docs/intro.md: -------------------------------------------------------------------------------- 1 | --- 2 | sidebar_position: 1 3 | --- 4 | 5 | # Introduction 6 | 7 | ModelBox is an AI model and experiment metadata management service. It provides primitives such as metadata management, model storage, distribution, and versioning for Deep Learning frameworks. 8 | 9 | Metadata and events can be exported as a stream by other systems to facilitate bespoke workflows such as compliance, access control, auditing, and deployment of models. 10 | 11 | ![Intro_Img](guides/img/modelbox_high_level_picture.png) 12 | 13 | ## Features 14 | #### Experiment Metadata and Metrics Logging 15 | - Log hyperparameters, accuracy/loss, and other quality-related metrics during training. 16 | - Log trainer events such as data-loading and checkpoint operations, epoch start and end times to help debug performance issues. 17 | 18 | #### Model Management 19 | - Log metadata associated with a model, such as binaries, notebooks, model metrics, etc. 20 | - Manage the lineage of models with experiments and datasets used to train the model. 21 | - Label models with valuable metadata for operational purposes, such as the services consuming them, privacy sensitivity, etc. 22 | - Load models and deployment artifacts in inference services directly from ModelBox. 23 | 24 | #### Events 25 | - Log events about the system/trainer state during training and models from experiment jobs, workflow systems, and other AI/MLOps services. 26 | - Any changes made to experiments and model metadata are logged as change events in the system. 27 | - External systems can watch events in real-time and trigger custom workflows. 28 | 29 | #### SDK 30 | - SDKs in Python, Go, Rust and C++ to integrate with ML frameworks and inference services. 31 | - SDK is built on top of gRPC. 
32 | 33 | #### Reliable and Easy to Use Control Plane 34 | - Features and APIs are designed with reliability in mind. 35 | - The service is built and distributed as a single binary. 36 | - Metrics related to the control plane, such as API latency, database connection stats, and system resource usage, are available as Prometheus metrics. 37 | 38 | #### Extensibility 39 | - Hackable and Interface first design 40 | - Additional data store backends can be supported easily. 41 | 42 | ## Supported storage backends 43 | 44 | #### Metadata 45 | - MySQL 46 | - PostgreSQL 47 | - Ephemeral Storage 48 | 49 | #### Metrics 50 | - Timescaledb 51 | - Ephemeral Storage 52 | 53 | #### Artifacts/Blobs 54 | - AWS S3 55 | - File System(Ephemeral, NFS) 56 | -------------------------------------------------------------------------------- /docker-compose.yaml: -------------------------------------------------------------------------------- 1 | version: "3.9" 2 | networks: 3 | dependencies: 4 | ipam: 5 | config: 6 | - subnet: 172.20.0.0/24 7 | server: 8 | ipam: 9 | config: 10 | - subnet: 172.21.0.0/24 11 | services: 12 | modelbox: 13 | image: modelboxdotio/modelbox:0.0.1-next-amd64 14 | profiles: ["local"] 15 | command: ["/app/scripts/compose_start.sh"] 16 | volumes: 17 | - type: volume 18 | source: data 19 | target: /tmp # stores data 20 | - type: bind 21 | source: ./cmd/modelbox/assets/ 22 | target: "/app/config" # binds config file 23 | - type: bind 24 | source: ./deploy/scripts/ 25 | target: "/app/scripts/" 26 | - type: bind 27 | source: ./server/storage/schemas/ 28 | target: /app/schemas/ 29 | depends_on: 30 | - "postgres" 31 | - "timescaledb" 32 | networks: 33 | server: 34 | ipv4_address: 172.21.0.2 35 | dependencies: 36 | postgres: 37 | image: postgres 38 | profiles: ["local", "unittests"] 39 | restart: always 40 | environment: 41 | - POSTGRES_PASSWORD=foo 42 | networks: 43 | dependencies: 44 | ipv4_address: 172.20.0.5 45 | mysql: 46 | image: mysql 47 | profiles: ["unittests"] 48 | 
command: --default-authentication-plugin=mysql_native_password 49 | restart: always 50 | networks: 51 | dependencies: 52 | ipv4_address: 172.20.0.6 53 | environment: 54 | - MYSQL_ROOT_PASSWORD=foo 55 | timescaledb: 56 | image: timescale/timescaledb:latest-pg14 57 | profiles: ["local", "unittests"] 58 | restart: always 59 | networks: 60 | dependencies: 61 | ipv4_address: 172.20.0.7 62 | environment: 63 | - POSTGRES_PASSWORD=foo 64 | jupyter: 65 | image: jupyter/scipy-notebook:0fd03d9356de 66 | command: ["/home/jovyan/work/scripts/jupyter_server_start.sh"] 67 | profiles: ["local"] 68 | restart: always 69 | ports: 70 | - "8888:8888" 71 | volumes: 72 | - type: bind 73 | source: ./sdk-py/ 74 | target: "/home/jovyan/work/sdk-py/" # binds config file 75 | - type: bind 76 | source: ./tutorials/ 77 | target: "/home/jovyan/work/tutorials/" 78 | - type: bind 79 | source: ./deploy/scripts 80 | target: "/home/jovyan/work/scripts/" 81 | 82 | networks: 83 | server: 84 | ipv4_address: 172.21.0.5 85 | 86 | volumes: 87 | data: 88 | -------------------------------------------------------------------------------- /python-rpc-client/.github/workflows/CI.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - main 5 | - master 6 | pull_request: 7 | workflow_dispatch: 8 | 9 | jobs: 10 | linux: 11 | runs-on: ubuntu-latest 12 | strategy: 13 | matrix: 14 | target: [x86_64, x86, aarch64, armv7, s390x, ppc64le] 15 | steps: 16 | - uses: actions/checkout@v3 17 | - uses: actions/setup-python@v4 18 | with: 19 | python-version: '3.10' 20 | - name: Build wheels 21 | uses: PyO3/maturin-action@v1 22 | with: 23 | target: ${{ matrix.target }} 24 | args: --release --out dist --find-interpreter 25 | manylinux: auto 26 | - name: Upload wheels 27 | uses: actions/upload-artifact@v3 28 | with: 29 | name: wheels 30 | path: dist 31 | 32 | windows: 33 | runs-on: windows-latest 34 | strategy: 35 | matrix: 36 | target: [x64, x86] 37 | steps: 38 | 
- uses: actions/checkout@v3 39 | - uses: actions/setup-python@v4 40 | with: 41 | python-version: '3.10' 42 | architecture: ${{ matrix.target }} 43 | - name: Build wheels 44 | uses: PyO3/maturin-action@v1 45 | with: 46 | target: ${{ matrix.target }} 47 | args: --release --out dist --find-interpreter 48 | - name: Upload wheels 49 | uses: actions/upload-artifact@v3 50 | with: 51 | name: wheels 52 | path: dist 53 | 54 | macos: 55 | runs-on: macos-latest 56 | strategy: 57 | matrix: 58 | target: [x86_64, aarch64] 59 | steps: 60 | - uses: actions/checkout@v3 61 | - uses: actions/setup-python@v4 62 | with: 63 | python-version: '3.10' 64 | - name: Build wheels 65 | uses: PyO3/maturin-action@v1 66 | with: 67 | target: ${{ matrix.target }} 68 | args: --release --out dist --find-interpreter 69 | - name: Upload wheels 70 | uses: actions/upload-artifact@v3 71 | with: 72 | name: wheels 73 | path: dist 74 | 75 | release: 76 | name: Release 77 | runs-on: ubuntu-latest 78 | if: "startsWith(github.ref, 'refs/tags/')" 79 | needs: [linux, windows, macos] 80 | steps: 81 | - uses: actions/download-artifact@v3 82 | with: 83 | name: wheels 84 | - name: Publish to PyPI 85 | uses: PyO3/maturin-action@v1 86 | env: 87 | MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }} 88 | with: 89 | command: upload 90 | args: --skip-existing * 91 | -------------------------------------------------------------------------------- /proto/admin.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | import "google/protobuf/timestamp.proto"; 3 | import "google/protobuf/struct.proto"; 4 | 5 | option go_package = "github.com/tensorland/modelbox/sdk-go/proto"; 6 | 7 | package modelbox; 8 | 9 | // The RPC interface used by the workers 10 | service ModelBoxAdmin { 11 | // Register an agent capable of running plugins 12 | rpc RegisterAgent(RegisterAgentRequest) returns (RegisterAgentResponse); 13 | // Workers heartbeat with the server about their presence 14 | 
// and work progress periodically 15 | rpc Heartbeat(HeartbeatRequest) returns (HeartbeatResponse); 16 | 17 | // Download the list of work that can be exectuted by a action runner 18 | rpc GetRunnableActionInstances(GetRunnableActionInstancesRequest) returns (GetRunnableActionInstancesResponse); 19 | 20 | // Update action status 21 | rpc UpdateActionStatus(UpdateActionStatusRequest) returns (UpdateActionStatusResponse); 22 | 23 | // Returns the list of servers in a cluster. 24 | rpc GetClusterMembers(GetClusterMembersRequest) returns (GetClusterMembersResponse); 25 | } 26 | 27 | message GetClusterMembersRequest {} 28 | 29 | message ClusterMember { 30 | string id = 1; 31 | string host_name = 2; 32 | string rpc_addr = 3; 33 | string http_addr = 4; 34 | } 35 | 36 | message GetClusterMembersResponse { 37 | repeated ClusterMember members = 1; 38 | } 39 | message NodeInfo { 40 | string host_name = 1; 41 | string ip_addr = 2; 42 | string arch = 3; 43 | } 44 | 45 | message HeartbeatRequest { 46 | string node_id = 1; 47 | google.protobuf.Timestamp at = 20; 48 | } 49 | 50 | message HeartbeatResponse { 51 | } 52 | 53 | message SubscribeEventRequest { 54 | string namespace = 1; 55 | string ml_framework = 2; 56 | string owner = 3; 57 | repeated string actions = 4; 58 | } 59 | 60 | message RegisterAgentRequest { 61 | NodeInfo node_info = 1; 62 | string agent_name = 2; 63 | } 64 | 65 | message RegisterAgentResponse{ 66 | string node_id = 1; 67 | } 68 | 69 | message GetRunnableActionInstancesRequest { 70 | string action_name = 1; 71 | string arch = 2; 72 | } 73 | 74 | message RunnableAction { 75 | string id = 1; 76 | string action_id = 2; 77 | string command = 3; 78 | map params = 5; 79 | } 80 | 81 | message GetRunnableActionInstancesResponse { 82 | repeated RunnableAction instances = 1; 83 | } 84 | 85 | message UpdateActionStatusRequest { 86 | string action_instance_id = 1; 87 | // Make the following uint32 enum 88 | uint32 status = 2; 89 | uint32 outcome = 3; 90 | string 
outcome_reason = 4; 91 | uint64 udpate_time = 5; 92 | } 93 | 94 | message UpdateActionStatusResponse { 95 | 96 | } -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/tensorland/modelbox 2 | 3 | go 1.19 4 | 5 | require ( 6 | github.com/VividCortex/mysqlerr v1.0.0 7 | github.com/aws/aws-sdk-go v1.44.82 8 | github.com/go-chi/chi/v5 v5.0.7 9 | github.com/go-sql-driver/mysql v1.6.0 10 | github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 11 | github.com/improbable-eng/grpc-web v0.15.0 12 | github.com/jmoiron/sqlx v1.3.5 13 | github.com/lib/pq v1.2.0 14 | github.com/mattn/go-sqlite3 v1.14.16 15 | github.com/olekukonko/tablewriter v0.0.5 16 | github.com/prometheus/client_golang v1.13.0 17 | github.com/robertkrimen/otto v0.2.1 18 | github.com/shirou/gopsutil/v3 v3.22.9 19 | github.com/spf13/cobra v1.5.0 20 | github.com/stretchr/testify v1.8.1 21 | github.com/vmihailenco/msgpack/v5 v5.3.5 22 | go.uber.org/zap v1.21.0 23 | google.golang.org/grpc v1.46.0 24 | google.golang.org/protobuf v1.28.1 25 | gopkg.in/yaml.v3 v3.0.1 26 | ) 27 | 28 | require ( 29 | github.com/beorn7/perks v1.0.1 // indirect 30 | github.com/cenkalti/backoff/v4 v4.1.1 // indirect 31 | github.com/cespare/xxhash/v2 v2.1.2 // indirect 32 | github.com/davecgh/go-spew v1.1.1 // indirect 33 | github.com/desertbit/timer v0.0.0-20180107155436-c41aec40b27f // indirect 34 | github.com/go-ole/go-ole v1.2.6 // indirect 35 | github.com/golang/protobuf v1.5.2 // indirect 36 | github.com/inconshreveable/mousetrap v1.0.0 // indirect 37 | github.com/jmespath/go-jmespath v0.4.0 // indirect 38 | github.com/klauspost/compress v1.11.7 // indirect 39 | github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect 40 | github.com/mattn/go-runewidth v0.0.9 // indirect 41 | github.com/matttproud/golang_protobuf_extensions v1.0.1 // indirect 42 | github.com/pmezard/go-difflib 
v1.0.0 // indirect 43 | github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect 44 | github.com/prometheus/client_model v0.2.0 // indirect 45 | github.com/prometheus/common v0.37.0 // indirect 46 | github.com/prometheus/procfs v0.8.0 // indirect 47 | github.com/rs/cors v1.7.0 // indirect 48 | github.com/spf13/pflag v1.0.5 // indirect 49 | github.com/tklauser/go-sysconf v0.3.10 // indirect 50 | github.com/tklauser/numcpus v0.4.0 // indirect 51 | github.com/vmihailenco/tagparser/v2 v2.0.0 // indirect 52 | github.com/yusufpapurcu/wmi v1.2.2 // indirect 53 | go.uber.org/atomic v1.7.0 // indirect 54 | go.uber.org/multierr v1.6.0 // indirect 55 | golang.org/x/net v0.0.0-20220225172249-27dd8689420f // indirect 56 | golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f // indirect 57 | golang.org/x/text v0.4.0 // indirect 58 | google.golang.org/genproto v0.0.0-20210126160654-44e461bb6506 // indirect 59 | gopkg.in/sourcemap.v1 v1.0.5 // indirect 60 | nhooyr.io/websocket v1.8.6 // indirect 61 | ) 62 | -------------------------------------------------------------------------------- /website/docs/guides/develop.md: -------------------------------------------------------------------------------- 1 | # Developing ModelBox 2 | 3 | We cover how to contribute to ModelBox using GitPod and some miscellaneous topics related to the development of the service and SDK. 4 | 5 | ## Gitpod 6 | 7 | Gitpod provides an ephemeral development environment which is ideal for - 8 | 1. Getting started with ModelBox and evaluate the service if you don't have Docker available locally. 9 | 2. Contributing code to ModelBox without setting up the development environment locally. 10 | 11 | Click the following button to have a GitPod workspace. When the workspace starts, it will automatically bring up a docker environment in a terminal and the PyTorch notebook can be run inside the workspace. 
12 | 13 | [![Open in Gitpod](https://gitpod.io/button/open-in-gitpod.svg)](https://gitpod.io/#https://github.com/tensorland/modelbox) 14 | 15 | 16 | ## Install Python SDK 17 | 18 | The Python SDK can be installed in the local environment using `pip`. Run the following command from the root directory. 19 | 20 | ``` 21 | make install-sdk-py 22 | ``` 23 | 24 | We automatically install the python lib in the jupyter container. 25 | 26 | ## Build Python SDK and push to PyPi 27 | 28 | ### Create a source distribution of modelbox 29 | ``` 30 | make build-sdk-py 31 | ``` 32 | 33 | ### Upload to test pypi 34 | ``` 35 | make upload-sdk-py-test 36 | ``` 37 | 38 | 39 | ### Upload to PyPi 40 | ``` 41 | make upload-sdk-py 42 | ``` 43 | 44 | 45 | ## Run Tests 46 | 47 | The tests require the dependencies of the server to be started first. 48 | 49 | 1. Start the dependencies for tests 50 | ``` 51 | docker compose --profile unittests up 52 | ``` 53 | 54 | 2. Run the server tests 55 | ``` 56 | make test-server 57 | ``` 58 | 59 | The SDK tests only the interfaces to the server and other logic internal to the library, so a full-blown server is not required. We create an ephemeral gRPC server before running the tests. The following commands will run the SDK tests. 60 | 61 | ``` 62 | cd sdk-py 63 | python tests/test_modelbox_api.py 64 | ``` 65 | 66 | ## Push latest snapshot containers of ModelBox to DockerHub 67 | 68 | 1. Build the server and the docker containers. 69 | ``` 70 | goreleaser release --snapshot --rm-dist 71 | ``` 72 | 73 | 2. Push the images to dockerhub. *Replace the versions* 74 | ``` 75 | docker push modelboxdotio/modelbox:0.0.1-next-arm64v8 76 | docker push modelboxdotio/modelbox:0.0.1-next-amd64 77 | ``` 78 | 79 | 3. Create a manifest to link the containers of various arch under one tag 80 | ``` 81 | docker manifest create modelboxdotio/modelbox:latest modelboxdotio/modelbox:0.0.1-next-arm64v8 modelboxdotio/modelbox:0.0.1-next-amd64 82 | ``` 83 | 84 | 4. 
Push the manifest 85 | ``` 86 | docker manifest push --purge modelboxdotio/modelbox:latest 87 | ``` -------------------------------------------------------------------------------- /src/agent.rs: -------------------------------------------------------------------------------- 1 | use super::server_config::ServerConfig; 2 | use object_store::aws::AmazonS3Builder; 3 | use object_store::gcp::GoogleCloudStorageBuilder; 4 | use object_store::local::LocalFileSystem; 5 | use std::sync::Arc; 6 | use tokio::signal; 7 | 8 | pub struct Agent { 9 | grpc_agent: crate::grpc_server::GrpcServer, 10 | repository: Arc, 11 | } 12 | 13 | impl Agent { 14 | pub async fn new(config: ServerConfig) -> Self { 15 | tracing::info!("creating agent"); 16 | 17 | tracing::info!( 18 | "creating object store client: {:?}", 19 | config.object_store.provider 20 | ); 21 | let object_store = Agent::get_object_store(&config) 22 | .unwrap_or_else(|e| panic!("unable to create object store client {}", e)); 23 | let grpc_agent = 24 | super::grpc_server::GrpcServer::new(config.grpc_listen_addr.clone(), object_store) 25 | .unwrap_or_else(|e| panic!("unable to create grpc server {}", e)); 26 | let respository = super::repository::Repository::new(&config.database_url()) 27 | .await 28 | .unwrap_or_else(|e| panic!("unable to create db {}", e)); 29 | tracing::info!("finished creating agent"); 30 | Agent { 31 | grpc_agent, 32 | repository: Arc::new(respository), 33 | } 34 | } 35 | 36 | pub fn get_object_store( 37 | server_config: &ServerConfig, 38 | ) -> Result, Box> { 39 | match server_config.object_store.provider { 40 | super::server_config::ObjectStoreProvider::S3 => { 41 | let s3 = AmazonS3Builder::from_env() 42 | .with_bucket_name(&server_config.object_store.bucket) 43 | .build()?; 44 | Ok(Arc::new(s3)) 45 | } 46 | super::server_config::ObjectStoreProvider::Gcs => { 47 | let gcs = GoogleCloudStorageBuilder::from_env() 48 | .with_bucket_name(&server_config.object_store.bucket) 49 | .build()?; 50 | 
Ok(Arc::new(gcs)) 51 | } 52 | super::server_config::ObjectStoreProvider::FileSystem => { 53 | let fs = LocalFileSystem::new_with_prefix(&server_config.object_store.bucket)?; 54 | Ok(Arc::new(fs)) 55 | } 56 | } 57 | } 58 | 59 | pub async fn start(&self) -> Result<(), Box> { 60 | tracing::info!("starting grpc server"); 61 | let agent = &self.grpc_agent; 62 | agent.start(self.repository.clone()).await?; 63 | Ok(()) 64 | } 65 | 66 | pub async fn wait_for_signal(&self) { 67 | match signal::ctrl_c().await { 68 | Ok(()) => { 69 | tracing::info!("received sigterm, shutting down cleanly"); 70 | } 71 | Err(err) => { 72 | tracing::error!("unable to listen for shutdown signal: {}", err); 73 | } 74 | } 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /sdk-py/README.md: -------------------------------------------------------------------------------- 1 | # ModelBox Python API 2 | 3 | This package contains the client library for the ModelBox API for managing Deep Learning models, checkpoints from experiments, and other model operations related services. 4 | 5 | ## Concepts and Understanding the ModelBox API 6 | 7 | ### Namespace 8 | A Namespace is a mechanism to organize related models or models published by a team. They are also use for access control and such to the metadata of uploaded models, invoking benchmarks or other model transformation work. Namespaces are automatically created when a new model or experieemnt specifies the namespace it wants to be associated with. 9 | 10 | ### Model 11 | A model is an object to track common metadata, and to apply policies on models created by experiments to solve a machine learning task. For ex. datasets to evaluate all trained models of a task can be tracked using this object. Users can also add rules around retention policies of trained versions, setup policies for labelling a trained model if it has better metrics on a dataset, and meets all other criterion. 
12 |
13 | ### Model Version
14 | A model version is a trained model, it includes the model binary, related files that a user wants to track - dataset file handles, any other metadata, model metrics, etc. Model versions are always related to a Model and all the policies created for a Model are applied to Model Versions.
15 |
16 | ### Experiment and Checkpoints
17 | Experiments are used to ingest model checkpoints created during a training run. ModelBox is not an experiment metadata tracker so there is no support for rich experiment management which are available on experiment trackers such as Weights and Biases, the experiment abstraction here exists so that we can track and ingest model checkpoints which eventually become model versions if they have good metrics and do well in benchmarks.
18 |
19 | ## Example
20 |
21 | ```
22 | from modelbox import ModelBoxClient, MLFramework
23 |
24 | client = ModelBoxClient(SERVER_ADDR)
25 |
26 | model = client.create_model(
27 | "yolo",
28 | "owner@email.com",
29 | "ai/vision/",
30 | "object_detection",
31 | "yolo_des",
32 | {"meta": "foo"},
33 | )
34 | model_version = client.create_model_version(
35 | model.id,
36 | "yolo4_v1",
37 | "v1",
38 | "A Yolo v4 trained with custom dataset",
39 | ["s3://path/to/bucket/model.pt"],
40 | {"model_hyperparam_1": "value"},
41 | MLFramework.PYTORCH,
42 | )
43 |
44 | client.close()
45 | ```
46 |
47 |
48 | ## Local Development and Installation
49 | The modelbox client library can be installed locally in the following way -
50 | ```
51 | cd /client-py/
52 | pip install .
53 | ```
54 | This installs the version of the client checked out with the repo.
55 |
56 | Build the client and create distribution packages
57 | ```
58 | cd /client-py/
59 | python -m build .
60 | ``` 61 | 62 | Run Tests 63 | ``` 64 | cd /client-py/ 65 | python tests/test_modelbox_api.py 66 | ``` -------------------------------------------------------------------------------- /website/docs/guides/concepts.md: -------------------------------------------------------------------------------- 1 | # Concepts and Entities 2 | 3 | ![Concept](img/API_Concepts.png) 4 | 5 | ## Namespace 6 | 7 | A Namespace is a mechanism to organize related models or models published by a team. They are also used for access control to the metadata of uploaded models, invoking benchmarks, or other model transformation work. Namespaces are automatically created when a new model or experiment specifies a new namespace. 8 | 9 | ## Model 10 | 11 | A model is an object to track standard metadata and apply policies on models created by experiments to solve a machine learning task. For example - datasets to evaluate and train the model can be tracked. Users can also add rules around retention policies of trained models and set up policies for labeling a trained model if it has good metrics on a dataset and meets all other production criteria. 12 | 13 | ## Model Version 14 | 15 | A model version is a trained model, it includes the model binary, related files that a user wants to track - dataset file handles, any other metadata, model metrics, etc. Model versions are always related to a Model, and all the policies created for a Model are applied to Model Versions. 16 | 17 | ## Experiment and Checkpoints 18 | 19 | Experiments are the abstraction to store metadata and events from training jobs that produce trained models. Checkpoints from experiments are automatically ingested and can be a means to get fault-tolerant training or set up automatic conversions to models if the metrics are good. 
Some examples of metadata logged from an experiment are hyperparameters, structure and shape of the models, training accuracy, loss and other related metrics, hardware metrics of the trainer nodes, checkpoint binaries, and even training code with dependencies, etc. 20 | 21 | ## Metrics 22 | 23 | Metrics can be logged for experiments and models. Metrics are key, value pairs, the value being a series of float, tensor(serialized as strings), or even bytes that are logged over time. Every metric log can have a step and wallclock attribute associated with them, which makes them useful in tracking things like accuracy during training or hardware metrics. Model Metrics can be expressed as simple key/value pairs. 24 | 25 | ## Artifacts 26 | 27 | Artifacts such as datasets and trained models or checkpoints can be either uploaded to ModelBox, or if they are stored externally, they can be tracked as metadata attached to experiments and model objects. 28 | 29 | ## Events 30 | 31 | Events are generated by external systems running the training experiments, inference engines consuming the Models, or even other ML Operations services consuming the models or metadata to benchmark or deploy a model. Events are useful for debugging or operability of models or training platforms. 32 | 33 | For example, if events are logged at the start of an epoch, before and after writing a checkpoint, looking at the timestamps allows an engineer to understand which operation is taking too much time if training slows down. 34 | 35 | ## Audit Events 36 | 37 | Audit events are automatically generated by ModelBox when metadata about an experiment or model is updated, created, or destroyed. Events are also logged automatically when artifacts are uploaded or downloaded. They are useful in production systems to know when and where models are consumed, when new models are created by experiments, etc. 
38 | -------------------------------------------------------------------------------- /website/docs/install.md: -------------------------------------------------------------------------------- 1 | --- 2 | sidebar_position: 2 3 | --- 4 | 5 | # Install and Operation 6 | [![Open in Gitpod](https://gitpod.io/button/open-in-gitpod.svg)](https://gitpod.io/#https://github.com/tensorland/modelbox) 7 | 8 | ModelBox can be installed and run in several modes depending on the use case. Here we review a few of those use cases and discuss possible ways to run and operate the service. 9 | 10 | 11 | ## Service Components 12 | 13 | The service consists of the following components - 14 | 15 | 1. ModelBox Server - The central control plane of ModelBox stores metadata related to experiments and models. 16 | 2. Metadata Storage Backend - Storage service for experiment and model metadata. 17 | 3. Blob Server - The server can be optionally run in the blob serving mode, where it only offers APIs to download and upload artifacts. 18 | 4. Metrics Backend - Storage service for time series data of experiments and models. 19 | 20 | 21 | ## Evaluating ModelBox Locally 22 | 23 | The best way to evaluate ModelBox is to run it locally using ephemeral storage. This mode allows users to train new models and learn how to log, read and compare metadata using the SDK without thinking about deploying in a cluster. 24 | 25 | #### Configuration 26 | 27 | Generate the default config for ModelBox server. The CLI has a command to generate the default config. It generates configuration to run the server with ephemeral storage. 
28 | 29 | 30 | ``` 31 | $ modelbox server init-config 32 | ``` 33 | 34 | 35 | The config generated should be the following in a file called modelbox_server.toml - 36 | 37 | 38 | ``` 39 | artifact_storage = "filesystem" 40 | metadata_storage = "ephemeral" 41 | metrics_storage = "inmemory" 42 | listen_addr = ":8085" 43 | 44 | [artifact_storage_filesystem] 45 | base_dir = "/tmp/modelboxblobs" 46 | 47 | [artifact_storage_s3] 48 | region = "us-east-1" 49 | bucket = "modelbox-artifacts" 50 | 51 | [metadata_storage_integrated] 52 | path = "/tmp/modelbox.dat" 53 | 54 | [metadata_storage_postgres] 55 | host = "172.17.0.3" 56 | port = 5432 57 | username = "postgres" 58 | password = "foo" 59 | dbname = "modelbox" 60 | 61 | [metadata_storage_mysql] 62 | host = "172.17.0.2" 63 | port = 3306 64 | username = "root" 65 | password = "foo" 66 | dbname = "modelbox" 67 | 68 | [metrics_storage_timescaledb] 69 | host = "172.17.0.4" 70 | port = 5432 71 | username = "postgres" 72 | password = "foo" 73 | dbname = "modelbox_metrics" 74 | ``` 75 | 76 | #### Start the Server 77 | 78 | ``` 79 | $ modelbox server start --config-path ./path/to/modelbox_server.toml 80 | ``` 81 | 82 | That's it! Once the server is started, the ModelBox SDK or CLI can be used to interact with the service. 83 | 84 | ## Thoughts on Production Deployment Scenarios 85 | 86 | In production, it is expected that HA data storage services are used for metadata and metrics storage. The ModelBox server should also run in a HA mode by running multiple instances of the server in a cluster. The service metrics should be monitored, and the appropriate number of instances of services should be chosen to keep the API latency and resource usage of the server to reasonable limits. 
87 | 88 | ![High Level Architecture](guides/img/ModelBox_HighLevel.png) 89 | 90 | ModelBox supports the following databases, metrics and blob storage services - 91 | 92 | ### Metadata Store 93 | - MySQL 94 | - PostgreSQL 95 | - Ephemeral Storage 96 | 97 | ### Metrics Store 98 | - Timescaledb 99 | - Ephemeral Storage 100 | 101 | ### Artifacts and Blob Storage 102 | - AWS S3 103 | - File System(Ephemeral, NFS) 104 | 105 | ## Build ModelBox 106 | 107 | ModelBox can be built from source or be downloaded from GitHub. 108 | 109 | 110 | #### Building from Source using GoReleaser 111 | 112 | Install goreleaser from here. After it’s installed, the binary can be built. 113 | 114 | 115 | ``` 116 | goreleaser build --rm-dist --snapshot 117 | ``` -------------------------------------------------------------------------------- /website/docs/guides/configuration.md: -------------------------------------------------------------------------------- 1 | --- 2 | sidebar_position: 1 3 | --- 4 | 5 | # Configuring ModelBox Server 6 | ModelBox server reads a toml configuration file to initialize the various service dependencies. 
A sample configuration is here - 7 | 8 | ``` 9 | artifact_storage = "filesystem" 10 | 11 | metadata_storage = "ephemeral" 12 | 13 | metrics_storage = "inmemory" 14 | 15 | listen_addr = ":8085" 16 | 17 | [artifact_storage_filesystem] 18 | base_dir = "/tmp/modelboxblobs" 19 | 20 | [artifact_storage_s3] 21 | region = "us-east-1" 22 | bucket = "modelbox-artifacts" 23 | 24 | [metadata_storage_integrated] 25 | path = "/tmp/modelbox.dat" 26 | 27 | [metadata_storage_postgres] 28 | host = "172.17.0.3" 29 | port = 5432 30 | username = "postgres" 31 | password = "foo" 32 | dbname = "modelbox" 33 | 34 | [metadata_storage_mysql] 35 | host = "172.17.0.2" 36 | port = 3306 37 | username = "root" 38 | password = "foo" 39 | dbname = "modelbox" 40 | 41 | [metrics_storage_timescaledb] 42 | host = "172.17.0.4" 43 | port = 5432 44 | username = "postgres" 45 | password = "foo" 46 | dbname = "modelbox_metrics" 47 | ``` 48 | 49 | ## Generate Server Configuration 50 | ``` 51 | modelbox server init-config 52 | ``` 53 | 54 | ## Server Parameters 55 | * `artifact_storage` - 56 | - `s3` - Artifacts are stored in AWS S3. `artifact_storage_s3` section is read for S3 specific configuration. 57 | - `filesystem` - Artifacts are stored in filesystem. `artifact_storage_filesystem` section is read for file-system specific configuration. 58 | 59 | * `metadata_storage` - Backend to use for storing experiment and model metadata. 60 | - `mysql` - `metadata_storage_mysql` is read to configure mysql. 61 | - `postgres` - `metadata_storage_postgres` is read to configure postgres. 62 | - `ephemeral` - `metadata_storage_integrated` is read to configure filesystem based storage. 63 | 64 | * `metrics_storage` - Backend to use for metrics storage. Possible options - 65 | - `inmemory` - Metrics are stored in memory. No further configuration is required. 66 | - `timescaledb` - Metrics are stored in timescaledb. `metrics_storage_timescaledb` section is read to configure timescaledb. 
67 | 68 | * `listen_addr` - Network interfaces and ports on which the modelbox server binds to. 69 | 70 | * `artifact_storage_filesystem` - Configuration to store artifacts in the filesystem. 71 | - `base_dir` - Base directory in the filesystem where artifacts are stored. 72 | 73 | * `artifact_storage_s3` - Configuration to store artifacts in S3 74 | - `region` - Region of the bucket 75 | - `bucket` - Bucket to store the artifacts. 76 | 77 | * `metadata_storage_integrated` - Configuration to store server metadata in filesystem 78 | - `path` - Path of the file where boltdb stores data. 79 | 80 | * `metadata_storage_postgres` - Configuration of Postgres database for storing server metadata 81 | - `host` - host or dns address of the Postgres service 82 | - `port` - Port on which the database is listening 83 | - `username` - username of the database. 84 | - `password` - password of the database. 85 | - `dbname` - name of the database. 86 | 87 | * `metadata_storage_mysql` - Configuration of MySQL database for storing server metadata 88 | - `host` - host or dns address of the MySQL service 89 | - `port` - port on which the database is listening 90 | - `username` - service username to access the database. 91 | - `password` - password to access the database. 92 | - `dbname` - name of the database. 93 | 94 | * `metrics_storage_timescaledb` - Configuration for TimescaleDB. 95 | - `host` - host or dns address of the Postgres service 96 | - `port` - port on which the database is listening 97 | - `username` - service username to access the database. 98 | - `password` - password to access the database. 99 | - `dbname` - name of the database. 100 | 101 | # Configuring ModelBox Client 102 | ModelBox Client binary requires a config to interact with the API endpoint. 
An example client configuration - 103 | 104 | ``` 105 | server_addr = ":8085" 106 | ``` 107 | 108 | ## Generate Client Configuration 109 | ``` 110 | modelbox client init-config 111 | ``` 112 | 113 | ## Client Parameters 114 | * `server_addr` : Address of the server API endpoint -------------------------------------------------------------------------------- /src/server_config.rs: -------------------------------------------------------------------------------- 1 | use serde::{Deserialize, Serialize}; 2 | use std::fs; 3 | use std::path::PathBuf; 4 | 5 | use thiserror::Error; 6 | 7 | #[derive(Error, Debug)] 8 | pub enum ConfigParsingError { 9 | #[error("unable to read config file")] 10 | IoError(#[from] std::io::Error), 11 | 12 | #[error("unable to de-serialize yaml")] 13 | DeserializationError { 14 | #[from] 15 | source: serde_yaml::Error, 16 | }, 17 | } 18 | 19 | #[derive(Debug, PartialEq, Serialize, Deserialize)] 20 | pub enum ObjectStoreProvider { 21 | S3, 22 | Gcs, 23 | FileSystem, 24 | } 25 | 26 | #[derive(Debug, PartialEq, Serialize, Deserialize)] 27 | pub struct ObjectStoreConfig { 28 | pub bucket: String, 29 | pub provider: ObjectStoreProvider, 30 | } 31 | 32 | #[derive(Debug, PartialEq, Serialize, Deserialize)] 33 | pub struct ServerConfig { 34 | pub grpc_listen_addr: String, 35 | pub database_host: String, 36 | pub database_name: String, 37 | pub database_username: String, 38 | pub database_password: String, 39 | pub object_store: ObjectStoreConfig, 40 | } 41 | 42 | impl ServerConfig { 43 | pub fn database_url(&self) -> String { 44 | format!( 45 | "postgres://{}/{}?user={}&password={}", 46 | &self.database_host, 47 | &self.database_name, 48 | &self.database_username, 49 | &self.database_password 50 | ) 51 | } 52 | } 53 | 54 | impl Default for ServerConfig { 55 | fn default() -> Self { 56 | Self { 57 | grpc_listen_addr: "127.0.0.1:8085".into(), 58 | database_host: "localhost:5432".into(), 59 | database_name: "tensorland".into(), 60 | database_username: 
"postgres".into(), 61 | database_password: "foo".into(), 62 | object_store: ObjectStoreConfig { 63 | bucket: "/tmp/modelbox/".into(), 64 | provider: ObjectStoreProvider::FileSystem, 65 | }, 66 | } 67 | } 68 | } 69 | 70 | impl ServerConfig { 71 | pub fn from_path(path: PathBuf) -> Result { 72 | let yaml = fs::read_to_string(path.as_path())?; 73 | ServerConfig::from_str(yaml) 74 | } 75 | 76 | fn from_str(content: String) -> Result { 77 | let config: Result = serde_yaml::from_str(&content) 78 | .map_err(|e| ConfigParsingError::DeserializationError { source: e }); 79 | config 80 | } 81 | 82 | pub fn generate_config(path: PathBuf) -> Result<(), ConfigParsingError> { 83 | let config = ServerConfig::default(); 84 | let str = serde_yaml::to_string(&config) 85 | .map_err(|e| ConfigParsingError::DeserializationError { source: e })?; 86 | std::fs::write(path.as_path(), str)?; 87 | Ok(()) 88 | } 89 | } 90 | 91 | #[cfg(test)] 92 | mod tests { 93 | use indoc::indoc; 94 | 95 | use super::{ConfigParsingError, ServerConfig}; 96 | 97 | #[test] 98 | fn invalid_config_path() { 99 | let config = ServerConfig::from_path("/invalid/path".into()); 100 | assert!(matches!(config, Err(ConfigParsingError::IoError(_)))); 101 | } 102 | 103 | #[test] 104 | fn valid_yaml() { 105 | let valid_config = indoc! 
{r#" 106 | --- 107 | grpc_listen_addr: "127.0.0.1:9089" 108 | database_host: "localhost:5234" 109 | database_name: "tensorland" 110 | database_username: "postgres" 111 | database_password: "foo" 112 | object_store: 113 | bucket: "/tmp/modelbox/" 114 | provider: FileSystem 115 | "#}; 116 | let config = ServerConfig::from_str(valid_config.into()).unwrap(); 117 | assert_eq!( 118 | config.database_url(), 119 | "postgres://localhost:5234/tensorland?user=postgres&password=foo" 120 | ); 121 | assert_eq!(config.grpc_listen_addr, "127.0.0.1:9089"); 122 | assert!(matches!( 123 | config.object_store.provider, 124 | super::ObjectStoreProvider::FileSystem 125 | )); 126 | assert_eq!(config.object_store.bucket, "/tmp/modelbox/") 127 | } 128 | 129 | #[test] 130 | fn invalid_yaml() {} 131 | } 132 | -------------------------------------------------------------------------------- /website/docusaurus.config.js: -------------------------------------------------------------------------------- 1 | // @ts-check 2 | // Note: type annotations allow type checking and IDEs autocompletion 3 | 4 | const lightCodeTheme = require('prism-react-renderer/themes/github'); 5 | const darkCodeTheme = require('prism-react-renderer/themes/dracula'); 6 | 7 | /** @type {import('@docusaurus/types').Config} */ 8 | const config = { 9 | title: 'ModelBox', 10 | tagline: 'An extensible AI Metadata and Model Operations Service', 11 | url: 'https://modelbox.io', 12 | baseUrl: '/', 13 | onBrokenLinks: 'throw', 14 | onBrokenMarkdownLinks: 'warn', 15 | favicon: 'img/favicon.ico', 16 | 17 | // GitHub pages deployment config. 18 | // If you aren't using GitHub pages, you don't need these. 19 | organizationName: 'tensorland', // Usually your GitHub org/user name. 20 | projectName: 'modelbox', // Usually your repo name. 21 | 22 | // Even if you don't use internalization, you can use this field to set useful 23 | // metadata like html lang. 
For example, if your site is Chinese, you may want 24 | // to replace "en" with "zh-Hans". 25 | i18n: { 26 | defaultLocale: 'en', 27 | locales: ['en'], 28 | }, 29 | 30 | presets: [ 31 | [ 32 | 'classic', 33 | /** @type {import('@docusaurus/preset-classic').Options} */ 34 | ({ 35 | docs: { 36 | sidebarPath: require.resolve('./sidebars.js'), 37 | // Please change this to your repo. 38 | // Remove this to remove the "edit this page" links. 39 | editUrl: 40 | 'https://github.com/facebook/docusaurus/tree/main/packages/create-docusaurus/templates/shared/', 41 | }, 42 | blog: { 43 | showReadingTime: true, 44 | // Please change this to your repo. 45 | // Remove this to remove the "edit this page" links. 46 | editUrl: 47 | 'https://github.com/facebook/docusaurus/tree/main/packages/create-docusaurus/templates/shared/', 48 | }, 49 | theme: { 50 | customCss: require.resolve('./src/css/custom.css'), 51 | }, 52 | googleAnalytics: { 53 | trackingID: 'G-5FXR0WKMER', 54 | anonymizeIP: true, 55 | }, 56 | }), 57 | ], 58 | ], 59 | 60 | themeConfig: 61 | /** @type {import('@docusaurus/preset-classic').ThemeConfig} */ 62 | ({ 63 | navbar: { 64 | title: 'ModelBox', 65 | logo: { 66 | alt: 'ModelBox Logo', 67 | src: 'img/ModelBox3.png', 68 | }, 69 | items: [ 70 | { 71 | type: 'doc', 72 | docId: 'intro', 73 | position: 'left', 74 | label: 'Documentation', 75 | }, 76 | {to: '/blog', label: 'Blog', position: 'left'}, 77 | { 78 | href: 'https://github.com/tensorland/modelbox', 79 | label: 'GitHub', 80 | position: 'right', 81 | }, 82 | ], 83 | }, 84 | footer: { 85 | style: 'dark', 86 | links: [ 87 | { 88 | title: 'Docs', 89 | items: [ 90 | { 91 | label: 'Tutorial', 92 | to: '/docs/intro', 93 | }, 94 | ], 95 | }, 96 | { 97 | title: 'Community', 98 | items: [ 99 | { 100 | label: 'GitHub Discussions', 101 | href: 'https://github.com/tensorland/modelbox/discussions', 102 | }, 103 | { 104 | label: 'Discord', 105 | href: 'https://discord.gg/mrXrq3DmV8', 106 | }, 107 | { 108 | label: 'Twitter', 109 | 
href: 'https://twitter.com/modelboxio', 110 | }, 111 | ], 112 | }, 113 | { 114 | title: 'More', 115 | items: [ 116 | { 117 | label: 'Blog', 118 | to: '/blog', 119 | }, 120 | { 121 | label: 'GitHub', 122 | href: 'https://github.com/tensorland/modelbox', 123 | }, 124 | ], 125 | }, 126 | ], 127 | copyright: `Copyright © ${new Date().getFullYear()} ModelBox.`, 128 | }, 129 | prism: { 130 | theme: lightCodeTheme, 131 | darkTheme: darkCodeTheme, 132 | }, 133 | }), 134 | }; 135 | 136 | module.exports = config; 137 | -------------------------------------------------------------------------------- /website/docs/guides/experiments.md: -------------------------------------------------------------------------------- 1 | --- 2 | sidebar_position: 3 3 | --- 4 | 5 | # Logging Experiment Metadata 6 | 7 | Log experiment metadata like hyperparameters, discrete operational events from the trainer, model metrics after every epoch, and even hardware metrics where training is being run. 8 | 9 | 10 | ## Tutorials 11 | Several tutorials go over logging metadata - 12 | 1. [Python SDK Guide](https://github.com/tensorland/modelbox/blob/main/tutorials/Tutorial_Python_SDK.ipynb) 13 | 2. [Pytorch Integration](https://github.com/tensorland/modelbox/blob/main/tutorials/Tutorial_Pytorch.ipynb) 14 | 15 | 16 | ## Python SDK 17 | 18 | #### Create an Experiment 19 | ``` 20 | experiment = mbox.new_experiment( 21 | name="yolo-v4", owner="foo@bar.com", namespace="cv", external_id="ext1", framework=MLFramework.PYTORCH 22 | ) 23 | ``` 24 | 25 | #### Log additional metadata 26 | We can log arbitrary metadata related to the experiment as python dictionaries. 27 | 28 | ``` 29 | experiment.update_metadata(key="hyperparams", value={"fc_layers": 3, "lr": 0.0002}) 30 | // result has the updated_at timestamp 31 | ``` 32 | 33 | #### Log Metrics 34 | Arbitrary metrics can be logged at any point of the experiment lifecycle. The step represents the step in the experiment such as epoch or update step, etc. 
The wallclock time is the human interpretable time at which the metrics is created, and the value is the metric value. The following types of values are supported - float, strings and bytes. Tensors can be serialized to bytes or strings. 35 | 36 | ``` 37 | experiment.log_metrics(metrics={'loss': 2.4, 'accu': 97.6}, step=10, wallclock=12345) 38 | ``` 39 | 40 | #### Log Events 41 | Events can be logged while training models to make debugging and improve the observability of training workflows. Other MLOps and inference systems can also log events against a model to provide information about how and where a model is being consumed or transformed. 42 | 43 | The following code logs an event about checkpoint store event from a trainer and records the wallclock time and checkpoint size. The events could be read to troubleshoot performance issues of checkpoint write operations. 44 | ``` 45 | experiment.log_event(Event(name="checkpoint_started", source=EventSource(name="trainer"), wallclock_time = 12000 , metadata={"chk_size": 2345})) 46 | torch.save() 47 | experiment.log_event(Event(name="checkpoint_finish", source=EventSource(name="trainer"), wallclock_time = 12500 , metadata={"write_speed": 2000})) 48 | ``` 49 | 50 | #### Upload Artifacts 51 | Use this API to upload any artifact that's useful to reproduce an experiment or troubleshoot any model quality issues. 52 | ``` 53 | upload_artifact(files) 54 | ``` 55 | * files - A list of file paths to be uploaded. 56 | 57 | #### Track Artifacts stored elsewhere 58 | Use this API to track any artifact which is stored elsewhere and is relevant to the experiment. 59 | ``` 60 | track_artifacts(files) 61 | ``` 62 | * files - A list of file paths to be tracked. 63 | 64 | #### Listing Artifacts 65 | Once artifacts are tracked, the list of artifacts can be fetched. 
This will download the Artifact metadata 66 | ``` 67 | artifacts() -> List[Artifact] 68 | ``` 69 | 70 | Artifact has the following attributes - 71 | ``` 72 | class Artifact: 73 | parent: str 74 | path: str 75 | mime_type: ArtifactMime = ArtifactMime.Unknown 76 | checksum: str = "" 77 | id: str = "" 78 | ``` 79 | 80 | ## gRPC API 81 | The gRPC APIs that are used by the SDKs for logging experiment metadata - 82 | 83 | ``` 84 | // Creates a new experiment 85 | rpc CreateExperiment(CreateExperimentRequest) 86 | returns (CreateExperimentResponse); 87 | 88 | // Persists a set of metadata related to objects 89 | rpc UpdateMetadata(UpdateMetadataRequest) returns (UpdateMetadataResponse); 90 | 91 | // Log Metrics for an experiment, model or checkpoint 92 | rpc LogMetrics(LogMetricsRequest) returns (LogMetricsResponse); 93 | 94 | message CreateExperimentRequest { 95 | string name = 1; 96 | string owner = 2; 97 | string namespace = 3; 98 | MLFramework framework = 4; 99 | string task = 5; 100 | Metadata metadata = 6; 101 | string external_id = 7; 102 | } 103 | 104 | message CreateExperimentResponse { 105 | string experiment_id = 1; 106 | bool experiment_exists = 2; 107 | google.protobuf.Timestamp created_at = 20; 108 | google.protobuf.Timestamp updated_at = 21; 109 | } 110 | 111 | message UpdateMetadataRequest { 112 | string parent_id = 1; 113 | Metadata metadata = 2; 114 | } 115 | 116 | message UpdateMetadataResponse { 117 | int32 num_keys_written = 1; 118 | google.protobuf.Timestamp updated_at = 5; 119 | } 120 | ``` 121 | 122 | -------------------------------------------------------------------------------- /agents/py/agent.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from dataclasses import dataclass 3 | import asyncio 4 | import signal 5 | import logging 6 | import platform 7 | import socket 8 | import sys 9 | from typing import List 10 | 11 | from client import AdminClient 12 | from modelbox import admin_pb2 
13 | 14 | 15 | @dataclass 16 | class AgentConfig: 17 | server_addr: str 18 | heartbeat_dur: int 19 | name: str 20 | ip_addr: str 21 | 22 | 23 | @dataclass 24 | class Node: 25 | hostname: str 26 | arch: str 27 | 28 | # TODO Make this configurable by adding some flags and such 29 | logging.basicConfig(stream=sys.stdout, level=logging.INFO) 30 | logger = logging.getLogger(__name__) 31 | 32 | 33 | class ModelBoxAgent: 34 | def __init__(self, config: AgentConfig, worker: str) -> None: 35 | super().__init__() 36 | self._config: AgentConfig = config 37 | self._client: AdminClient = AdminClient(self._config.server_addr) 38 | self._worker = worker 39 | self._node: Node = Node(hostname=platform.node(), arch=platform.machine()) 40 | self._server_node_id: str = None 41 | 42 | async def register_node(self): 43 | logger.info(f"registering node") 44 | advertise_addr = ( 45 | self._get_default_addr() 46 | if self._config.ip_addr is None 47 | else self._config.ip_addr 48 | ) 49 | node_info = admin_pb2.NodeInfo( 50 | host_name=self._node.hostname, ip_addr=advertise_addr, arch=self._node.arch 51 | ) 52 | while True: 53 | try: 54 | resp = self._client.register_agent( 55 | node_info=node_info, name=self._config.name 56 | ) 57 | self._server_node_id = resp.node_id 58 | logger.info(f"registered node, server node id:{self._server_node_id}") 59 | break 60 | except Exception as ex: 61 | logger.error( 62 | f"unable to register agent with server {ex}. 
Trying again in {self._config.heartbeat_dur}" 63 | ) 64 | await asyncio.sleep(self._config.heartbeat_dur) 65 | continue 66 | 67 | async def heartbeat(self): 68 | logger.info(f"starting to heartbeat sever {self._config.heartbeat_dur}s") 69 | while True: 70 | try: 71 | logger.info("heartbeat....") 72 | response = self._client.heartbeat(node_id=self._server_node_id) 73 | except Exception as ex: 74 | logger.error(f"couldn't register heartbeat {ex}") 75 | await asyncio.sleep(self._config.heartbeat_dur) 76 | 77 | async def poll_for_work(self): 78 | logger.info(f"polling for work every {self._config.heartbeat_dur}") 79 | while True: 80 | try: 81 | logger.info("work poll....") 82 | response: admin_pb2.GetRunnableActionInstancesResponse = self._client.get_runnable_actions(self._worker, self._node.arch) 83 | logger.info(f"respone {response}") 84 | except Exception as ex: 85 | logger.error(f"unable to get work {ex}") 86 | await asyncio.sleep(self._config.heartbeat_dur) 87 | pass 88 | 89 | async def agent_runner(self): 90 | try: 91 | # Register Node 92 | await self.register_node() 93 | 94 | # Start the heartbeat and poll for work concurrently 95 | await asyncio.gather(self.heartbeat(), self.poll_for_work()) 96 | except asyncio.CancelledError: 97 | logger.info("exiting agent") 98 | 99 | def _get_default_addr(self) -> str: 100 | # TODO This is really hacky. 
We should use https://pypi.org/project/netifaces/ 101 | # to probe for interfaces and pick up a reasonable address as default 102 | s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) 103 | s.connect(("8.8.8.8", 80)) 104 | return s.getsockname()[0] 105 | 106 | 107 | if __name__ == "__main__": 108 | parser = argparse.ArgumentParser( 109 | prog="modelbox-agent", description="modelbox agent" 110 | ) 111 | parser.add_argument( 112 | "--server_addr", default="localhost:8081", help="address of the admin api" 113 | ) 114 | parser.add_argument("--heartbeat_dur", default=5, help="heart beat duration") 115 | parser.add_argument( 116 | "--worker", help="list of workers(separated by space)" 117 | ) 118 | parser.add_argument("--name", default="default-agent", help="agent name") 119 | parser.add_argument( 120 | "--agent_ip_addr", default=None, help="advertise ip addr of the host" 121 | ) 122 | args = parser.parse_args() 123 | 124 | agent = ModelBoxAgent( 125 | config=AgentConfig( 126 | args.server_addr, 127 | args.heartbeat_dur, 128 | name=args.name, 129 | ip_addr=args.agent_ip_addr, 130 | ), 131 | worker=args.worker, 132 | ) 133 | loop = asyncio.get_event_loop() 134 | main_task = asyncio.ensure_future(agent.agent_runner()) 135 | for sig in [signal.SIGTERM, signal.SIGINT]: 136 | loop.add_signal_handler(sig, main_task.cancel) 137 | 138 | try: 139 | loop.run_until_complete(main_task) 140 | finally: 141 | loop.close() 142 | -------------------------------------------------------------------------------- /python-rpc-client/src/mock.rs: -------------------------------------------------------------------------------- 1 | use tokio_stream::wrappers::ReceiverStream; 2 | use tonic::Status; 3 | 4 | use super::modelbox::model_store_server::ModelStore; 5 | use super::modelbox::{ 6 | CreateExperimentRequest, CreateExperimentResponse, CreateModelRequest, CreateModelResponse, 7 | CreateModelVersionRequest, CreateModelVersionResponse, DownloadFileRequest, 8 | DownloadFileResponse, 
GetExperimentRequest, GetExperimentResponse, GetMetricsRequest, 9 | GetMetricsResponse, ListArtifactsRequest, ListArtifactsResponse, ListEventsRequest, 10 | ListEventsResponse, ListExperimentsRequest, ListExperimentsResponse, ListMetadataRequest, 11 | ListMetadataResponse, ListModelVersionsRequest, ListModelVersionsResponse, ListModelsRequest, 12 | ListModelsResponse, LogEventRequest, LogEventResponse, LogMetricsRequest, LogMetricsResponse, 13 | TrackArtifactsRequest, TrackArtifactsResponse, UpdateMetadataRequest, UpdateMetadataResponse, 14 | UploadFileRequest, UploadFileResponse, WatchNamespaceRequest, WatchNamespaceResponse, 15 | }; 16 | 17 | #[derive(Default)] 18 | pub struct MockModelStoreServer {} 19 | 20 | #[tonic::async_trait] 21 | impl ModelStore for MockModelStoreServer { 22 | async fn create_model( 23 | &self, 24 | _request: tonic::Request, 25 | ) -> Result, tonic::Status> { 26 | Ok(tonic::Response::new(CreateModelResponse::default())) 27 | } 28 | 29 | async fn list_models( 30 | &self, 31 | _request: tonic::Request, 32 | ) -> Result, tonic::Status> { 33 | unimplemented!() 34 | } 35 | async fn create_model_version( 36 | &self, 37 | _request: tonic::Request, 38 | ) -> Result, tonic::Status> { 39 | unimplemented!() 40 | } 41 | async fn list_model_versions( 42 | &self, 43 | _request: tonic::Request, 44 | ) -> Result, tonic::Status> { 45 | unimplemented!() 46 | } 47 | async fn create_experiment( 48 | &self, 49 | _request: tonic::Request, 50 | ) -> Result, tonic::Status> { 51 | unimplemented!() 52 | } 53 | async fn list_experiments( 54 | &self, 55 | _request: tonic::Request, 56 | ) -> Result, tonic::Status> { 57 | unimplemented!() 58 | } 59 | async fn get_experiment( 60 | &self, 61 | _request: tonic::Request, 62 | ) -> Result, tonic::Status> { 63 | unimplemented!() 64 | } 65 | async fn upload_file( 66 | &self, 67 | _request: tonic::Request>, 68 | ) -> Result, tonic::Status> { 69 | unimplemented!(); 70 | } 71 | 72 | type DownloadFileStream = ReceiverStream>; 73 
| 74 | async fn download_file( 75 | &self, 76 | _request: tonic::Request, 77 | ) -> Result, tonic::Status> { 78 | unimplemented!(); 79 | } 80 | 81 | async fn update_metadata( 82 | &self, 83 | _request: tonic::Request, 84 | ) -> Result, tonic::Status> { 85 | unimplemented!() 86 | } 87 | 88 | async fn list_metadata( 89 | &self, 90 | _request: tonic::Request, 91 | ) -> Result, tonic::Status> { 92 | unimplemented!() 93 | } 94 | 95 | async fn track_artifacts( 96 | &self, 97 | _request: tonic::Request, 98 | ) -> Result, tonic::Status> { 99 | unimplemented!() 100 | } 101 | 102 | async fn list_artifacts( 103 | &self, 104 | _request: tonic::Request, 105 | ) -> Result, tonic::Status> { 106 | unimplemented!() 107 | } 108 | 109 | async fn log_metrics( 110 | &self, 111 | _request: tonic::Request, 112 | ) -> Result, tonic::Status> { 113 | unimplemented!() 114 | } 115 | 116 | async fn get_metrics( 117 | &self, 118 | _request: tonic::Request, 119 | ) -> Result, tonic::Status> { 120 | unimplemented!() 121 | } 122 | 123 | async fn log_event( 124 | &self, 125 | _request: tonic::Request, 126 | ) -> Result, tonic::Status> { 127 | unimplemented!() 128 | } 129 | 130 | async fn list_events( 131 | &self, 132 | _request: tonic::Request, 133 | ) -> Result, tonic::Status> { 134 | unimplemented!() 135 | } 136 | 137 | type WatchNamespaceStream = ReceiverStream>; 138 | async fn watch_namespace( 139 | &self, 140 | _request: tonic::Request, 141 | ) -> Result, tonic::Status> { 142 | unimplemented!() 143 | } 144 | } 145 | -------------------------------------------------------------------------------- /website/docs/guides/models.md: -------------------------------------------------------------------------------- 1 | --- 2 | sidebar_position: 5 3 | --- 4 | 5 | 6 | # Creating Model and Model Versions 7 | 8 | ## Terminologies 9 | 10 | * Model - Model objects are used to hold common metadata across all versions of models trained to solve a particular Machine Learning task. 
For example, Language ID models which are trained must be evaluated against a particular test dataset that an organization cares about. This allows for building some common knowledge about the characteristics of the problem domain when evaluating new models. 11 | 12 | * ModelVersion - ModelVersions are trained models coming out of experiments. They usually have metrics related to accuracy and performance attached to them which helps in understanding the expected behavior when applications consume them. ModelVersions can be stored in external storage systems and their location can be tracked, or they can be uploaded in ModelBox directly and stored in the configured storage backend. 13 | 14 | ## Model - Python API 15 | 16 | ### Create Model 17 | ``` 18 | model = client.new_model(name, owner, namespace, task, description, artifacts, metadata) 19 | ``` 20 | 21 | * name - Name of the model 22 | * owner - Owner of the model 23 | * namespace - Namespace to which a model is attached. 24 | * task - Task that versions of this model. Ex - English ASR, Language ID, etc. 25 | * description - A brief description of the model. 26 | * artifacts - List of artifacts to track. This doesn't upload the artifacts. Use the upload_artifacts API to upload artifacts related to the model. 27 | * metadata - Arbitrary key/value metadata related to the model 28 | 29 | 30 | ## ModelVersion - Python API 31 | 32 | ### Create Model Version 33 | 34 | ``` 35 | model_version = model.new_model_version(version, name, description, artifacts, metadata, unique_tags, framework) 36 | ``` 37 | * version - Version of the model 38 | * name - Name of the model version. If this is omitted the name of the model is used. 39 | * description - Description of the model version. 40 | * artifacts - Artifacts tracked with the model version. 41 | * metadata - Additional key/value associated with the version. 42 | * unique_tags - Tags to identify the model version. These are unique across all the versions of a given model. 
They are useful in denoting something unique about a model such as the version deployed in production. 43 | * framework - Framework used to build the model. 44 | 45 | ## APIs to log metrics, metadata, track and upload Artifacts 46 | 47 | The following APIs are common across models and model versions. Once a model or model version object is created the following APIs are available on those objects. 48 | 49 | ### Log Metrics 50 | ``` 51 | log_metrics(metrics, step, wallclock) 52 | ``` 53 | * metrics - A dictionary of metrics keys and values. The values could be either a float, string or bytes. 54 | * step - The step in the training lifecycle when the metric was emitted. For example the epoch number of a training loop. 55 | * wallclock - the wallclock time when the metric was logged. In the absence of the step value, metrics are ordered by wallclock. 56 | 57 | ### Log Metadata 58 | ``` 59 | update_metadata(key, value) 60 | ``` 61 | 62 | * key - key to identifying the metadata 63 | * value - Any arbitrary python value that can be JSON encoded. 64 | 65 | ### Upload Artifacts 66 | ``` 67 | upload_artifact(files) 68 | ``` 69 | * files - A list of file paths to be uploaded. 70 | 71 | ### Track Artifacts stored elsewhere 72 | ``` 73 | track_artifacts(files) 74 | ``` 75 | * files - A list of file paths to be tracked. 76 | 77 | ### Listing Artifacts 78 | Once artifacts are tracked, the list of artifacts can be fetched. This will download the Artifact metadata 79 | ``` 80 | artifacts() -> List[Artifact] 81 | ``` 82 | 83 | Artifact has the following attributes - 84 | ``` 85 | class Artifact: 86 | parent: str 87 | path: str 88 | mime_type: ArtifactMime = ArtifactMime.Unknown 89 | checksum: str = "" 90 | id: str = "" 91 | ``` 92 | 93 | ## gRPC API 94 | 95 | The following gRPC APIs allow creating model and model versions 96 | 97 | ``` 98 | // Create a new Model under a namespace. If no namespace is specified, models 99 | // are created under a default namespace. 
100 | rpc CreateModel(CreateModelRequest) returns (CreateModelResponse); 101 | 102 | // List Models uploaded for a namespace 103 | rpc ListModels(ListModelsRequest) returns (ListModelsResponse); 104 | 105 | // Creates a new model version for a model 106 | rpc CreateModelVersion(CreateModelVersionRequest) 107 | returns (CreateModelVersionResponse); 108 | 109 | // Lists model versions for a model. 110 | rpc ListModelVersions(ListModelVersionsRequest) 111 | returns (ListModelVersionsResponse); 112 | 113 | 114 | // UploadFile streams a files to ModelBox and stores the binaries to the condfigured storage 115 | rpc UploadFile(stream UploadFileRequest) returns (UploadFileResponse); 116 | 117 | message UploadFileRequest { 118 | oneof stream_frame { 119 | FileMetadata metadata = 1; 120 | bytes chunks = 2; 121 | } 122 | } 123 | 124 | message FileMetadata { 125 | string id = 1; 126 | 127 | // The ID of the checkpoint, experiment, model to which this file belongs to 128 | string parent_id = 2; 129 | 130 | // MIMEType of the file 131 | FileType file_type = 3; 132 | 133 | // checksum of the file 134 | string checksum = 4; 135 | 136 | // path of the file 137 | string path = 5; 138 | 139 | google.protobuf.Timestamp created_at = 20; 140 | google.protobuf.Timestamp updated_at = 21; 141 | } 142 | 143 | enum FileType { 144 | UNDEFINED = 0; 145 | MODEL = 1; 146 | CHECKPOINT = 2; 147 | TEXT = 3; 148 | IMAGE = 4; 149 | AUDIO = 5; 150 | VIDEO = 6; 151 | } 152 | ``` 153 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, 
level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 
50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | diptanuc@gmail.com. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or 93 | permanent ban. 94 | 95 | ### 3. 
Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within 113 | the community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.0, available at 119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 120 | 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 122 | enforcement ladder](https://github.com/mozilla/diversity). 123 | 124 | [homepage]: https://www.contributor-covenant.org 125 | 126 | For answers to common questions about this code of conduct, see the FAQ at 127 | https://www.contributor-covenant.org/faq. Translations are available at 128 | https://www.contributor-covenant.org/translations. 
import os
import logging
import time
from re import A  # NOTE(review): unused here — looks like an editor auto-import; safe to drop
from argparse import Namespace
from typing import Optional, Mapping, Callable, Sequence, Union, Any, Dict
from unicodedata import name  # NOTE(review): unused here — looks like an editor auto-import

from pytorch_lightning.loggers.base import LightningLoggerBase, rank_zero_experiment
from pytorch_lightning.utilities.distributed import rank_zero_only

from weakref import ReferenceType
from pytorch_lightning.callbacks.model_checkpoint import ModelCheckpoint

from modelbox.modelbox import ModelBox, Experiment, MLFramework

logger = logging.getLogger("pytorch_lightning")

# Default address of the ModelBox gRPC server.
SERVER_ADDR = "localhost:8085"


class ModelBoxLogger(LightningLoggerBase):
    """PyTorch Lightning logger that records experiments, metrics,
    hyperparameters and checkpoint artifacts in ModelBox.

    The underlying ModelBox experiment is created lazily on first access of
    the ``experiment`` property (rank zero only).
    """

    def __init__(
        self,
        namespace: str,
        experiment_name: str,
        owner: str,
        external_id: str = "",
        server_addr: str = SERVER_ADDR,
        upload_checkpoints: bool = False,
        agg_key_funcs: Optional[
            Mapping[str, Callable[[Sequence[float]], float]]
        ] = None,
        agg_default_func: Optional[Callable[[Sequence[float]], float]] = None,
    ):
        # Fix: the attribute was previously misspelled `_namepsace`; it is
        # private, so renaming it everywhere it is read is caller-safe.
        self._namespace = namespace
        self._experiment_name = experiment_name
        self._owner = owner
        self._external_id = external_id
        super().__init__(agg_key_funcs=agg_key_funcs, agg_default_func=agg_default_func)

        # Create the ModelBox client; the experiment itself is created
        # lazily by the `experiment` property.  (The original assigned
        # `self._experiment = None` twice; once is enough.)
        self._mbox = ModelBox(server_addr)
        self._experiment = None
        self._upload_checkpoints = upload_checkpoints
        self._checkpoint_paths = set()

        self._current_step = 0
        self._epoch = 0

    @property
    def name(self):
        """Name of the experiment; forces experiment creation if needed."""
        if self._experiment is None:
            self._experiment = self.experiment
        return self._experiment_name

    @property
    def version(self):
        # Return the experiment version, int or str.
        return "0.1"

    @property
    @rank_zero_experiment
    def experiment(self) -> Experiment:
        """Return the ModelBox experiment, creating it on first access."""
        logger.info("modelbox - attempting to create a project")
        if self._experiment is None:
            self._experiment = self._mbox.new_experiment(
                name=self._experiment_name,
                owner=self._owner,
                namespace=self._namespace,
                external_id=self._external_id,
                framework=MLFramework.PYTORCH,
            )
            logger.info(
                "modelbox - created experiment with id: {}".format(self._experiment.id)
            )
        return self._experiment

    @rank_zero_only
    def log_metrics(self, metrics, step):
        """Log a dict of metric values for `step` against the experiment.

        Lightning injects the current epoch into `metrics`; it is popped and
        tracked separately instead of being logged as a metric.
        """
        self._current_step = step
        self._epoch = metrics.pop('epoch')
        if self._experiment is None:
            return
        m = dict(metrics)
        self._experiment.log_metrics(metrics=m, step=step)
        logger.info(
            "modelbox - log metrics, step: {}, metrics: {}".format(step, m)
        )

    @rank_zero_only
    def log_hyperparams(
        self,
        params: Union[Dict[str, Any], Namespace],
        metrics: Optional[Dict[str, Any]] = None,
    ) -> None:
        """Record hyperparameters as experiment metadata.

        Fix: the class previously defined ``log_hyperparams`` twice — an
        earlier no-op stub was dead code shadowed by this definition; the
        stub has been removed.  Log-message typo "hpraams" also fixed.
        """
        self._experiment.update_metadata(key="hyperparams", value=params)
        logger.info(f"modelbox - log hyperparams params {params}")
        logger.info(f"modelbox - log hyperparams metrics {metrics}")

    @rank_zero_only
    def after_save_checkpoint(
        self, checkpoint_callback: "ReferenceType[ModelCheckpoint]"
    ) -> None:
        """Track newly written best-k checkpoints as experiment artifacts."""
        # Finding out paths of new checkpoints and recording them.
        file_names = set()
        chk_state = checkpoint_callback.state_dict()["best_k_models"]
        for best_k_path in chk_state.keys():
            file_names.add(best_k_path)
        new_chk_paths = file_names - self._checkpoint_paths
        for chk_path in new_chk_paths:
            logger.info("modelbox - recording checkpoint {}".format(chk_path))
            chk_id = self._experiment.track_artifacts(files=[chk_path])
            logger.info("modelbox - recorded checkpoint {}".format(chk_id))

        # Updating the state with all the checkpoints we have just discovered.
        self._checkpoint_paths = file_names

    @rank_zero_only
    def save(self):
        # Optional. Any code necessary to save logger data goes here.
        pass

    @rank_zero_only
    def finalize(self, status):
        # Optional. Any code that needs to be run after training finishes.
        pass

    @staticmethod
    def _get_full_model_name(
        model_path: str, checkpoint_callback: "ReferenceType[ModelCheckpoint]"
    ) -> str:
        """Returns model name which is string `model_path` appended to `checkpoint_callback.dirpath`."""
        expected_model_path = f"{checkpoint_callback.dirpath}{os.path.sep}"
        if not model_path.startswith(expected_model_path):
            raise ValueError(
                f"{model_path} was expected to start with {expected_model_path}."
            )
        # Remove extension from filepath.
        filepath, _ = os.path.splitext(model_path[len(expected_model_path):])

        return filepath
146 | ) 147 | # Remove extension from filepath 148 | filepath, _ = os.path.splitext(model_path[len(expected_model_path) :]) 149 | 150 | return filepath 151 | -------------------------------------------------------------------------------- /python-rpc-client/src/api_structs.rs: -------------------------------------------------------------------------------- 1 | use pyo3::prelude::*; 2 | use std::collections::HashMap; 3 | 4 | use super::modelbox; 5 | 6 | #[pyclass] 7 | #[derive(Default, Clone)] 8 | pub struct CreateExperiment { 9 | #[pyo3(get, set)] 10 | pub name: String, 11 | 12 | #[pyo3(get, set)] 13 | pub namespace: String, 14 | 15 | #[pyo3(get, set)] 16 | pub owner: String, 17 | 18 | #[pyo3(get, set)] 19 | pub ml_framework: i32, 20 | 21 | #[pyo3(get, set)] 22 | pub external_id: String, 23 | 24 | #[pyo3(get, set)] 25 | pub task: String, 26 | } 27 | 28 | #[pymethods] 29 | impl CreateExperiment { 30 | #[new] 31 | pub fn new( 32 | name: String, 33 | namespace: String, 34 | owner: String, 35 | framework: i32, 36 | external_id: String, 37 | task: String, 38 | ) -> Self { 39 | Self { 40 | name: name, 41 | namespace: namespace, 42 | owner: owner, 43 | ml_framework: framework, 44 | external_id: external_id, 45 | task: task, 46 | } 47 | } 48 | } 49 | 50 | #[pyclass] 51 | #[derive(Default, Clone)] 52 | pub struct CreateModel { 53 | #[pyo3(get, set)] 54 | pub name: String, 55 | 56 | #[pyo3(get, set)] 57 | pub namespace: String, 58 | 59 | #[pyo3(get, set)] 60 | pub owner: String, 61 | 62 | #[pyo3(get, set)] 63 | pub description: String, 64 | 65 | #[pyo3(get, set)] 66 | pub task: String, 67 | } 68 | 69 | #[pymethods] 70 | impl CreateModel { 71 | #[new] 72 | pub fn new( 73 | name: String, 74 | namespace: String, 75 | owner: String, 76 | description: String, 77 | task: String, 78 | ) -> Self { 79 | Self { 80 | name: name, 81 | namespace: namespace, 82 | owner: owner, 83 | description: description, 84 | task: task, 85 | } 86 | } 87 | } 88 | 89 | #[pyclass] 90 | #[derive(Default, 
Clone)] 91 | pub struct CreateModelResult { 92 | #[pyo3(get)] 93 | pub id: String, 94 | } 95 | 96 | #[pyclass] 97 | #[derive(Default, Clone)] 98 | pub struct CreateModelVersion { 99 | #[pyo3(get, set)] 100 | pub name: String, 101 | 102 | #[pyo3(get, set)] 103 | pub model_id: String, 104 | 105 | #[pyo3(get, set)] 106 | pub namespace: String, 107 | 108 | #[pyo3(get, set)] 109 | pub owner: String, 110 | 111 | #[pyo3(get, set)] 112 | pub description: String, 113 | 114 | #[pyo3(get, set)] 115 | pub ml_framework: MLFramework, 116 | 117 | #[pyo3(get, set)] 118 | pub version: String, 119 | 120 | #[pyo3(get, set)] 121 | pub tags: Vec, 122 | } 123 | 124 | #[pymethods] 125 | impl CreateModelVersion { 126 | #[new] 127 | pub fn new( 128 | name: String, 129 | model_id: String, 130 | namespace: String, 131 | owner: String, 132 | description: String, 133 | framework: MLFramework, 134 | version: String, 135 | tags: Vec, 136 | ) -> Self { 137 | Self { 138 | name: name, 139 | model_id: model_id, 140 | namespace: namespace, 141 | owner: owner, 142 | description: description, 143 | ml_framework: framework, 144 | version: version, 145 | tags: tags, 146 | } 147 | } 148 | } 149 | 150 | #[pyclass] 151 | #[derive(Default, Clone)] 152 | pub struct CreateModelVersionResult { 153 | #[pyo3(get)] 154 | pub id: String, 155 | } 156 | 157 | #[pyclass] 158 | #[derive(Default, Clone)] 159 | pub struct LogEvent { 160 | #[pyo3(get, set)] 161 | pub object_id: String, 162 | 163 | #[pyo3(get, set)] 164 | pub name: String, 165 | 166 | #[pyo3(get, set)] 167 | pub source: String, 168 | 169 | #[pyo3(get, set)] 170 | pub timestamp: i64, 171 | 172 | #[pyo3(get, set)] 173 | pub metadata: HashMap, 174 | } 175 | 176 | #[pymethods] 177 | impl LogEvent { 178 | #[new] 179 | pub fn new( 180 | object_id: String, 181 | name: String, 182 | source: String, 183 | timestamp: i64, 184 | metadata: HashMap, 185 | ) -> Self { 186 | Self { 187 | object_id: object_id, 188 | name: name, 189 | source: source, 190 | timestamp: 
timestamp, 191 | metadata: metadata, 192 | } 193 | } 194 | } 195 | 196 | #[pyclass] 197 | pub struct LogEventResult {} 198 | 199 | #[pyclass] 200 | pub struct CreateExpeirmentResult { 201 | #[pyo3(get)] 202 | pub id: String, 203 | } 204 | 205 | #[pyclass] 206 | #[derive(Clone, Copy)] 207 | pub enum MLFramework { 208 | Pytorch = 0, 209 | Tensorflow = 1, 210 | MXNet = 2, 211 | XGBoost = 3, 212 | LightGBM = 4, 213 | Sklearn = 5, 214 | H2O = 6, 215 | SparkML = 7, 216 | CatBoost = 8, 217 | Keras = 9, 218 | Other = 10, 219 | } 220 | 221 | impl MLFramework { 222 | pub fn to_proto(&self) -> modelbox::MlFramework { 223 | modelbox::MlFramework::from_i32(*self as i32).unwrap_or(modelbox::MlFramework::Unknown) 224 | } 225 | } 226 | 227 | impl Default for MLFramework { 228 | fn default() -> Self { 229 | MLFramework::Other 230 | } 231 | } 232 | 233 | #[pyclass] 234 | pub enum ArtifactMime { 235 | Unknown = 0, 236 | ModelVersion = 1, 237 | Checkpoint = 2, 238 | Text = 3, 239 | Image = 4, 240 | Video = 5, 241 | Audio = 6, 242 | } 243 | 244 | pub(crate) fn register(_py: Python<'_>, m: &PyModule) -> PyResult<()> { 245 | m.add_class::()?; 246 | m.add_class::()?; 247 | m.add_class::()?; 248 | m.add_class::()?; 249 | m.add_class::()?; 250 | m.add_class::()?; 251 | m.add_class::()?; 252 | m.add_class::()?; 253 | m.add_class::()?; 254 | m.add_class::()?; 255 | Ok(()) 256 | } 257 | -------------------------------------------------------------------------------- /website/static/img/logo.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python-rpc-client/src/lib.rs: -------------------------------------------------------------------------------- 1 | use pyo3::prelude::*; 2 | use std::fmt; 3 | 4 | mod modelbox; 5 | 6 | use modelbox::model_store_client::ModelStoreClient; 7 | use modelbox::{CreateExperimentRequest, GetExperimentRequest}; 8 | 9 | mod api_structs; 10 | 11 | 
mod mock; 12 | 13 | #[pyclass] 14 | #[derive(Default)] 15 | struct ModelBoxRpcClient { 16 | addr: String, 17 | rt: Option, 18 | client: Option>, 19 | } 20 | 21 | #[pymethods] 22 | impl ModelBoxRpcClient { 23 | #[new] 24 | pub fn new(addr: String) -> PyResult { 25 | let rt = tokio::runtime::Runtime::new().unwrap(); 26 | let client = rt 27 | .block_on(modelbox::model_store_client::ModelStoreClient::connect( 28 | addr.clone(), 29 | )) 30 | .map_err(|e| PyErr::new::(format!("{}", e)))?; 31 | Ok(Self { 32 | addr: addr, 33 | rt: Some(rt), 34 | client: Some(client), 35 | }) 36 | } 37 | 38 | pub fn experiment(&mut self, id: String) -> PyResult { 39 | let req = GetExperimentRequest { id: id }; 40 | let fut_experiment = self.client.as_mut().unwrap().get_experiment(req); 41 | let res = self 42 | .rt 43 | .as_ref() 44 | .unwrap() 45 | .block_on(fut_experiment) 46 | .map_err(|e| PyErr::new::(format!("{}", e)))?; 47 | 48 | match res.into_inner().experiment { 49 | Some(experiment) => Ok(experiment.name), 50 | None => Err(PyErr::new::( 51 | "experiment not found", 52 | )), 53 | } 54 | } 55 | 56 | pub fn create_experiment( 57 | &mut self, 58 | experiment: api_structs::CreateExperiment, 59 | ) -> PyResult { 60 | let req = CreateExperimentRequest { 61 | name: experiment.name, 62 | namespace: experiment.namespace, 63 | owner: experiment.owner, 64 | framework: experiment.ml_framework, 65 | external_id: experiment.external_id, 66 | task: experiment.task, 67 | }; 68 | let fut_experiment = self.client.as_mut().unwrap().create_experiment(req); 69 | let res = self 70 | .rt 71 | .as_ref() 72 | .unwrap() 73 | .block_on(fut_experiment) 74 | .map_err(|e| PyErr::new::(format!("{}", e)))?; 75 | 76 | Ok(api_structs::CreateExpeirmentResult { 77 | id: res.into_inner().experiment_id, 78 | }) 79 | } 80 | 81 | pub fn create_model( 82 | &mut self, 83 | model: api_structs::CreateModel, 84 | ) -> PyResult { 85 | let req = modelbox::CreateModelRequest { 86 | name: model.name, 87 | namespace: 
model.namespace, 88 | owner: model.owner, 89 | task: model.task, 90 | description: model.description, 91 | }; 92 | let fut_model = self.client.as_mut().unwrap().create_model(req); 93 | let res = self 94 | .rt 95 | .as_ref() 96 | .unwrap() 97 | .block_on(fut_model) 98 | .map_err(|e| PyErr::new::(format!("{}", e)))?; 99 | Ok(api_structs::CreateModelResult { 100 | id: res.into_inner().id, 101 | }) 102 | } 103 | 104 | pub fn create_model_version( 105 | &mut self, 106 | model_version: api_structs::CreateModelVersion, 107 | ) -> PyResult { 108 | let req = modelbox::CreateModelVersionRequest { 109 | model: model_version.model_id, 110 | name: model_version.name, 111 | version: model_version.version, 112 | description: model_version.description, 113 | namespace: model_version.namespace, 114 | framework: model_version.ml_framework.to_proto() as i32, 115 | unique_tags: model_version.tags, 116 | }; 117 | let fut_model_version = self.client.as_mut().unwrap().create_model_version(req); 118 | let res = self 119 | .rt 120 | .as_ref() 121 | .unwrap() 122 | .block_on(fut_model_version) 123 | .map_err(|e| PyErr::new::(format!("{}", e)))?; 124 | Ok(api_structs::CreateModelVersionResult { 125 | id: res.into_inner().model_version, 126 | }) 127 | } 128 | 129 | pub fn log_event( 130 | &mut self, 131 | event: api_structs::LogEvent, 132 | ) -> PyResult { 133 | let req = modelbox::LogEventRequest { 134 | parent_id: event.object_id, 135 | event: Some(modelbox::Event { 136 | name: event.name, 137 | source: Some(modelbox::EventSource { name: event.source }), 138 | wallclock_time: Some(prost_types::Timestamp { 139 | seconds: event.timestamp, 140 | nanos: 0, 141 | }), 142 | metadata: Some(modelbox::Metadata { 143 | metadata: event.metadata, 144 | }), 145 | }), 146 | }; 147 | let fut_event = self.client.as_mut().unwrap().log_event(req); 148 | let _res = self 149 | .rt 150 | .as_ref() 151 | .unwrap() 152 | .block_on(fut_event) 153 | .map_err(|e| PyErr::new::(format!("{}", e)))?; 154 | 
Ok(api_structs::LogEventResult {}) 155 | } 156 | } 157 | 158 | impl fmt::Display for ModelBoxRpcClient { 159 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 160 | write!(f, "ModelBoxRpcClient {{ addr: {} }}", self.addr) 161 | } 162 | } 163 | 164 | #[pymodule] 165 | fn modelbox_rpc_client(py: Python, m: &PyModule) -> PyResult<()> { 166 | m.add_class::()?; 167 | api_structs::register(py, m)?; 168 | Ok(()) 169 | } 170 | -------------------------------------------------------------------------------- /sdk-py/modelbox/client.py: -------------------------------------------------------------------------------- 1 | import os 2 | from os import times 3 | from typing import Any, Union, Dict, List 4 | import json 5 | from dataclasses import dataclass 6 | from hashlib import md5 7 | 8 | import grpc 9 | from . import service_pb2 10 | from . import service_pb2_grpc 11 | from google.protobuf.struct_pb2 import Value 12 | from google.protobuf import json_format 13 | from google.protobuf import timestamp_pb2 14 | 15 | # The chunk size at which files are being read. 
# The chunk size (in bytes) used when streaming file contents.
CHUNK_SZ = 1024


def file_checksum(path) -> str:
    """Return the hex MD5 digest of the file at `path`.

    Reads the file in CHUNK_SZ blocks so large artifacts are not loaded
    into memory at once (the original read the whole file in one call).
    """
    digest = md5()
    with open(path, "rb") as f:
        while True:
            data = f.read(CHUNK_SZ)
            if not data:
                break
            digest.update(data)
    return digest.hexdigest()


@dataclass
class ClientFileUploadResult:
    # Id of the uploaded file and of the artifact it was attached to.
    file_id: str
    artifact_id: str


@dataclass
class ClientTrackArtifactsResult:
    # Id of the artifact-tracking record created on the server.
    id: str


@dataclass
class ClientFileDownloadResult:
    # Id of the downloaded file, local path it was written to, and the
    # server-reported checksum of its contents.
    id: str
    path: str
    checksum: str


@dataclass
class MetricValue:
    # One metric sample: training step, wallclock time, and the value
    # (float, string, or bytes per the service schema).
    step: int
    wallclock_time: int
    value: Union[float, str, bytes]


class ModelBoxClient:
    """Thin synchronous wrapper around the ModelBox gRPC service.

    Owns an insecure channel to `addr`; use as a context manager or call
    `close()` to release it.
    """

    def __init__(self, addr):
        self._addr = addr
        self._channel = grpc.insecure_channel(addr)
        self._client = service_pb2_grpc.ModelStoreStub(self._channel)

    def create_experiment(
        self,
        name: str,
        owner: str,
        namespace: str,
        external_id: str,
        framework_proto: int,
    ) -> service_pb2.CreateExperimentResponse:
        """Create (or fetch) an experiment identified by name/namespace."""
        req = service_pb2.CreateExperimentRequest(
            name=name,
            owner=owner,
            namespace=namespace,
            external_id=external_id,
            framework=framework_proto,
        )
        return self._client.CreateExperiment(req)

    def update_metadata(
        self, parent_id: str, key: str, value: Any
    ) -> service_pb2.UpdateMetadataResponse:
        """Attach one JSON-encoded key/value metadata pair to `parent_id`."""
        value = json.dumps(value)
        meta = service_pb2.Metadata(metadata={key: value})
        req = service_pb2.UpdateMetadataRequest(parent_id=parent_id, metadata=meta)
        return self._client.UpdateMetadata(req)

    def log_metrics(
        self, parent_id: str, key: str, value: MetricValue
    ) -> service_pb2.LogMetricsResponse:
        """Log one metric sample for `key` against `parent_id`.

        NOTE(review): only the float field (`f_val`) is populated even
        though MetricValue allows str/bytes — confirm intended.
        """
        req = service_pb2.LogMetricsRequest(
            parent_id=parent_id,
            key=key,
            value=service_pb2.MetricsValue(
                step=value.step, wallclock_time=value.wallclock_time, f_val=value.value
            ),
        )
        return self._client.LogMetrics(req)

    def get_all_metrics(self, parent_id: str) -> Dict[str, List[MetricValue]]:
        """Fetch every metric for `parent_id`, keyed by metric name."""
        req = service_pb2.GetMetricsRequest(parent_id=parent_id)
        resp = self._client.GetMetrics(req)
        metrics = {}
        for key, metric in resp.metrics.items():
            metrics[key] = [
                MetricValue(step=v.step, wallclock_time=v.wallclock_time, value=v.f_val)
                for v in metric.values
            ]
        return metrics

    def create_model(
        self,
        name: str,
        owner: str,
        namespace: str,
        task: str,
        description: str,
    ) -> service_pb2.Model:
        """Create a model in `namespace`."""
        req = service_pb2.CreateModelRequest(
            name=name,
            owner=owner,
            namespace=namespace,
            task=task,
            description=description,
        )
        return self._client.CreateModel(req)

    def _file_chunk_iterator(
        self, artifact_name: str, object_id: str, path: str, file_type_proto: int
    ):
        """Yield UploadFileRequest frames: one metadata frame, then chunks."""
        checksum = file_checksum(path)
        file_meta = service_pb2.FileMetadata(
            parent_id=object_id,
            checksum=checksum,
            file_type=file_type_proto,
            src_path=path,
        )
        upload_meta = service_pb2.UploadFileMetadata(
            artifact_name=artifact_name,
            object_id=object_id,
            metadata=file_meta,
        )
        # The stream protocol requires metadata first, then raw chunks.
        yield service_pb2.UploadFileRequest(metadata=upload_meta)
        with open(path, "rb") as f:
            while True:
                data = f.read(CHUNK_SZ)
                if not data:
                    break
                yield service_pb2.UploadFileRequest(chunks=data)

    def upload_artifact(
        self, artifact_name: str, object_id: str, path: str, file_type_proto: int
    ) -> ClientFileUploadResult:
        """Stream the file at `path` to the server as an artifact."""
        itr = self._file_chunk_iterator(artifact_name, object_id, path, file_type_proto)
        resp = self._client.UploadFile(itr)
        return ClientFileUploadResult(
            file_id=resp.file_id, artifact_id=resp.artifact_id
        )

    def download_asset(self, id: str, dst_path: str) -> ClientFileDownloadResult:
        """Download file `id` beneath `dst_path`, preserving its src_path.

        Fix: the original bound the *class* (`ret = ClientFileDownloadResult`)
        and returned it unpopulated; it also left the output file handle
        open and could reference `file_name` before assignment if the first
        frame was not metadata.  The stream is now consumed in one pass,
        the file handle is always closed, and a populated result is
        returned.
        """
        req = service_pb2.DownloadFileRequest(file_id=id)
        resp_itr = self._client.DownloadFile(req)
        file_name, checksum = None, None
        out = None
        try:
            for resp in resp_itr:
                if resp.HasField("metadata"):
                    src_path = resp.metadata.src_path
                    checksum = resp.metadata.checksum
                    file_name = os.path.join(dst_path, src_path)
                    os.makedirs(os.path.dirname(file_name), exist_ok=True)
                    out = open(file_name, "wb")
                elif resp.HasField("chunks") and out is not None:
                    out.write(resp.chunks)
        finally:
            if out is not None:
                out.close()
        return ClientFileDownloadResult(id=id, path=file_name, checksum=checksum)

    def track_artifacts(
        self, name: str, object_id: str, files: List[service_pb2.FileMetadata]
    ):
        """Record externally-stored files as artifacts of `object_id`."""
        req = service_pb2.TrackArtifactsRequest(
            name=name, object_id=object_id, files=files
        )
        resp = self._client.TrackArtifacts(req)
        return ClientTrackArtifactsResult(id=resp.id)

    def list_artifacts(self, object_id: str) -> service_pb2.ListArtifactsResponse:
        """List artifacts tracked for `object_id`."""
        return self._client.ListArtifacts(
            service_pb2.ListArtifactsRequest(object_id=object_id)
        )

    def log_event(
        self, parent_id: str, event: service_pb2.Event
    ) -> service_pb2.LogEventResponse:
        """Log `event` against `parent_id`."""
        req = service_pb2.LogEventRequest(parent_id=parent_id, event=event)
        return self._client.LogEvent(req)

    def list_events(self, parent_id: str) -> service_pb2.ListEventsResponse:
        """List all events for `parent_id` since the epoch.

        Fix: the return annotation previously said ListEventsRequest.
        """
        return self._client.ListEvents(
            service_pb2.ListEventsRequest(
                parent_id=parent_id, since=timestamp_pb2.Timestamp(seconds=0)
            )
        )

    def list_metadata(self, id: str) -> Dict:
        """Return the metadata map for `id`; empty dict when absent."""
        req = service_pb2.ListMetadataRequest(parent_id=id)
        resp = self._client.ListMetadata(req)
        if (resp.metadata is None) or (resp.metadata.metadata is None):
            return {}
        return resp.metadata.metadata

    def list_models(self, namespace: str) -> service_pb2.ListModelsResponse:
        """List models under `namespace`."""
        req = service_pb2.ListModelsRequest(namespace=namespace)
        return self._client.ListModels(req)

    def list_experiments(self, namespace: str) -> service_pb2.ListExperimentsResponse:
        """List experiments under `namespace`."""
        req = service_pb2.ListExperimentsRequest(namespace=namespace)
        return self._client.ListExperiments(req)

    def create_model_version(
        self,
        model_id: str,
        version: str,
        name: str,
        description: str,
        files: List[service_pb2.FileMetadata],
        framework_proto: int,
        unique_tags: List[str],
    ) -> service_pb2.CreateModelVersionResponse:
        """Create a version of model `model_id`.

        NOTE: `files` is accepted but not yet sent (see TODO below).
        """
        req = service_pb2.CreateModelVersionRequest(
            model=model_id,
            name=name,
            version=version,
            description=description,
            framework=framework_proto,
            unique_tags=unique_tags,
        )
        # TODO: Add files
        return self._client.CreateModelVersion(req)

    def list_model_versions(
        self, model_id: str
    ) -> service_pb2.ListModelVersionsResponse:
        """List all versions of model `model_id`."""
        return self._client.ListModelVersions(
            service_pb2.ListModelVersionsRequest(model=model_id)
        )

    def get_experiment(self, id: str) -> service_pb2.GetExperimentResponse:
        """Fetch a single experiment by id."""
        return self._client.GetExperiment(service_pb2.GetExperimentRequest(id=id))

    def close(self):
        """Close the underlying gRPC channel, if open."""
        if self._channel is not None:
            self._channel.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        return self.close()
"""Client and server classes corresponding to protobuf-defined services."""
# NOTE(review): generated module (grpcio-tools); do not hand-edit — fix
# admin.proto and regenerate instead.
import grpc

import admin_pb2 as admin__pb2


class ModelBoxAdminStub(object):
    """The RPC interface used by the workers
    """

    def __init__(self, channel):
        """Constructor.

        Args:
            channel: A grpc.Channel.
        """
        self.RegisterAgent = channel.unary_unary(
                '/modelbox.ModelBoxAdmin/RegisterAgent',
                request_serializer=admin__pb2.RegisterAgentRequest.SerializeToString,
                response_deserializer=admin__pb2.RegisterAgentResponse.FromString,
                )
        self.Heartbeat = channel.unary_unary(
                '/modelbox.ModelBoxAdmin/Heartbeat',
                request_serializer=admin__pb2.HeartbeatRequest.SerializeToString,
                response_deserializer=admin__pb2.HeartbeatResponse.FromString,
                )
        self.GetRunnableActionInstances = channel.unary_unary(
                '/modelbox.ModelBoxAdmin/GetRunnableActionInstances',
                request_serializer=admin__pb2.GetRunnableActionInstancesRequest.SerializeToString,
                response_deserializer=admin__pb2.GetRunnableActionInstancesResponse.FromString,
                )
        self.UpdateActionStatus = channel.unary_unary(
                '/modelbox.ModelBoxAdmin/UpdateActionStatus',
                request_serializer=admin__pb2.UpdateActionStatusRequest.SerializeToString,
                response_deserializer=admin__pb2.UpdateActionStatusResponse.FromString,
                )
        self.GetClusterMembers = channel.unary_unary(
                '/modelbox.ModelBoxAdmin/GetClusterMembers',
                request_serializer=admin__pb2.GetClusterMembersRequest.SerializeToString,
                response_deserializer=admin__pb2.GetClusterMembersResponse.FromString,
                )


class ModelBoxAdminServicer(object):
    """The RPC interface used by the workers
    """

    def RegisterAgent(self, request, context):
        """Register an agent capable of running plugins
        """
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def Heartbeat(self, request, context):
        """Workers heartbeat with the server about their presence
        and work progress periodically
        """
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def GetRunnableActionInstances(self, request, context):
        """Download the list of work that can be executed by an action runner
        """
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def UpdateActionStatus(self, request, context):
        """Update action status
        """
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def GetClusterMembers(self, request, context):
        """Returns the list of servers in a cluster.
        """
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')


def add_ModelBoxAdminServicer_to_server(servicer, server):
    rpc_method_handlers = {
            'RegisterAgent': grpc.unary_unary_rpc_method_handler(
                    servicer.RegisterAgent,
                    request_deserializer=admin__pb2.RegisterAgentRequest.FromString,
                    response_serializer=admin__pb2.RegisterAgentResponse.SerializeToString,
            ),
            'Heartbeat': grpc.unary_unary_rpc_method_handler(
                    servicer.Heartbeat,
                    request_deserializer=admin__pb2.HeartbeatRequest.FromString,
                    response_serializer=admin__pb2.HeartbeatResponse.SerializeToString,
            ),
            'GetRunnableActionInstances': grpc.unary_unary_rpc_method_handler(
                    servicer.GetRunnableActionInstances,
                    request_deserializer=admin__pb2.GetRunnableActionInstancesRequest.FromString,
                    response_serializer=admin__pb2.GetRunnableActionInstancesResponse.SerializeToString,
            ),
            'UpdateActionStatus': grpc.unary_unary_rpc_method_handler(
                    servicer.UpdateActionStatus,
                    request_deserializer=admin__pb2.UpdateActionStatusRequest.FromString,
                    response_serializer=admin__pb2.UpdateActionStatusResponse.SerializeToString,
            ),
            'GetClusterMembers': grpc.unary_unary_rpc_method_handler(
                    servicer.GetClusterMembers,
                    request_deserializer=admin__pb2.GetClusterMembersRequest.FromString,
                    response_serializer=admin__pb2.GetClusterMembersResponse.SerializeToString,
            ),
    }
    generic_handler = grpc.method_handlers_generic_handler(
            'modelbox.ModelBoxAdmin', rpc_method_handlers)
    server.add_generic_rpc_handlers((generic_handler,))


# This class is part of an EXPERIMENTAL API.
120 | class ModelBoxAdmin(object): 121 | """The RPC interface used by the workers 122 | """ 123 | 124 | @staticmethod 125 | def RegisterAgent(request, 126 | target, 127 | options=(), 128 | channel_credentials=None, 129 | call_credentials=None, 130 | insecure=False, 131 | compression=None, 132 | wait_for_ready=None, 133 | timeout=None, 134 | metadata=None): 135 | return grpc.experimental.unary_unary(request, target, '/modelbox.ModelBoxAdmin/RegisterAgent', 136 | admin__pb2.RegisterAgentRequest.SerializeToString, 137 | admin__pb2.RegisterAgentResponse.FromString, 138 | options, channel_credentials, 139 | insecure, call_credentials, compression, wait_for_ready, timeout, metadata) 140 | 141 | @staticmethod 142 | def Heartbeat(request, 143 | target, 144 | options=(), 145 | channel_credentials=None, 146 | call_credentials=None, 147 | insecure=False, 148 | compression=None, 149 | wait_for_ready=None, 150 | timeout=None, 151 | metadata=None): 152 | return grpc.experimental.unary_unary(request, target, '/modelbox.ModelBoxAdmin/Heartbeat', 153 | admin__pb2.HeartbeatRequest.SerializeToString, 154 | admin__pb2.HeartbeatResponse.FromString, 155 | options, channel_credentials, 156 | insecure, call_credentials, compression, wait_for_ready, timeout, metadata) 157 | 158 | @staticmethod 159 | def GetRunnableActionInstances(request, 160 | target, 161 | options=(), 162 | channel_credentials=None, 163 | call_credentials=None, 164 | insecure=False, 165 | compression=None, 166 | wait_for_ready=None, 167 | timeout=None, 168 | metadata=None): 169 | return grpc.experimental.unary_unary(request, target, '/modelbox.ModelBoxAdmin/GetRunnableActionInstances', 170 | admin__pb2.GetRunnableActionInstancesRequest.SerializeToString, 171 | admin__pb2.GetRunnableActionInstancesResponse.FromString, 172 | options, channel_credentials, 173 | insecure, call_credentials, compression, wait_for_ready, timeout, metadata) 174 | 175 | @staticmethod 176 | def UpdateActionStatus(request, 177 | target, 178 | 
options=(), 179 | channel_credentials=None, 180 | call_credentials=None, 181 | insecure=False, 182 | compression=None, 183 | wait_for_ready=None, 184 | timeout=None, 185 | metadata=None): 186 | return grpc.experimental.unary_unary(request, target, '/modelbox.ModelBoxAdmin/UpdateActionStatus', 187 | admin__pb2.UpdateActionStatusRequest.SerializeToString, 188 | admin__pb2.UpdateActionStatusResponse.FromString, 189 | options, channel_credentials, 190 | insecure, call_credentials, compression, wait_for_ready, timeout, metadata) 191 | 192 | @staticmethod 193 | def GetClusterMembers(request, 194 | target, 195 | options=(), 196 | channel_credentials=None, 197 | call_credentials=None, 198 | insecure=False, 199 | compression=None, 200 | wait_for_ready=None, 201 | timeout=None, 202 | metadata=None): 203 | return grpc.experimental.unary_unary(request, target, '/modelbox.ModelBoxAdmin/GetClusterMembers', 204 | admin__pb2.GetClusterMembersRequest.SerializeToString, 205 | admin__pb2.GetClusterMembersResponse.FromString, 206 | options, channel_credentials, 207 | insecure, call_credentials, compression, wait_for_ready, timeout, metadata) 208 | -------------------------------------------------------------------------------- /sdk-go/client.go: -------------------------------------------------------------------------------- 1 | package client 2 | 3 | import ( 4 | "context" 5 | "crypto/md5" 6 | "fmt" 7 | "io" 8 | "os" 9 | "strings" 10 | "time" 11 | 12 | "github.com/tensorland/modelbox/sdk-go/proto" 13 | "github.com/tensorland/modelbox/server" 14 | "github.com/tensorland/modelbox/server/storage/artifacts" 15 | "google.golang.org/grpc" 16 | "google.golang.org/grpc/credentials/insecure" 17 | "google.golang.org/protobuf/types/known/structpb" 18 | ) 19 | 20 | func MLFrameworkProtoFromStr(framework string) proto.MLFramework { 21 | switch strings.ToLower(framework) { 22 | case "pytorch": 23 | return proto.MLFramework_PYTORCH 24 | case "keras": 25 | return proto.MLFramework_KERAS 26 | } 27 | 
	return proto.MLFramework_UNKNOWN
}

const (
	// DEADLINE is the default timeout applied to every unary RPC issued by
	// this client.
	DEADLINE = 10 * time.Second
)

// CheckpointDownloadResponse carries the locally computed checksum and the
// checksum the server reported for a downloaded checkpoint.
type CheckpointDownloadResponse struct {
	Checksum       string
	ServerChecksum string
}

// FileUploadResponse carries the server-assigned file id and the locally
// computed checksum of an uploaded file.
type FileUploadResponse struct {
	Id       string
	Checksum string
}

// CreateModelApiResponse carries the id of a newly created model.
type CreateModelApiResponse struct {
	Id string
}

// ChangeStreamEventResponse is a single change event delivered by
// StremChangeEvents.
type ChangeStreamEventResponse struct {
	Event   uint8
	PayLoad *structpb.Value
}

// ModelBoxClient wraps a gRPC connection to the ModelBox ModelStore service.
type ModelBoxClient struct {
	conn   *grpc.ClientConn
	client proto.ModelStoreClient
}

// NewModelBoxClient dials addr and returns a client for the ModelStore
// service. The connection uses insecure (plaintext) transport credentials.
func NewModelBoxClient(addr string) (*ModelBoxClient, error) {
	var opts []grpc.DialOption
	opts = append(opts, grpc.WithTransportCredentials(insecure.NewCredentials()))
	conn, err := grpc.Dial(addr, opts...)
	if err != nil {
		return nil, err
	}
	client := proto.NewModelStoreClient(conn)
	return &ModelBoxClient{conn: conn, client: client}, nil
}

// CreateExperiment creates an experiment and returns its server-assigned id.
func (m *ModelBoxClient) CreateExperiment(name, owner, namespace, framework string) (string, error) {
	ctx, cancel := context.WithTimeout(context.Background(), DEADLINE)
	defer cancel()
	req := &proto.CreateExperimentRequest{
		Name:      name,
		Owner:     owner,
		Namespace: namespace,
		Framework: MLFrameworkProtoFromStr(framework),
	}
	resp, err := m.client.CreateExperiment(ctx, req)
	if err != nil {
		return "", err
	}
	return resp.ExperimentId, nil
}

// ListExperiments lists all experiments under a namespace.
func (m *ModelBoxClient) ListExperiments(namespace string) (*proto.ListExperimentsResponse, error) {
	ctx, cancel := context.WithTimeout(context.Background(), DEADLINE)
	defer cancel()
	req := &proto.ListExperimentsRequest{Namespace: namespace}
	return m.client.ListExperiments(ctx, req)
}

// ListCheckpoints lists the checkpoints recorded for an experiment.
func (m *ModelBoxClient) ListCheckpoints(experimentId string) (*proto.ListCheckpointsResponse, error) {
	ctx, cancel := context.WithTimeout(context.Background(), DEADLINE)
	defer cancel()
	req := &proto.ListCheckpointsRequest{ExperimentId: experimentId}
	return m.client.ListCheckpoints(ctx, req)
}

// ClusterMembers returns the list of servers in the ModelBox cluster.
func (m *ModelBoxClient) ClusterMembers() (*proto.GetClusterMembersResponse, error) {
	ctx, cancel := context.WithTimeout(context.Background(), DEADLINE)
	defer cancel()

	req := &proto.GetClusterMembersRequest{}
	return m.client.GetClusterMembers(ctx, req)
}

// CreateAction creates an action attached to the given object.
// NOTE(review): the params argument is currently dropped — the Params field
// is commented out below; confirm whether that is intentional.
func (m *ModelBoxClient) CreateAction(name, arch, object, params string) (*proto.CreateActionResponse, error) {
	ctx, cancel := context.WithTimeout(context.Background(), DEADLINE)
	defer cancel()

	req := &proto.CreateActionRequest{
		Name:     name,
		Arch:     arch,
		ObjectId: object,
		// Params: params,
	}
	return m.client.CreateActions(ctx, req)
}

// ListActions lists actions attached to an object.
func (m *ModelBoxClient) ListActions(objectId string) (*proto.ListActionsResponse, error) {
	ctx, cancel := context.WithTimeout(context.Background(), DEADLINE)
	defer cancel()

	req := &proto.ListActionsRequest{ObjectId: objectId}
	return m.client.ListActions(ctx, req)
}

// CreateModel registers a new model and returns its id.
// NOTE(review): the metadata argument is accepted but never sent with the
// request, and Files is set although the visible service.proto has no files
// field on CreateModelRequest — verify against the current proto definition.
func (m *ModelBoxClient) CreateModel(name, owner, namespace, task, description string, metadata map[string]string, files []*proto.FileMetadata) (*CreateModelApiResponse, error) {
	ctx, cancel := context.WithTimeout(context.Background(), DEADLINE)
	defer cancel()
	req := &proto.CreateModelRequest{
		Name:        name,
		Owner:       owner,
		Namespace:   namespace,
		Task:        task,
		Description: description,
		Files:       files,
	}

	resp, err := m.client.CreateModel(ctx, req)
	if err != nil {
		return nil, fmt.Errorf("unable to create model: %v", err)
	}
	return &CreateModelApiResponse{Id: resp.Id}, nil
}

// ListModels returns all models registered under a namespace.
func (m *ModelBoxClient) ListModels(namespace string) ([]*proto.Model, error) {
	ctx, cancel := context.WithTimeout(context.Background(),
DEADLINE) 149 | defer cancel() 150 | req := &proto.ListModelsRequest{ 151 | Namespace: namespace, 152 | } 153 | 154 | resp, err := m.client.ListModels(ctx, req) 155 | if err != nil { 156 | return nil, err 157 | } 158 | return resp.Models, nil 159 | } 160 | 161 | type ApiCreateCheckpoint struct { 162 | ExperimentId string 163 | Epoch uint64 164 | Path string 165 | } 166 | 167 | func (m *ModelBoxClient) CreateCheckpoint(chk *ApiCreateCheckpoint) (*proto.CreateCheckpointResponse, error) { 168 | checkpointRequest := proto.CreateCheckpointRequest{ 169 | ExperimentId: chk.ExperimentId, 170 | Epoch: chk.Epoch, 171 | Files: []*proto.FileMetadata{{ 172 | ParentId: chk.ExperimentId, 173 | Path: chk.Path, 174 | }}, 175 | } 176 | response, err := m.client.CreateCheckpoint(context.Background(), &checkpointRequest) 177 | if err != nil { 178 | return nil, err 179 | } 180 | return response, nil 181 | } 182 | 183 | func (m *ModelBoxClient) UploadFile(path, parentId string, t artifacts.FileMIMEType) (*FileUploadResponse, error) { 184 | // This makes us read the file twice, this could be simplified 185 | // if we do bidirectional stream and send the 186 | // checkpoint at the end of the strem to the server to validate the file 187 | checksum, err := m.getChecksum(path) 188 | if err != nil { 189 | return nil, fmt.Errorf("unable to compute checksum: %v ", err) 190 | } 191 | f, err := os.Open(path) 192 | if err != nil { 193 | return nil, fmt.Errorf("unable to open file: %v", err) 194 | } 195 | defer f.Close() 196 | ctx, cancel := context.WithTimeout(context.Background(), DEADLINE) 197 | defer cancel() 198 | req := &proto.UploadFileRequest{ 199 | StreamFrame: &proto.UploadFileRequest_Metadata{ 200 | Metadata: &proto.FileMetadata{ 201 | ParentId: parentId, 202 | FileType: server.FileTypeToProto(t), 203 | Checksum: checksum, 204 | }, 205 | }, 206 | } 207 | stream, err := m.client.UploadFile(ctx) 208 | if err != nil { 209 | return nil, fmt.Errorf("unable to create client stream %v", err) 
210 | } 211 | if err := stream.Send(req); err != nil { 212 | return nil, fmt.Errorf("unable to send metadata: %v", err) 213 | } 214 | var recvMsg proto.UploadFileResponse 215 | if err := stream.RecvMsg(&recvMsg); err != nil { 216 | return nil, fmt.Errorf("unable to write metadata: %v", err) 217 | } 218 | if recvMsg.FileId != "" { 219 | return &FileUploadResponse{recvMsg.FileId, checksum}, nil 220 | } 221 | bytes := make([]byte, 1024000) 222 | for { 223 | n, e := f.Read(bytes) 224 | if e == io.EOF { 225 | break 226 | } 227 | if e != nil && e != io.EOF { 228 | return nil, fmt.Errorf("unable to read file: %v", err) 229 | } 230 | req := &proto.UploadFileRequest{ 231 | StreamFrame: &proto.UploadFileRequest_Chunks{Chunks: bytes[:n]}, 232 | } 233 | if err := stream.Send(req); err != nil { 234 | return nil, fmt.Errorf("unable to upload chunks: %v", err) 235 | } 236 | } 237 | resp, err := stream.CloseAndRecv() 238 | if err != nil { 239 | return nil, fmt.Errorf("unable to close stream: %v", err) 240 | } 241 | return &FileUploadResponse{resp.FileId, checksum}, nil 242 | } 243 | 244 | func (m *ModelBoxClient) DownloadBlob(id, path string) (*CheckpointDownloadResponse, error) { 245 | ctx, cancel := context.WithTimeout(context.Background(), DEADLINE) 246 | defer cancel() 247 | req := &proto.DownloadFileRequest{ 248 | FileId: id, 249 | } 250 | h := md5.New() 251 | stream, err := m.client.DownloadFile(ctx, req) 252 | if err != nil { 253 | return nil, err 254 | } 255 | f, err := os.Create(path) 256 | if err != nil { 257 | return nil, err 258 | } 259 | defer f.Close() 260 | 261 | resp, err := stream.Recv() 262 | if err != nil { 263 | return nil, err 264 | } 265 | for { 266 | chunks, err := stream.Recv() 267 | if err == io.EOF { 268 | break 269 | } 270 | if err != nil { 271 | return nil, err 272 | } 273 | bytes := chunks.GetChunks() 274 | _, err = f.Write(bytes) 275 | h.Write(bytes) 276 | if err != nil { 277 | stream.CloseSend() 278 | return nil, err 279 | } 280 | } 281 | checksum := 
fmt.Sprintf("%x", h.Sum(nil)) 282 | serverChecksum := resp.GetMetadata().GetChecksum() 283 | if checksum != serverChecksum { 284 | return nil, fmt.Errorf("actual checksum %v, calculated checksum %v", serverChecksum, checksum) 285 | } 286 | return &CheckpointDownloadResponse{Checksum: checksum, ServerChecksum: serverChecksum}, nil 287 | } 288 | 289 | func (m *ModelBoxClient) getChecksum(path string) (string, error) { 290 | f, err := os.Open(path) 291 | if err != nil { 292 | return "", fmt.Errorf("error opening file: %v", err) 293 | } 294 | defer f.Close() 295 | 296 | h := md5.New() 297 | if _, err := io.Copy(h, f); err != nil { 298 | return "", fmt.Errorf("error reading file while calculating checksum: %v", err) 299 | } 300 | return fmt.Sprintf("%x", h.Sum(nil)), nil 301 | } 302 | 303 | func (m *ModelBoxClient) StremChangeEvents(namespace string, cb func(*ChangeStreamEventResponse) error) error { 304 | req := &proto.WatchNamespaceRequest{ 305 | Namespace: namespace, 306 | Since: uint64(time.Now().Unix()), 307 | } 308 | 309 | resp, err := m.client.WatchNamespace(context.Background(), req) 310 | if err != nil { 311 | return fmt.Errorf("unable to request change events: %v", err) 312 | } 313 | for { 314 | resp, err := resp.Recv() 315 | if err != nil { 316 | if err == io.EOF { 317 | return nil 318 | } 319 | return fmt.Errorf("stream closed with error: %v", err) 320 | } 321 | streamEvent := &ChangeStreamEventResponse{ 322 | Event: 1, 323 | PayLoad: resp.Payload, 324 | } 325 | if err := cb(streamEvent); err != nil { 326 | return fmt.Errorf("cb error: %v", err) 327 | } 328 | } 329 | } 330 | -------------------------------------------------------------------------------- /website/static/img/undraw_docusaurus_tree.svg: -------------------------------------------------------------------------------- 1 | 2 | Focus on What Matters 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 
syntax = "proto3";
import "google/protobuf/timestamp.proto";
import "google/protobuf/struct.proto";

option go_package = "github.com/tensorland/modelbox/sdk-go/proto";

package modelbox;

/**
 * ModelStore is the service exposed to upload trained models and training
 * checkpoints, and manage metadata around them.
 */
service ModelStore {
  // Create a new Model under a namespace. If no namespace is specified, models
  // are created under a default namespace.
  rpc CreateModel(CreateModelRequest) returns (CreateModelResponse);

  // List Models uploaded for a namespace
  rpc ListModels(ListModelsRequest) returns (ListModelsResponse);

  // Creates a new model version for a model
  rpc CreateModelVersion(CreateModelVersionRequest)
      returns (CreateModelVersionResponse);

  // Lists model versions for a model.
  rpc ListModelVersions(ListModelVersionsRequest)
      returns (ListModelVersionsResponse);

  // Creates a new experiment
  rpc CreateExperiment(CreateExperimentRequest)
      returns (CreateExperimentResponse);

  // List Experiments
  rpc ListExperiments(ListExperimentsRequest) returns (ListExperimentsResponse);

  // Get Experiments
  rpc GetExperiment(GetExperimentRequest) returns (GetExperimentResponse);

  // UploadFile streams a file to ModelBox and stores the binaries to the configured storage
  rpc UploadFile(stream UploadFileRequest) returns (UploadFileResponse);

  // DownloadFile downloads a file from configured storage
  rpc DownloadFile(DownloadFileRequest) returns (stream DownloadFileResponse);

  // Persists a set of metadata related to objects
  rpc UpdateMetadata(UpdateMetadataRequest) returns (UpdateMetadataResponse);

  // Lists metadata associated with an object
  rpc ListMetadata(ListMetadataRequest) returns (ListMetadataResponse);

  // Tracks a set of artifacts with an experiment/checkpoint/model
  rpc TrackArtifacts(TrackArtifactsRequest) returns (TrackArtifactsResponse);

  // List artifacts for an experiment/model/model version
  rpc ListArtifacts(ListArtifactsRequest) returns (ListArtifactsResponse);

  // Log Metrics for an experiment, model or checkpoint
  rpc LogMetrics(LogMetricsRequest) returns (LogMetricsResponse);

  // Get metrics logged for an experiment, model or checkpoint.
  rpc GetMetrics(GetMetricsRequest) returns (GetMetricsResponse);

  // Log an event from any system interacting with metadata of an experiment, models or
  // using a trained model or checkpoint.
  rpc LogEvent(LogEventRequest) returns (LogEventResponse);

  // List events logged for an experiment/model, etc.
  rpc ListEvents(ListEventsRequest) returns (ListEventsResponse);

  // Streams change events in any of objects such as experiments, models, etc, for a given namespace
  // Response is a json representation of the new state of the object
  rpc WatchNamespace(WatchNamespaceRequest) returns (stream WatchNamespaceResponse);
}

// Request to watch events in a namespace, such as experiments/models/model versions
// being created or updated.
message WatchNamespaceRequest {
  string namespace = 1;

  uint64 since = 2;
}

enum ChangeEvent {
  CHANGE_EVENT_UNDEFINED = 0;

  OBJECT_CREATED = 1;

  OBJECT_UPDATED = 2;
}

message WatchNamespaceResponse {
  ChangeEvent event = 1;

  google.protobuf.Value payload = 2;
}

// Metrics contain the metric values for a given key
message Metrics {
  string key = 1;

  repeated MetricsValue values = 2;
}

// Metric Value at a given point of time.
message MetricsValue {
  uint64 step = 1;

  uint64 wallclock_time = 2;

  oneof value {
    float f_val = 5;

    string s_tensor = 6;

    bytes b_tensor = 7;
  }
}

// Message for logging a metric value at a given time
message LogMetricsRequest {
  string parent_id = 1;

  string key = 2;

  MetricsValue value = 3;
}

message LogMetricsResponse {}

message GetMetricsRequest {
  string parent_id = 1;
}

message GetMetricsResponse {
  // All metrics logged under the parent object, keyed by metric name.
  // NOTE(review): the map's type parameters were lost in extraction (a bare
  // `map` is invalid proto3); restored as map<string, Metrics> to match the
  // Metrics message above — confirm against the generated SDK code.
  map<string, Metrics> metrics = 1;
}

message TrackArtifactsRequest {
  string name = 1;
  string object_id = 2;
  repeated FileMetadata files = 3;
}

message TrackArtifactsResponse {
  string id = 1;
}

message ListArtifactsRequest {
  string object_id = 1;
}

message ListArtifactsResponse {
  repeated Artifact artifacts = 1;
}

/*
 * FileMetadata contains information about the file associated with a model version
 * such as model binaries, other meta data files related to the model.
 * This could either be sent as part of the model version creation request to track files
 * already managed by another storage service, or as the first message while uploading a file
 * to be managed by ModelBox.
 */
message FileMetadata {
  string id = 1;

  // The ID of the experiment, model to which this file belongs to
  string parent_id = 2;

  // MIMEType of the file
  FileType file_type = 3;

  // checksum of the file
  string checksum = 4;

  // path of the file
  string src_path = 5;

  // path of uploaded file
  string upload_path = 6;

  google.protobuf.Timestamp created_at = 20;
  google.protobuf.Timestamp updated_at = 21;
}

enum FileType {
  UNDEFINED = 0;
  MODEL = 1;
  CHECKPOINT = 2;
  TEXT = 3;
  IMAGE = 4;
  AUDIO = 5;
  VIDEO = 6;
}

message DownloadFileRequest { string file_id = 1; }

// Server->client stream frame: the first frame carries the file metadata,
// subsequent frames carry raw byte chunks.
message DownloadFileResponse {
  oneof stream_frame {
    FileMetadata metadata = 1;
    bytes chunks = 2;
  }
}

// Client->server stream frame: metadata first, then raw byte chunks.
message UploadFileRequest {
  oneof stream_frame {
    UploadFileMetadata metadata = 1;
    bytes chunks = 2;
  }
}

message UploadFileResponse {
  string file_id = 1;
  string artifact_id = 2;
}

message UploadFileMetadata {
  string artifact_name = 1;
  string object_id = 2;
  FileMetadata metadata = 3;
}

message Artifact {
  string id = 1;
  string name = 2;
  string object_id = 3;
  repeated FileMetadata files = 4;
}

/*
 * Model contains metadata about a model which solves a particular use case.
 */
message Model {
  string id = 1;
  string name = 2;
  string owner = 3;
  string namespace = 4;
  string description = 5;
  string task = 6;
  google.protobuf.Timestamp created_at = 20;
  google.protobuf.Timestamp updated_at = 21;
}

/**
 * Create a new Model. If the id points to an existing model a new model version
 * is created.
 */
message CreateModelRequest {
  // NOTE(review): field numbering starts at 2 — presumably field 1 was
  // removed historically; consider a `reserved 1;` declaration to document it.
  string name = 2;
  string owner = 3;
  string namespace = 4;
  string task = 5;
  string description = 6;
}

message CreateModelResponse {
  string id = 1;
  bool exists = 2;
  google.protobuf.Timestamp created_at = 20;
  google.protobuf.Timestamp updated_at = 21;
}

/*
 * Deep Learning frameworks known to ModelBox
 */
enum MLFramework {
  UNKNOWN = 0;
  PYTORCH = 1;
  KERAS = 2;
}

/**
 * ModelVersion contains a trained model binary, metrics related to the model
 * such as accuracy on various datasets, performance on a hardware, etc. Model
 * Versions are always linked to a model.
 */
message ModelVersion {
  string id = 1;
  string model_id = 2;
  string name = 3;
  string version = 4;
  string description = 5;
  MLFramework framework = 8;
  repeated string unique_tags = 9;
  google.protobuf.Timestamp created_at = 20;
  google.protobuf.Timestamp updated_at = 21;
}

message CreateModelVersionRequest {
  string model = 1;
  string name = 2;
  string version = 3;
  string description = 4;
  string namespace = 5;
  MLFramework framework = 8;
  repeated string unique_tags = 9;
}

message CreateModelVersionResponse {
  string model_version = 1;
  bool exists = 2;
  google.protobuf.Timestamp created_at = 20;
  google.protobuf.Timestamp updated_at = 21;
}

/**
 * Experiments are the sources of Model checkpoints. They track various details
 * related to the training runs which created the models such as hyper
 * parameters, etc.
309 | */ 310 | message Experiment { 311 | string id = 1; 312 | string name = 2; 313 | string namespace = 3; 314 | string owner = 4; 315 | MLFramework framework = 5; 316 | string external_id = 7; 317 | google.protobuf.Timestamp created_at = 20; 318 | google.protobuf.Timestamp updated_at = 21; 319 | } 320 | 321 | message CreateExperimentRequest { 322 | string name = 1; 323 | string owner = 2; 324 | string namespace = 3; 325 | MLFramework framework = 4; 326 | string task = 5; 327 | string external_id = 7; 328 | } 329 | 330 | message CreateExperimentResponse { 331 | string experiment_id = 1; 332 | bool experiment_exists = 2; 333 | google.protobuf.Timestamp created_at = 20; 334 | google.protobuf.Timestamp updated_at = 21; 335 | } 336 | 337 | message ListExperimentsRequest { string namespace= 1; } 338 | 339 | message ListExperimentsResponse { repeated Experiment experiments = 1; } 340 | 341 | message ListModelVersionsRequest { string model = 1; } 342 | 343 | message ListModelVersionsResponse { repeated ModelVersion model_versions = 1; } 344 | 345 | message ListModelsRequest { string namespace = 1; } 346 | 347 | message ListModelsResponse { repeated Model models = 1; } 348 | 349 | message Metadata { 350 | map metadata = 1; 351 | } 352 | 353 | message UpdateMetadataRequest { 354 | string parent_id = 1; 355 | Metadata metadata = 2; 356 | } 357 | 358 | message UpdateMetadataResponse { 359 | } 360 | 361 | message ListMetadataRequest { 362 | string parent_id = 1; 363 | } 364 | 365 | message ListMetadataResponse { 366 | Metadata metadata = 1; 367 | } 368 | 369 | message EventSource { 370 | string name = 1; 371 | } 372 | 373 | message Event { 374 | string name = 2; 375 | EventSource source = 3; 376 | google.protobuf.Timestamp wallclock_time = 4; 377 | Metadata metadata = 5; 378 | } 379 | 380 | /** 381 | * Contains information about an event being logged about 382 | * an experiment or a model or a checkpoint by any system interacting 383 | * or using the object. 
384 | */ 385 | message LogEventRequest { 386 | string parent_id = 1; 387 | Event event = 2; 388 | } 389 | 390 | message LogEventResponse { 391 | google.protobuf.Timestamp created_at = 1; 392 | } 393 | 394 | message ListEventsRequest { 395 | string parent_id = 1; 396 | google.protobuf.Timestamp since = 2; 397 | } 398 | 399 | message ListEventsResponse { 400 | repeated Event events = 1; 401 | } 402 | 403 | message GetExperimentRequest { 404 | string id = 1; 405 | } 406 | 407 | message GetExperimentResponse { 408 | Experiment experiment = 1; 409 | } -------------------------------------------------------------------------------- /sdk-py/modelbox/admin_pb2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by the protocol buffer compiler. DO NOT EDIT! 3 | # source: admin.proto 4 | """Generated protocol buffer code.""" 5 | from google.protobuf import descriptor as _descriptor 6 | from google.protobuf import descriptor_pool as _descriptor_pool 7 | from google.protobuf import message as _message 8 | from google.protobuf import reflection as _reflection 9 | from google.protobuf import symbol_database as _symbol_database 10 | # @@protoc_insertion_point(imports) 11 | 12 | _sym_db = _symbol_database.Default() 13 | 14 | 15 | from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2 16 | from google.protobuf import struct_pb2 as google_dot_protobuf_dot_struct__pb2 17 | 18 | 19 | DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0b\x61\x64min.proto\x12\x08modelbox\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x1cgoogle/protobuf/struct.proto\"\x1a\n\x18GetClusterMembersRequest\"S\n\rClusterMember\x12\n\n\x02id\x18\x01 \x01(\t\x12\x11\n\thost_name\x18\x02 \x01(\t\x12\x10\n\x08rpc_addr\x18\x03 \x01(\t\x12\x11\n\thttp_addr\x18\x04 \x01(\t\"E\n\x19GetClusterMembersResponse\x12(\n\x07members\x18\x01 
\x03(\x0b\x32\x17.modelbox.ClusterMember\"<\n\x08NodeInfo\x12\x11\n\thost_name\x18\x01 \x01(\t\x12\x0f\n\x07ip_addr\x18\x02 \x01(\t\x12\x0c\n\x04\x61rch\x18\x03 \x01(\t\"K\n\x10HeartbeatRequest\x12\x0f\n\x07node_id\x18\x01 \x01(\t\x12&\n\x02\x61t\x18\x14 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\"\x13\n\x11HeartbeatResponse\"`\n\x15SubscribeEventRequest\x12\x11\n\tnamespace\x18\x01 \x01(\t\x12\x14\n\x0cml_framework\x18\x02 \x01(\t\x12\r\n\x05owner\x18\x03 \x01(\t\x12\x0f\n\x07\x61\x63tions\x18\x04 \x03(\t\"Q\n\x14RegisterAgentRequest\x12%\n\tnode_info\x18\x01 \x01(\x0b\x32\x12.modelbox.NodeInfo\x12\x12\n\nagent_name\x18\x02 \x01(\t\"(\n\x15RegisterAgentResponse\x12\x0f\n\x07node_id\x18\x01 \x01(\t\"F\n!GetRunnableActionInstancesRequest\x12\x13\n\x0b\x61\x63tion_name\x18\x01 \x01(\t\x12\x0c\n\x04\x61rch\x18\x02 \x01(\t\"\xbd\x01\n\x0eRunnableAction\x12\n\n\x02id\x18\x01 \x01(\t\x12\x11\n\taction_id\x18\x02 \x01(\t\x12\x0f\n\x07\x63ommand\x18\x03 \x01(\t\x12\x34\n\x06params\x18\x05 \x03(\x0b\x32$.modelbox.RunnableAction.ParamsEntry\x1a\x45\n\x0bParamsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12%\n\x05value\x18\x02 \x01(\x0b\x32\x16.google.protobuf.Value:\x02\x38\x01\"Q\n\"GetRunnableActionInstancesResponse\x12+\n\tinstances\x18\x01 \x03(\x0b\x32\x18.modelbox.RunnableAction\"\x85\x01\n\x19UpdateActionStatusRequest\x12\x1a\n\x12\x61\x63tion_instance_id\x18\x01 \x01(\t\x12\x0e\n\x06status\x18\x02 \x01(\r\x12\x0f\n\x07outcome\x18\x03 \x01(\r\x12\x16\n\x0eoutcome_reason\x18\x04 \x01(\t\x12\x13\n\x0budpate_time\x18\x05 
\x01(\x04\"\x1c\n\x1aUpdateActionStatusResponse2\xdf\x03\n\rModelBoxAdmin\x12P\n\rRegisterAgent\x12\x1e.modelbox.RegisterAgentRequest\x1a\x1f.modelbox.RegisterAgentResponse\x12\x44\n\tHeartbeat\x12\x1a.modelbox.HeartbeatRequest\x1a\x1b.modelbox.HeartbeatResponse\x12w\n\x1aGetRunnableActionInstances\x12+.modelbox.GetRunnableActionInstancesRequest\x1a,.modelbox.GetRunnableActionInstancesResponse\x12_\n\x12UpdateActionStatus\x12#.modelbox.UpdateActionStatusRequest\x1a$.modelbox.UpdateActionStatusResponse\x12\\\n\x11GetClusterMembers\x12\".modelbox.GetClusterMembersRequest\x1a#.modelbox.GetClusterMembersResponseB-Z+github.com/tensorland/modelbox/sdk-go/protob\x06proto3') 20 | 21 | 22 | 23 | _GETCLUSTERMEMBERSREQUEST = DESCRIPTOR.message_types_by_name['GetClusterMembersRequest'] 24 | _CLUSTERMEMBER = DESCRIPTOR.message_types_by_name['ClusterMember'] 25 | _GETCLUSTERMEMBERSRESPONSE = DESCRIPTOR.message_types_by_name['GetClusterMembersResponse'] 26 | _NODEINFO = DESCRIPTOR.message_types_by_name['NodeInfo'] 27 | _HEARTBEATREQUEST = DESCRIPTOR.message_types_by_name['HeartbeatRequest'] 28 | _HEARTBEATRESPONSE = DESCRIPTOR.message_types_by_name['HeartbeatResponse'] 29 | _SUBSCRIBEEVENTREQUEST = DESCRIPTOR.message_types_by_name['SubscribeEventRequest'] 30 | _REGISTERAGENTREQUEST = DESCRIPTOR.message_types_by_name['RegisterAgentRequest'] 31 | _REGISTERAGENTRESPONSE = DESCRIPTOR.message_types_by_name['RegisterAgentResponse'] 32 | _GETRUNNABLEACTIONINSTANCESREQUEST = DESCRIPTOR.message_types_by_name['GetRunnableActionInstancesRequest'] 33 | _RUNNABLEACTION = DESCRIPTOR.message_types_by_name['RunnableAction'] 34 | _RUNNABLEACTION_PARAMSENTRY = _RUNNABLEACTION.nested_types_by_name['ParamsEntry'] 35 | _GETRUNNABLEACTIONINSTANCESRESPONSE = DESCRIPTOR.message_types_by_name['GetRunnableActionInstancesResponse'] 36 | _UPDATEACTIONSTATUSREQUEST = DESCRIPTOR.message_types_by_name['UpdateActionStatusRequest'] 37 | _UPDATEACTIONSTATUSRESPONSE = 
DESCRIPTOR.message_types_by_name['UpdateActionStatusResponse'] 38 | GetClusterMembersRequest = _reflection.GeneratedProtocolMessageType('GetClusterMembersRequest', (_message.Message,), { 39 | 'DESCRIPTOR' : _GETCLUSTERMEMBERSREQUEST, 40 | '__module__' : 'admin_pb2' 41 | # @@protoc_insertion_point(class_scope:modelbox.GetClusterMembersRequest) 42 | }) 43 | _sym_db.RegisterMessage(GetClusterMembersRequest) 44 | 45 | ClusterMember = _reflection.GeneratedProtocolMessageType('ClusterMember', (_message.Message,), { 46 | 'DESCRIPTOR' : _CLUSTERMEMBER, 47 | '__module__' : 'admin_pb2' 48 | # @@protoc_insertion_point(class_scope:modelbox.ClusterMember) 49 | }) 50 | _sym_db.RegisterMessage(ClusterMember) 51 | 52 | GetClusterMembersResponse = _reflection.GeneratedProtocolMessageType('GetClusterMembersResponse', (_message.Message,), { 53 | 'DESCRIPTOR' : _GETCLUSTERMEMBERSRESPONSE, 54 | '__module__' : 'admin_pb2' 55 | # @@protoc_insertion_point(class_scope:modelbox.GetClusterMembersResponse) 56 | }) 57 | _sym_db.RegisterMessage(GetClusterMembersResponse) 58 | 59 | NodeInfo = _reflection.GeneratedProtocolMessageType('NodeInfo', (_message.Message,), { 60 | 'DESCRIPTOR' : _NODEINFO, 61 | '__module__' : 'admin_pb2' 62 | # @@protoc_insertion_point(class_scope:modelbox.NodeInfo) 63 | }) 64 | _sym_db.RegisterMessage(NodeInfo) 65 | 66 | HeartbeatRequest = _reflection.GeneratedProtocolMessageType('HeartbeatRequest', (_message.Message,), { 67 | 'DESCRIPTOR' : _HEARTBEATREQUEST, 68 | '__module__' : 'admin_pb2' 69 | # @@protoc_insertion_point(class_scope:modelbox.HeartbeatRequest) 70 | }) 71 | _sym_db.RegisterMessage(HeartbeatRequest) 72 | 73 | HeartbeatResponse = _reflection.GeneratedProtocolMessageType('HeartbeatResponse', (_message.Message,), { 74 | 'DESCRIPTOR' : _HEARTBEATRESPONSE, 75 | '__module__' : 'admin_pb2' 76 | # @@protoc_insertion_point(class_scope:modelbox.HeartbeatResponse) 77 | }) 78 | _sym_db.RegisterMessage(HeartbeatResponse) 79 | 80 | SubscribeEventRequest = 
_reflection.GeneratedProtocolMessageType('SubscribeEventRequest', (_message.Message,), { 81 | 'DESCRIPTOR' : _SUBSCRIBEEVENTREQUEST, 82 | '__module__' : 'admin_pb2' 83 | # @@protoc_insertion_point(class_scope:modelbox.SubscribeEventRequest) 84 | }) 85 | _sym_db.RegisterMessage(SubscribeEventRequest) 86 | 87 | RegisterAgentRequest = _reflection.GeneratedProtocolMessageType('RegisterAgentRequest', (_message.Message,), { 88 | 'DESCRIPTOR' : _REGISTERAGENTREQUEST, 89 | '__module__' : 'admin_pb2' 90 | # @@protoc_insertion_point(class_scope:modelbox.RegisterAgentRequest) 91 | }) 92 | _sym_db.RegisterMessage(RegisterAgentRequest) 93 | 94 | RegisterAgentResponse = _reflection.GeneratedProtocolMessageType('RegisterAgentResponse', (_message.Message,), { 95 | 'DESCRIPTOR' : _REGISTERAGENTRESPONSE, 96 | '__module__' : 'admin_pb2' 97 | # @@protoc_insertion_point(class_scope:modelbox.RegisterAgentResponse) 98 | }) 99 | _sym_db.RegisterMessage(RegisterAgentResponse) 100 | 101 | GetRunnableActionInstancesRequest = _reflection.GeneratedProtocolMessageType('GetRunnableActionInstancesRequest', (_message.Message,), { 102 | 'DESCRIPTOR' : _GETRUNNABLEACTIONINSTANCESREQUEST, 103 | '__module__' : 'admin_pb2' 104 | # @@protoc_insertion_point(class_scope:modelbox.GetRunnableActionInstancesRequest) 105 | }) 106 | _sym_db.RegisterMessage(GetRunnableActionInstancesRequest) 107 | 108 | RunnableAction = _reflection.GeneratedProtocolMessageType('RunnableAction', (_message.Message,), { 109 | 110 | 'ParamsEntry' : _reflection.GeneratedProtocolMessageType('ParamsEntry', (_message.Message,), { 111 | 'DESCRIPTOR' : _RUNNABLEACTION_PARAMSENTRY, 112 | '__module__' : 'admin_pb2' 113 | # @@protoc_insertion_point(class_scope:modelbox.RunnableAction.ParamsEntry) 114 | }) 115 | , 116 | 'DESCRIPTOR' : _RUNNABLEACTION, 117 | '__module__' : 'admin_pb2' 118 | # @@protoc_insertion_point(class_scope:modelbox.RunnableAction) 119 | }) 120 | _sym_db.RegisterMessage(RunnableAction) 121 | 
_sym_db.RegisterMessage(RunnableAction.ParamsEntry) 122 | 123 | GetRunnableActionInstancesResponse = _reflection.GeneratedProtocolMessageType('GetRunnableActionInstancesResponse', (_message.Message,), { 124 | 'DESCRIPTOR' : _GETRUNNABLEACTIONINSTANCESRESPONSE, 125 | '__module__' : 'admin_pb2' 126 | # @@protoc_insertion_point(class_scope:modelbox.GetRunnableActionInstancesResponse) 127 | }) 128 | _sym_db.RegisterMessage(GetRunnableActionInstancesResponse) 129 | 130 | UpdateActionStatusRequest = _reflection.GeneratedProtocolMessageType('UpdateActionStatusRequest', (_message.Message,), { 131 | 'DESCRIPTOR' : _UPDATEACTIONSTATUSREQUEST, 132 | '__module__' : 'admin_pb2' 133 | # @@protoc_insertion_point(class_scope:modelbox.UpdateActionStatusRequest) 134 | }) 135 | _sym_db.RegisterMessage(UpdateActionStatusRequest) 136 | 137 | UpdateActionStatusResponse = _reflection.GeneratedProtocolMessageType('UpdateActionStatusResponse', (_message.Message,), { 138 | 'DESCRIPTOR' : _UPDATEACTIONSTATUSRESPONSE, 139 | '__module__' : 'admin_pb2' 140 | # @@protoc_insertion_point(class_scope:modelbox.UpdateActionStatusResponse) 141 | }) 142 | _sym_db.RegisterMessage(UpdateActionStatusResponse) 143 | 144 | _MODELBOXADMIN = DESCRIPTOR.services_by_name['ModelBoxAdmin'] 145 | if _descriptor._USE_C_DESCRIPTORS == False: 146 | 147 | DESCRIPTOR._options = None 148 | DESCRIPTOR._serialized_options = b'Z+github.com/tensorland/modelbox/sdk-go/proto' 149 | _RUNNABLEACTION_PARAMSENTRY._options = None 150 | _RUNNABLEACTION_PARAMSENTRY._serialized_options = b'8\001' 151 | _GETCLUSTERMEMBERSREQUEST._serialized_start=88 152 | _GETCLUSTERMEMBERSREQUEST._serialized_end=114 153 | _CLUSTERMEMBER._serialized_start=116 154 | _CLUSTERMEMBER._serialized_end=199 155 | _GETCLUSTERMEMBERSRESPONSE._serialized_start=201 156 | _GETCLUSTERMEMBERSRESPONSE._serialized_end=270 157 | _NODEINFO._serialized_start=272 158 | _NODEINFO._serialized_end=332 159 | _HEARTBEATREQUEST._serialized_start=334 160 | 
_HEARTBEATREQUEST._serialized_end=409 161 | _HEARTBEATRESPONSE._serialized_start=411 162 | _HEARTBEATRESPONSE._serialized_end=430 163 | _SUBSCRIBEEVENTREQUEST._serialized_start=432 164 | _SUBSCRIBEEVENTREQUEST._serialized_end=528 165 | _REGISTERAGENTREQUEST._serialized_start=530 166 | _REGISTERAGENTREQUEST._serialized_end=611 167 | _REGISTERAGENTRESPONSE._serialized_start=613 168 | _REGISTERAGENTRESPONSE._serialized_end=653 169 | _GETRUNNABLEACTIONINSTANCESREQUEST._serialized_start=655 170 | _GETRUNNABLEACTIONINSTANCESREQUEST._serialized_end=725 171 | _RUNNABLEACTION._serialized_start=728 172 | _RUNNABLEACTION._serialized_end=917 173 | _RUNNABLEACTION_PARAMSENTRY._serialized_start=848 174 | _RUNNABLEACTION_PARAMSENTRY._serialized_end=917 175 | _GETRUNNABLEACTIONINSTANCESRESPONSE._serialized_start=919 176 | _GETRUNNABLEACTIONINSTANCESRESPONSE._serialized_end=1000 177 | _UPDATEACTIONSTATUSREQUEST._serialized_start=1003 178 | _UPDATEACTIONSTATUSREQUEST._serialized_end=1136 179 | _UPDATEACTIONSTATUSRESPONSE._serialized_start=1138 180 | _UPDATEACTIONSTATUSRESPONSE._serialized_end=1166 181 | _MODELBOXADMIN._serialized_start=1169 182 | _MODELBOXADMIN._serialized_end=1648 183 | # @@protoc_insertion_point(module_scope) 184 | --------------------------------------------------------------------------------