├── .github ├── ISSUE_TEMPLATE │ ├── bug-report.md │ ├── enhancement.md │ └── support.md └── PULL_REQUEST_TEMPLATE.md ├── .gitignore ├── .travis.yml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── NOTICE ├── README.md ├── api └── v1 │ ├── batchtransformjob │ ├── batchtransformjob_types.go │ ├── batchtransformjob_types_test.go │ ├── suite_test.go │ └── zz_generated.deepcopy.go │ ├── common │ ├── groupversion_info.go │ ├── manual_deepcopy.go │ ├── sagemaker_api.go │ ├── suite_test.go │ └── zz_generated.deepcopy.go │ ├── endpointconfig │ ├── endpointconfig_types.go │ ├── endpointconfig_types_test.go │ ├── suite_test.go │ └── zz_generated.deepcopy.go │ ├── hostingautoscalingpolicy │ ├── hostingautoscalingpolicy_types.go │ ├── hostingautoscalingpolicy_types_test.go │ ├── suite_test.go │ └── zz_generated.deepcopy.go │ ├── hostingdeployment │ ├── hostingdeployment_types.go │ ├── hostingdeployment_types_test.go │ ├── suite_test.go │ └── zz_generated.deepcopy.go │ ├── hyperparametertuningjob │ ├── hyperparametertuningjob_types.go │ ├── hyperparametertuningjob_types_test.go │ ├── suite_test.go │ └── zz_generated.deepcopy.go │ ├── model │ ├── model_types.go │ ├── model_types_test.go │ ├── suite_test.go │ └── zz_generated.deepcopy.go │ ├── processingjob │ ├── processingjob_types.go │ ├── processingjob_types_test.go │ ├── suite_test.go │ └── zz_generated.deepcopy.go │ └── trainingjob │ ├── suite_test.go │ ├── trainingjob_types.go │ ├── trainingjob_types_test.go │ └── zz_generated.deepcopy.go ├── bin └── .gitkeep ├── codebuild ├── build_canary.yaml ├── build_image.yaml ├── build_integration_container.yaml ├── deploy_image.yaml ├── integration_test.yaml ├── release_tag.yaml ├── run_canary_china.yaml ├── scripts │ ├── build_canaries.sh │ ├── build_deploy_integration_container.sh │ ├── deployment_constants.sh │ ├── diff_release_yamls.sh │ ├── package_alpha_operators.sh │ ├── package_operators.sh │ ├── package_prod_operators.sh │ ├── release_tag.sh │ └── 
travis-api ├── smlog_test.yaml └── unit_test.yaml ├── config ├── base │ ├── kustomization.yaml │ ├── manager_auth_proxy_patch.yaml │ ├── manager_prometheus_metrics_patch.yaml │ ├── manager_webhook_patch.yaml │ └── webhookcainjection_patch.yaml ├── bindings │ ├── clusterspaced │ │ ├── auth_proxy_role.yaml │ │ ├── auth_proxy_role_binding.yaml │ │ ├── auth_proxy_service.yaml │ │ ├── kustomization.yaml │ │ └── role_binding.yaml │ └── namespaced │ │ ├── auth_proxy_role.yaml │ │ ├── auth_proxy_role_binding.yaml │ │ ├── auth_proxy_service.yaml │ │ ├── kustomization.yaml │ │ └── role_binding.yaml ├── certmanager │ ├── certificate.yaml │ ├── kustomization.yaml │ └── kustomizeconfig.yaml ├── crd │ ├── bases │ │ ├── sagemaker.aws.amazon.com_batchtransformjobs.yaml │ │ ├── sagemaker.aws.amazon.com_endpointconfigs.yaml │ │ ├── sagemaker.aws.amazon.com_hostingautoscalingpolicies.yaml │ │ ├── sagemaker.aws.amazon.com_hostingdeployments.yaml │ │ ├── sagemaker.aws.amazon.com_hyperparametertuningjobs.yaml │ │ ├── sagemaker.aws.amazon.com_models.yaml │ │ ├── sagemaker.aws.amazon.com_processingjobs.yaml │ │ └── sagemaker.aws.amazon.com_trainingjobs.yaml │ ├── kustomization.yaml │ ├── kustomizeconfig.yaml │ └── patches │ │ ├── cainjection_in_processingjobs.yaml │ │ ├── cainjection_in_trainingjobs.yaml │ │ ├── role-arn-validation-pattern.yaml │ │ ├── webhook_in_processingjobs.yaml │ │ └── webhook_in_trainingjobs.yaml ├── default │ └── kustomization.yaml ├── installers │ └── rolebasedcreds │ │ ├── kustomization.yaml │ │ └── namespaced │ │ ├── kustomization.yaml │ │ └── resources │ │ ├── kustomization.yaml │ │ ├── manager_add_service_account_patch.yaml │ │ └── serviceaccount.yaml ├── manager │ ├── kustomization.yaml │ └── manager.yaml ├── rbac │ ├── auth_proxy_role_binding.yaml │ ├── kustomization.yaml │ ├── leader_election_role.yaml │ ├── leader_election_role_binding.yaml │ └── role.yaml └── webhook │ ├── kustomization.yaml │ ├── kustomizeconfig.yaml │ ├── manifests.yaml │ └── 
service.yaml ├── controllers ├── README.md ├── aws_config_loader.go ├── batchtransformjob │ ├── READEME.md │ ├── batchtransformjob_controller.go │ ├── batchtransformjob_controller_test.go │ └── suite_test.go ├── common.go ├── common_test.go ├── controllertest │ ├── README.md │ ├── k8s_mocks.go │ ├── mock_autoscaling_client.go │ ├── mock_sagemaker_client.go │ ├── reconcile_responses.go │ └── util.go ├── endpointconfig │ ├── endpointconfig_controller.go │ ├── endpointconfig_controller_test.go │ └── suite_test.go ├── hosting │ ├── endpointconfig_reconciler.go │ ├── endpointconfig_reconciler_test.go │ ├── hostingdeployment_controller.go │ ├── hostingdeployment_controller_test.go │ ├── model_reconciler.go │ ├── model_reconciler_test.go │ └── suite_test.go ├── hostingautoscalingpolicy │ ├── hostingautoscalingpolicy_controller.go │ ├── hostingautoscalingpolicy_controller_test.go │ └── suite_test.go ├── hyperparametertuningjob │ ├── README.md │ ├── hpo_trainingjob_spawner.go │ ├── hpo_trainingjob_spawner_test.go │ ├── hyperparametertuningjob_controller.go │ ├── hyperparametertuningjob_controller_test.go │ └── suite_test.go ├── model │ ├── model_controller.go │ ├── model_controller_test.go │ └── suite_test.go ├── processingjob │ ├── processingjob_controller.go │ ├── processingjob_controller_test.go │ └── suite_test.go ├── sdkutil │ ├── README.md │ ├── clientwrapper │ │ ├── autoscaling_client.go │ │ ├── sagemaker_client.go │ │ ├── sagemaker_client_test.go │ │ └── suite_test.go │ ├── spec_comparison.go │ ├── spec_comparison_test.go │ ├── spec_sdk_converters.go │ ├── spec_sdk_converters_test.go │ └── suite_test.go ├── suite_test.go └── trainingjob │ ├── README.md │ ├── suite_test.go │ ├── trainingjob_controller.go │ └── trainingjob_controller_test.go ├── go.mod ├── go.sum ├── hack ├── boilerplate.go.txt └── charts │ ├── batch-transform-jobs │ ├── Chart.yaml │ ├── templates │ │ ├── NOTES.txt │ │ └── batch-transform-job.yaml │ └── values.yaml │ ├── hyperparameter-tuning-jobs │ 
├── Chart.yaml │ ├── templates │ │ ├── NOTES.txt │ │ └── hpo-job.yaml │ └── values.yaml │ ├── installer │ └── rolebased │ │ ├── Chart.yaml │ │ ├── README.md │ │ ├── templates │ │ ├── NOTES.txt │ │ ├── _helpers.tpl │ │ ├── crds.yaml │ │ └── operator-installer.yaml │ │ └── values.yaml │ ├── namespaced │ ├── README.md │ ├── crd_chart │ │ ├── Chart.yaml │ │ └── templates │ │ │ └── crds.yaml │ └── operator_chart │ │ ├── Chart.yaml │ │ ├── templates │ │ ├── NOTES.txt │ │ ├── _helpers.tpl │ │ └── operator-installer.yaml │ │ └── values.yaml │ └── training-jobs │ ├── Chart.yaml │ ├── examples │ ├── efs_values.yaml │ ├── fsx_values.yaml │ ├── s3_values.yaml │ └── spot_values.yaml │ ├── templates │ ├── NOTES.txt │ └── job.yaml │ └── values.yaml ├── main.go ├── release ├── .gitkeep └── rolebased │ ├── .gitkeep │ ├── china │ └── installer_china.yaml │ ├── installer.yaml │ └── namespaced │ ├── .gitkeep │ ├── china │ ├── crd.yaml │ └── operator_china.yaml │ ├── crd.yaml │ └── operator.yaml ├── samples ├── bring-your-own-container │ ├── README.md │ └── my-training-job.yaml ├── efs-xgboost-mnist-trainingjob.yaml ├── fsx-xgboost-mnist-trainingjob.yaml ├── hap-custom-metric.yaml ├── hap-predefined-metric.yaml ├── image-classifier-augmented-manifest.yaml ├── kmeans-mnist-processingjob.yaml ├── kmeans_preprocessing.py ├── spot-xgboost-mnist-hpo.yaml ├── spot-xgboost-mnist-trainingjob.yaml ├── xgboost-mnist-batchtransform.yaml ├── xgboost-mnist-custom-endpoint.yaml ├── xgboost-mnist-hostingdeployment.yaml ├── xgboost-mnist-hpo-custom-endpoint.yaml ├── xgboost-mnist-hpo.yaml ├── xgboost-mnist-trainingjob-debugger.yaml ├── xgboost-mnist-trainingjob.yaml └── xgboost-multi-model-hostingdeployment.yaml ├── scripts ├── build-release-tarball-Dockerfile ├── manager-builder-Dockerfile └── upload_xgboost_mnist_dataset │ ├── README.md │ └── upload_xgboost_mnist_dataset ├── smlogs-kubectl-plugin ├── .gitignore ├── Makefile ├── README.md ├── cmd │ └── kubectl-smlogs.go ├── go.mod ├── go.sum └── pkg 
│ └── cmd │ ├── cloudwatchlogs_client.go │ ├── smlogs.go │ └── smlogs_test.go └── tests ├── build_canary.sh ├── build_integration.sh ├── codebuild ├── common.sh ├── create_tests.sh ├── delete_tests.sh ├── feature_tests.sh ├── generate_iam_role.sh ├── generate_trust_policy.sh ├── inject_tests.sh ├── local-run │ ├── .env.example │ ├── .gitignore │ ├── README.md │ ├── codebuild_build.sh │ ├── local-codebuild-Dockerfile │ └── run_integration_test_against_existing_cluster.sh ├── private-link-test │ ├── generate-deny-egress-yaml │ ├── non-private-link-trainingjob.yaml │ ├── private-link-trainingjob.yaml │ └── run_private_link_integration_test ├── run_all_sample_canary_tests.sh ├── run_all_sample_canary_tests_china.sh ├── run_all_sample_namespace_tests.sh ├── run_all_sample_test.sh ├── run_canarytest.sh ├── run_canarytest_china.sh ├── run_integtest.sh ├── smlogs_tests.sh ├── testfiles │ ├── efs-xgboost-mnist-trainingjob.yaml │ ├── failing-xgboost-mnist-hpo.yaml │ ├── failing-xgboost-mnist-trainingjob.yaml │ ├── fsx-kmeans-mnist-trainingjob.yaml │ ├── hd-autoscaling-retain-varient-properties.yaml │ ├── hd-retain-varient-properties.yaml │ ├── kmeans-mnist-processingjob.yaml │ ├── spot-xgboost-mnist-hpo.yaml │ ├── spot-xgboost-mnist-trainingjob.yaml │ ├── xgboost-hosting-deployment-multi-container.yaml │ ├── xgboost-hosting-deployment-with-name.yaml │ ├── xgboost-hosting-deployment.yaml │ ├── xgboost-hostingautoscaling-custom.yaml │ ├── xgboost-hostingautoscaling.yaml │ ├── xgboost-mnist-batchtransform-china.yaml │ ├── xgboost-mnist-batchtransform.yaml │ ├── xgboost-mnist-custom-endpoint.yaml │ ├── xgboost-mnist-hpo-china.yaml │ ├── xgboost-mnist-hpo-custom-endpoint.yaml │ ├── xgboost-mnist-hpo.yaml │ ├── xgboost-mnist-trainingjob-china.yaml │ ├── xgboost-mnist-trainingjob-debugger.yaml │ ├── xgboost-mnist-trainingjob-namespaced.yaml │ ├── xgboost-mnist-trainingjob.yaml │ ├── xgboost-model-china.yaml │ └── xgboost-model.yaml └── update_tests.sh └── images ├── 
Dockerfile.canary ├── Dockerfile.canary.china └── Dockerfile.integration /.github/ISSUE_TEMPLATE/bug-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug Report 3 | about: Report a bug encountered while using the operator 4 | labels: bug 5 | 6 | --- 7 | 8 | 12 | 13 | 14 | **What happened**: 15 | 16 | **What you expected to happen**: 17 | 18 | **How to reproduce it (as minimally and precisely as possible)**: 19 | 20 | **Anything else we need to know?**: 21 | 22 | **Environment**: 23 | - Kubernetes version (use `kubectl version`): 24 | - Operator version (controller image tag): 25 | - OS (e.g: `cat /etc/os-release`): 26 | - Kernel (e.g. `uname -a`): 27 | - Installation method: 28 | - Others: 29 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/enhancement.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Enhancement Request 3 | about: Suggest an enhancement to the operator 4 | labels: enhancement 5 | 6 | --- 7 | 8 | **What would you like to be added**: 9 | 10 | **Why is this needed**: 11 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/support.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Support Request 3 | about: Support request or question relating to the operator 4 | labels: question 5 | 6 | --- -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ### What does this PR do / how does this improve the operators? 2 | 3 | ### Which issue(s) does this PR fix? 4 | 5 | Fixes # 6 | 7 | ### Special notes for the reviewer: 8 | 9 | ### Does this PR require changes to documentation? 
10 | 11 | ### All Submissions: 12 | 13 | * [ ] Have you followed the guidelines in our Contributing document? 14 | * [ ] Have you written or refactored unit tests to cover the change? 15 | * [ ] Have you ran all unit tests and ensured they are passing? 16 | * [ ] Have you manually tested each feature that is being added/modified? 17 | * [ ] Have you ensured you have not introduced linting errors? 18 | 19 | By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore AWS credential file. 2 | awscreds.env 3 | 4 | # Binaries for programs and plugins 5 | *.exe 6 | *.exe~ 7 | *.dll 8 | *.so 9 | *.dylib 10 | 11 | # Test binary, build with `go test -c` 12 | *.test 13 | 14 | # Output of the go coverage tool, specifically when used with LiteIDE 15 | *.out 16 | 17 | # editor and IDE paraphernalia 18 | .idea 19 | *.swp 20 | *.swo 21 | *~ 22 | 23 | # Build generated files 24 | config/default/manager_image_patch.yaml-e 25 | bin/* 26 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | os: osx 2 | language: minimal 3 | jobs: 4 | include: 5 | - stage: "Test smlogs" 6 | script: 7 | - wget https://amazon-sagemaker-operator-for-k8s-us-east-1.s3.amazonaws.com/kubectl-smlogs-plugin/latest/darwin.amd64.tar.gz 8 | - tar xvzf darwin.amd64.tar.gz 9 | - mkdir -p $HOME/sagemaker-k8s-bin 10 | - cp ./kubectl-smlogs.darwin.amd64/kubectl-smlogs $HOME/sagemaker-k8s-bin/kubectl-smlogs 11 | - PATH=$PATH:$HOME/sagemaker-k8s-bin kubectl-smlogs -h 12 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | 
## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. -------------------------------------------------------------------------------- /api/v1/batchtransformjob/suite_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package v1 18 | 19 | import ( 20 | "path/filepath" 21 | "testing" 22 | 23 | . "github.com/onsi/ginkgo" 24 | . 
"github.com/onsi/gomega" 25 | 26 | commonv1 "github.com/aws/amazon-sagemaker-operator-for-k8s/api/v1/common" 27 | 28 | "k8s.io/client-go/kubernetes/scheme" 29 | "k8s.io/client-go/rest" 30 | "sigs.k8s.io/controller-runtime/pkg/client" 31 | "sigs.k8s.io/controller-runtime/pkg/envtest" 32 | "sigs.k8s.io/controller-runtime/pkg/envtest/printer" 33 | logf "sigs.k8s.io/controller-runtime/pkg/log" 34 | "sigs.k8s.io/controller-runtime/pkg/log/zap" 35 | // +kubebuilder:scaffold:imports 36 | ) 37 | 38 | // These tests use Ginkgo (BDD-style Go testing framework). Refer to 39 | // http://onsi.github.io/ginkgo/ to learn more about Ginkgo. 40 | 41 | var cfg *rest.Config 42 | var k8sClient client.Client 43 | var testEnv *envtest.Environment 44 | 45 | func TestAPIs(t *testing.T) { 46 | RegisterFailHandler(Fail) 47 | 48 | RunSpecsWithDefaultAndCustomReporters(t, 49 | "v1 batchTransformJob Suite", 50 | []Reporter{printer.NewlineReporter{}}) 51 | } 52 | 53 | var _ = BeforeSuite(func(done Done) { 54 | logf.SetLogger(zap.LoggerTo(GinkgoWriter, true)) 55 | 56 | By("bootstrapping test environment") 57 | testEnv = &envtest.Environment{ 58 | CRDDirectoryPaths: []string{filepath.Join("..", "..", "..", "config", "crd", "bases")}, 59 | } 60 | 61 | err := commonv1.SchemeBuilder.AddToScheme(scheme.Scheme) 62 | Expect(err).NotTo(HaveOccurred()) 63 | 64 | cfg, err = testEnv.Start() 65 | Expect(err).ToNot(HaveOccurred()) 66 | Expect(cfg).ToNot(BeNil()) 67 | 68 | k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) 69 | Expect(err).ToNot(HaveOccurred()) 70 | Expect(k8sClient).ToNot(BeNil()) 71 | 72 | close(done) 73 | }, 60) 74 | 75 | var _ = AfterSuite(func() { 76 | By("tearing down the test environment") 77 | err := testEnv.Stop() 78 | Expect(err).ToNot(HaveOccurred()) 79 | }) 80 | -------------------------------------------------------------------------------- /api/v1/common/groupversion_info.go: -------------------------------------------------------------------------------- 1 
| /* 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package v1 18 | 19 | import ( 20 | "k8s.io/apimachinery/pkg/runtime/schema" 21 | "sigs.k8s.io/controller-runtime/pkg/scheme" 22 | ) 23 | 24 | var ( 25 | // GroupVersion is group version used to register these objects 26 | GroupVersion = schema.GroupVersion{Group: "sagemaker.aws.amazon.com", Version: "v1"} 27 | 28 | // SchemeBuilder is used to add go types to the GroupVersionKind scheme 29 | SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion} 30 | 31 | // AddToScheme adds the types in this group-version to the given scheme. 32 | AddToScheme = SchemeBuilder.AddToScheme 33 | ) 34 | -------------------------------------------------------------------------------- /api/v1/common/manual_deepcopy.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package v1 18 | 19 | // Go does not allow methods on value types, so 20 | // we have to write deepcopy methods manually when necessary. 21 | func DeepCopyTagSlice(tagsToCopy []Tag) []Tag { 22 | if tagsToCopy == nil { 23 | return nil 24 | } 25 | 26 | copiedTags := []Tag{} 27 | for _, tag := range tagsToCopy { 28 | copiedTag := tag.DeepCopy() 29 | copiedTags = append(copiedTags, *copiedTag) 30 | } 31 | return copiedTags 32 | } 33 | -------------------------------------------------------------------------------- /api/v1/common/suite_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package v1 18 | 19 | import ( 20 | "testing" 21 | 22 | . "github.com/onsi/ginkgo" 23 | . 
"github.com/onsi/gomega" 24 | ) 25 | 26 | func TestAPIs(t *testing.T) { 27 | RegisterFailHandler(Fail) 28 | 29 | RunSpecs(t, "v1 common Suite") 30 | } 31 | -------------------------------------------------------------------------------- /api/v1/endpointconfig/suite_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package v1 18 | 19 | import ( 20 | "path/filepath" 21 | "testing" 22 | 23 | commonv1 "github.com/aws/amazon-sagemaker-operator-for-k8s/api/v1/common" 24 | 25 | . "github.com/onsi/ginkgo" 26 | . "github.com/onsi/gomega" 27 | 28 | "k8s.io/client-go/kubernetes/scheme" 29 | "k8s.io/client-go/rest" 30 | "sigs.k8s.io/controller-runtime/pkg/client" 31 | "sigs.k8s.io/controller-runtime/pkg/envtest" 32 | "sigs.k8s.io/controller-runtime/pkg/envtest/printer" 33 | logf "sigs.k8s.io/controller-runtime/pkg/log" 34 | "sigs.k8s.io/controller-runtime/pkg/log/zap" 35 | ) 36 | 37 | // These tests use Ginkgo (BDD-style Go testing framework). Refer to 38 | // http://onsi.github.io/ginkgo/ to learn more about Ginkgo. 
39 | 40 | var cfg *rest.Config 41 | var k8sClient client.Client 42 | var testEnv *envtest.Environment 43 | 44 | func TestAPIs(t *testing.T) { 45 | RegisterFailHandler(Fail) 46 | 47 | RunSpecsWithDefaultAndCustomReporters(t, 48 | "v1 EndpointConfig Suite", 49 | []Reporter{printer.NewlineReporter{}}) 50 | } 51 | 52 | var _ = BeforeSuite(func(done Done) { 53 | logf.SetLogger(zap.LoggerTo(GinkgoWriter, true)) 54 | 55 | By("bootstrapping test environment") 56 | testEnv = &envtest.Environment{ 57 | CRDDirectoryPaths: []string{filepath.Join("..", "..", "..", "config", "crd", "bases")}, 58 | } 59 | 60 | err := commonv1.SchemeBuilder.AddToScheme(scheme.Scheme) 61 | Expect(err).NotTo(HaveOccurred()) 62 | 63 | cfg, err = testEnv.Start() 64 | Expect(err).ToNot(HaveOccurred()) 65 | Expect(cfg).ToNot(BeNil()) 66 | 67 | k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) 68 | Expect(err).ToNot(HaveOccurred()) 69 | Expect(k8sClient).ToNot(BeNil()) 70 | 71 | close(done) 72 | }, 60) 73 | 74 | var _ = AfterSuite(func() { 75 | By("tearing down the test environment") 76 | err := testEnv.Stop() 77 | Expect(err).ToNot(HaveOccurred()) 78 | }) 79 | -------------------------------------------------------------------------------- /api/v1/hostingautoscalingpolicy/suite_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package v1 18 | 19 | import ( 20 | "path/filepath" 21 | "testing" 22 | 23 | commonv1 "github.com/aws/amazon-sagemaker-operator-for-k8s/api/v1/common" 24 | 25 | . "github.com/onsi/ginkgo" 26 | . "github.com/onsi/gomega" 27 | "k8s.io/client-go/kubernetes/scheme" 28 | "k8s.io/client-go/rest" 29 | "sigs.k8s.io/controller-runtime/pkg/client" 30 | "sigs.k8s.io/controller-runtime/pkg/envtest" 31 | "sigs.k8s.io/controller-runtime/pkg/envtest/printer" 32 | logf "sigs.k8s.io/controller-runtime/pkg/log" 33 | "sigs.k8s.io/controller-runtime/pkg/log/zap" 34 | ) 35 | 36 | // These tests use Ginkgo (BDD-style Go testing framework). Refer to 37 | // http://onsi.github.io/ginkgo/ to learn more about Ginkgo. 38 | 39 | var cfg *rest.Config 40 | var k8sClient client.Client 41 | var testEnv *envtest.Environment 42 | 43 | func TestAPIs(t *testing.T) { 44 | RegisterFailHandler(Fail) 45 | 46 | RunSpecsWithDefaultAndCustomReporters(t, 47 | "v1 hostingautoscalingpolicy Suite", 48 | []Reporter{printer.NewlineReporter{}}) 49 | } 50 | 51 | var _ = BeforeSuite(func(done Done) { 52 | logf.SetLogger(zap.LoggerTo(GinkgoWriter, true)) 53 | 54 | By("bootstrapping test environment") 55 | testEnv = &envtest.Environment{ 56 | CRDDirectoryPaths: []string{filepath.Join("..", "..", "..", "config", "crd", "bases")}, 57 | } 58 | 59 | err := commonv1.SchemeBuilder.AddToScheme(scheme.Scheme) 60 | Expect(err).NotTo(HaveOccurred()) 61 | 62 | cfg, err = testEnv.Start() 63 | Expect(err).ToNot(HaveOccurred()) 64 | Expect(cfg).ToNot(BeNil()) 65 | 66 | k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) 67 | Expect(err).ToNot(HaveOccurred()) 68 | Expect(k8sClient).ToNot(BeNil()) 69 | 70 | close(done) 71 | }, 60) 72 | 73 | var _ = AfterSuite(func() { 74 | By("tearing down the test environment") 75 | err := testEnv.Stop() 76 | Expect(err).ToNot(HaveOccurred()) 77 | }) 78 | 
-------------------------------------------------------------------------------- /api/v1/hostingdeployment/suite_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package v1 18 | 19 | import ( 20 | "path/filepath" 21 | "testing" 22 | 23 | commonv1 "github.com/aws/amazon-sagemaker-operator-for-k8s/api/v1/common" 24 | 25 | . "github.com/onsi/ginkgo" 26 | . "github.com/onsi/gomega" 27 | 28 | "k8s.io/client-go/kubernetes/scheme" 29 | "k8s.io/client-go/rest" 30 | "sigs.k8s.io/controller-runtime/pkg/client" 31 | "sigs.k8s.io/controller-runtime/pkg/envtest" 32 | "sigs.k8s.io/controller-runtime/pkg/envtest/printer" 33 | logf "sigs.k8s.io/controller-runtime/pkg/log" 34 | "sigs.k8s.io/controller-runtime/pkg/log/zap" 35 | ) 36 | 37 | // These tests use Ginkgo (BDD-style Go testing framework). Refer to 38 | // http://onsi.github.io/ginkgo/ to learn more about Ginkgo. 
39 | 40 | var cfg *rest.Config 41 | var k8sClient client.Client 42 | var testEnv *envtest.Environment 43 | 44 | func TestAPIs(t *testing.T) { 45 | RegisterFailHandler(Fail) 46 | 47 | RunSpecsWithDefaultAndCustomReporters(t, 48 | "v1 hostingdeployment Suite", 49 | []Reporter{printer.NewlineReporter{}}) 50 | } 51 | 52 | var _ = BeforeSuite(func(done Done) { 53 | logf.SetLogger(zap.LoggerTo(GinkgoWriter, true)) 54 | 55 | By("bootstrapping test environment") 56 | testEnv = &envtest.Environment{ 57 | CRDDirectoryPaths: []string{filepath.Join("..", "..", "..", "config", "crd", "bases")}, 58 | } 59 | 60 | err := commonv1.SchemeBuilder.AddToScheme(scheme.Scheme) 61 | Expect(err).NotTo(HaveOccurred()) 62 | 63 | cfg, err = testEnv.Start() 64 | Expect(err).ToNot(HaveOccurred()) 65 | Expect(cfg).ToNot(BeNil()) 66 | 67 | k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) 68 | Expect(err).ToNot(HaveOccurred()) 69 | Expect(k8sClient).ToNot(BeNil()) 70 | 71 | close(done) 72 | }, 60) 73 | 74 | var _ = AfterSuite(func() { 75 | By("tearing down the test environment") 76 | err := testEnv.Stop() 77 | Expect(err).ToNot(HaveOccurred()) 78 | }) 79 | -------------------------------------------------------------------------------- /api/v1/hyperparametertuningjob/suite_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package v1 18 | 19 | import ( 20 | "path/filepath" 21 | "testing" 22 | 23 | commonv1 "github.com/aws/amazon-sagemaker-operator-for-k8s/api/v1/common" 24 | 25 | . "github.com/onsi/ginkgo" 26 | . "github.com/onsi/gomega" 27 | 28 | "k8s.io/client-go/kubernetes/scheme" 29 | "k8s.io/client-go/rest" 30 | "sigs.k8s.io/controller-runtime/pkg/client" 31 | "sigs.k8s.io/controller-runtime/pkg/envtest" 32 | "sigs.k8s.io/controller-runtime/pkg/envtest/printer" 33 | logf "sigs.k8s.io/controller-runtime/pkg/log" 34 | "sigs.k8s.io/controller-runtime/pkg/log/zap" 35 | ) 36 | 37 | // These tests use Ginkgo (BDD-style Go testing framework). Refer to 38 | // http://onsi.github.io/ginkgo/ to learn more about Ginkgo. 39 | 40 | var cfg *rest.Config 41 | var k8sClient client.Client 42 | var testEnv *envtest.Environment 43 | 44 | func TestAPIs(t *testing.T) { 45 | RegisterFailHandler(Fail) 46 | 47 | RunSpecsWithDefaultAndCustomReporters(t, 48 | "v1 hyperparametertuningjob Suite", 49 | []Reporter{printer.NewlineReporter{}}) 50 | } 51 | 52 | var _ = BeforeSuite(func(done Done) { 53 | logf.SetLogger(zap.LoggerTo(GinkgoWriter, true)) 54 | 55 | By("bootstrapping test environment") 56 | testEnv = &envtest.Environment{ 57 | CRDDirectoryPaths: []string{filepath.Join("..", "..", "..", "config", "crd", "bases")}, 58 | } 59 | 60 | err := commonv1.SchemeBuilder.AddToScheme(scheme.Scheme) 61 | Expect(err).NotTo(HaveOccurred()) 62 | 63 | cfg, err = testEnv.Start() 64 | Expect(err).ToNot(HaveOccurred()) 65 | Expect(cfg).ToNot(BeNil()) 66 | 67 | k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) 68 | Expect(err).ToNot(HaveOccurred()) 69 | Expect(k8sClient).ToNot(BeNil()) 70 | 71 | close(done) 72 | }, 60) 73 | 74 | var _ = AfterSuite(func() { 75 | By("tearing down the test environment") 76 | err := testEnv.Stop() 77 | Expect(err).ToNot(HaveOccurred()) 78 | }) 
79 | -------------------------------------------------------------------------------- /api/v1/model/model_types_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package v1 18 | 19 | import ( 20 | . "github.com/onsi/ginkgo" 21 | . "github.com/onsi/gomega" 22 | 23 | . "github.com/aws/amazon-sagemaker-operator-for-k8s/controllers/controllertest" 24 | 25 | "golang.org/x/net/context" 26 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 27 | "k8s.io/apimachinery/pkg/types" 28 | ) 29 | 30 | // These tests are written in BDD-style using Ginkgo framework. Refer to 31 | // http://onsi.github.io/ginkgo to learn more. 32 | 33 | var _ = Describe("Model", func() { 34 | var ( 35 | key types.NamespacedName 36 | created, fetched *Model 37 | ) 38 | 39 | BeforeEach(func() { 40 | // Add any setup steps that needs to be executed before each test 41 | }) 42 | 43 | AfterEach(func() { 44 | // Add any teardown steps that needs to be executed after each test 45 | }) 46 | 47 | // Add Tests for OpenAPI validation (or additonal CRD features) specified in 48 | // your API definition. 49 | // Avoid adding tests for vanilla CRUD operations because they would 50 | // test Kubernetes API server, which isn't the goal here. 
51 | Context("Create API", func() { 52 | 53 | It("should create an object successfully", func() { 54 | 55 | key = types.NamespacedName{ 56 | Name: "foo", 57 | Namespace: "default", 58 | } 59 | created = &Model{ 60 | ObjectMeta: metav1.ObjectMeta{ 61 | Name: "foo", 62 | Namespace: "default", 63 | }, 64 | Spec: ModelSpec{ 65 | ExecutionRoleArn: ToStringPtr("abc"), 66 | Region: ToStringPtr("us-east-1"), 67 | }, 68 | } 69 | 70 | By("creating an API obj") 71 | Expect(k8sClient.Create(context.TODO(), created)).To(Succeed()) 72 | 73 | fetched = &Model{} 74 | Expect(k8sClient.Get(context.TODO(), key, fetched)).To(Succeed()) 75 | Expect(fetched).To(Equal(created)) 76 | 77 | By("deleting the created object") 78 | Expect(k8sClient.Delete(context.TODO(), created)).To(Succeed()) 79 | Expect(k8sClient.Get(context.TODO(), key, created)).ToNot(Succeed()) 80 | }) 81 | 82 | }) 83 | 84 | }) 85 | -------------------------------------------------------------------------------- /api/v1/model/suite_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package v1 18 | 19 | import ( 20 | "path/filepath" 21 | "testing" 22 | 23 | commonv1 "github.com/aws/amazon-sagemaker-operator-for-k8s/api/v1/common" 24 | 25 | . "github.com/onsi/ginkgo" 26 | . 
"github.com/onsi/gomega" 27 | 28 | "k8s.io/client-go/kubernetes/scheme" 29 | "k8s.io/client-go/rest" 30 | "sigs.k8s.io/controller-runtime/pkg/client" 31 | "sigs.k8s.io/controller-runtime/pkg/envtest" 32 | "sigs.k8s.io/controller-runtime/pkg/envtest/printer" 33 | logf "sigs.k8s.io/controller-runtime/pkg/log" 34 | "sigs.k8s.io/controller-runtime/pkg/log/zap" 35 | ) 36 | 37 | // These tests use Ginkgo (BDD-style Go testing framework). Refer to 38 | // http://onsi.github.io/ginkgo/ to learn more about Ginkgo. 39 | 40 | var cfg *rest.Config 41 | var k8sClient client.Client 42 | var testEnv *envtest.Environment 43 | 44 | func TestAPIs(t *testing.T) { 45 | RegisterFailHandler(Fail) 46 | 47 | RunSpecsWithDefaultAndCustomReporters(t, 48 | "v1 Model Suite", 49 | []Reporter{printer.NewlineReporter{}}) 50 | } 51 | 52 | var _ = BeforeSuite(func(done Done) { 53 | logf.SetLogger(zap.LoggerTo(GinkgoWriter, true)) 54 | 55 | By("bootstrapping test environment") 56 | testEnv = &envtest.Environment{ 57 | CRDDirectoryPaths: []string{filepath.Join("..", "..", "..", "config", "crd", "bases")}, 58 | } 59 | 60 | err := commonv1.SchemeBuilder.AddToScheme(scheme.Scheme) 61 | Expect(err).NotTo(HaveOccurred()) 62 | 63 | cfg, err = testEnv.Start() 64 | Expect(err).ToNot(HaveOccurred()) 65 | Expect(cfg).ToNot(BeNil()) 66 | 67 | k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) 68 | Expect(err).ToNot(HaveOccurred()) 69 | Expect(k8sClient).ToNot(BeNil()) 70 | 71 | close(done) 72 | }, 60) 73 | 74 | var _ = AfterSuite(func() { 75 | By("tearing down the test environment") 76 | err := testEnv.Stop() 77 | Expect(err).ToNot(HaveOccurred()) 78 | }) 79 | -------------------------------------------------------------------------------- /api/v1/processingjob/suite_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package v1 18 | 19 | import ( 20 | "path/filepath" 21 | "testing" 22 | 23 | commonv1 "github.com/aws/amazon-sagemaker-operator-for-k8s/api/v1/common" 24 | 25 | . "github.com/onsi/ginkgo" 26 | . "github.com/onsi/gomega" 27 | 28 | "k8s.io/client-go/kubernetes/scheme" 29 | "k8s.io/client-go/rest" 30 | "sigs.k8s.io/controller-runtime/pkg/client" 31 | "sigs.k8s.io/controller-runtime/pkg/envtest" 32 | "sigs.k8s.io/controller-runtime/pkg/envtest/printer" 33 | logf "sigs.k8s.io/controller-runtime/pkg/log" 34 | "sigs.k8s.io/controller-runtime/pkg/log/zap" 35 | ) 36 | 37 | // These tests use Ginkgo (BDD-style Go testing framework). Refer to 38 | // http://onsi.github.io/ginkgo/ to learn more about Ginkgo. 
39 | 40 | var cfg *rest.Config 41 | var k8sClient client.Client 42 | var testEnv *envtest.Environment 43 | 44 | func TestAPIs(t *testing.T) { 45 | RegisterFailHandler(Fail) 46 | 47 | RunSpecsWithDefaultAndCustomReporters(t, 48 | "v1 processingjob Suite", 49 | []Reporter{printer.NewlineReporter{}}) 50 | } 51 | 52 | var _ = BeforeSuite(func(done Done) { 53 | logf.SetLogger(zap.LoggerTo(GinkgoWriter, true)) 54 | 55 | By("bootstrapping test environment") 56 | testEnv = &envtest.Environment{ 57 | CRDDirectoryPaths: []string{filepath.Join("..", "..", "..", "config", "crd", "bases")}, 58 | } 59 | 60 | err := commonv1.SchemeBuilder.AddToScheme(scheme.Scheme) 61 | Expect(err).NotTo(HaveOccurred()) 62 | 63 | cfg, err = testEnv.Start() 64 | Expect(err).ToNot(HaveOccurred()) 65 | Expect(cfg).ToNot(BeNil()) 66 | 67 | k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) 68 | Expect(err).ToNot(HaveOccurred()) 69 | Expect(k8sClient).ToNot(BeNil()) 70 | 71 | close(done) 72 | }, 60) 73 | 74 | var _ = AfterSuite(func() { 75 | By("tearing down the test environment") 76 | err := testEnv.Stop() 77 | Expect(err).ToNot(HaveOccurred()) 78 | }) 79 | -------------------------------------------------------------------------------- /api/v1/trainingjob/suite_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package v1 18 | 19 | import ( 20 | "path/filepath" 21 | "testing" 22 | 23 | commonv1 "github.com/aws/amazon-sagemaker-operator-for-k8s/api/v1/common" 24 | 25 | . "github.com/onsi/ginkgo" 26 | . "github.com/onsi/gomega" 27 | 28 | "k8s.io/client-go/kubernetes/scheme" 29 | "k8s.io/client-go/rest" 30 | "sigs.k8s.io/controller-runtime/pkg/client" 31 | "sigs.k8s.io/controller-runtime/pkg/envtest" 32 | "sigs.k8s.io/controller-runtime/pkg/envtest/printer" 33 | logf "sigs.k8s.io/controller-runtime/pkg/log" 34 | "sigs.k8s.io/controller-runtime/pkg/log/zap" 35 | ) 36 | 37 | // These tests use Ginkgo (BDD-style Go testing framework). Refer to 38 | // http://onsi.github.io/ginkgo/ to learn more about Ginkgo. 39 | 40 | var cfg *rest.Config 41 | var k8sClient client.Client 42 | var testEnv *envtest.Environment 43 | 44 | func TestAPIs(t *testing.T) { 45 | RegisterFailHandler(Fail) 46 | 47 | RunSpecsWithDefaultAndCustomReporters(t, 48 | "v1 trainingjob Suite", 49 | []Reporter{printer.NewlineReporter{}}) 50 | } 51 | 52 | var _ = BeforeSuite(func(done Done) { 53 | logf.SetLogger(zap.LoggerTo(GinkgoWriter, true)) 54 | 55 | By("bootstrapping test environment") 56 | testEnv = &envtest.Environment{ 57 | CRDDirectoryPaths: []string{filepath.Join("..", "..", "..", "config", "crd", "bases")}, 58 | } 59 | 60 | err := commonv1.SchemeBuilder.AddToScheme(scheme.Scheme) 61 | Expect(err).NotTo(HaveOccurred()) 62 | 63 | cfg, err = testEnv.Start() 64 | Expect(err).ToNot(HaveOccurred()) 65 | Expect(cfg).ToNot(BeNil()) 66 | 67 | k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) 68 | Expect(err).ToNot(HaveOccurred()) 69 | Expect(k8sClient).ToNot(BeNil()) 70 | 71 | close(done) 72 | }, 60) 73 | 74 | var _ = AfterSuite(func() { 75 | By("tearing down the test environment") 76 | err := testEnv.Stop() 77 | Expect(err).ToNot(HaveOccurred()) 78 | }) 79 | 
-------------------------------------------------------------------------------- /bin/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/amazon-sagemaker-operator-for-k8s/e131566f83b8d860300c1ccf377b3982b385f177/bin/.gitkeep -------------------------------------------------------------------------------- /codebuild/build_canary.yaml: -------------------------------------------------------------------------------- 1 | version: 0.2 2 | phases: 3 | build: 4 | commands: 5 | # Docker engine takes few seconds to start 6 | - sudo service docker start 7 | # TODO: Make it polling 8 | - sleep 1 9 | # Build the canaries 10 | - (bash codebuild/scripts/build_canaries.sh) -------------------------------------------------------------------------------- /codebuild/build_image.yaml: -------------------------------------------------------------------------------- 1 | version: 0.2 2 | phases: 3 | pre_build: 4 | commands: 5 | - docker login -u $DOCKERHUB_USERNAME -p $DOCKERHUB_PASSWORD 6 | build: 7 | commands: 8 | # Docker engine takes few seconds to start 9 | - sudo service docker start 10 | # TODO: Make it polling 11 | - sleep 1 12 | # Define env variables 13 | - GO111MODULE=on 14 | # Package and ship new commit version 15 | - (bash codebuild/scripts/package_alpha_operators.sh) 16 | 17 | -------------------------------------------------------------------------------- /codebuild/build_integration_container.yaml: -------------------------------------------------------------------------------- 1 | # This CodeBuild project is run using the docker:stable-dind container 2 | # Docker daemon start-up script was taken from the following URL: 3 | # https://docs.aws.amazon.com/codebuild/latest/userguide/sample-docker-custom-image.html 4 | 5 | version: 0.2 6 | phases: 7 | install: 8 | commands: 9 | - nohup /usr/local/bin/dockerd --host=unix:///var/run/docker.sock --host=tcp://127.0.0.1:2375 --storage-driver=overlay2& 
10 | - timeout 15 sh -c "until docker info; do echo .; sleep 1; done" 11 | pre_build: 12 | commands: 13 | # Add AWSCLI and bash 14 | - (apk add --update python3 python3-dev py-pip build-base bash && pip install awscli --upgrade) 15 | build: 16 | commands: 17 | # Build new integration test container 18 | - (IMG=$INTEGRATION_CONTAINER_REPOSITORY bash codebuild/scripts/build_deploy_integration_container.sh) -------------------------------------------------------------------------------- /codebuild/deploy_image.yaml: -------------------------------------------------------------------------------- 1 | version: 0.2 2 | phases: 3 | pre_build: 4 | commands: 5 | - docker login -u $DOCKERHUB_USERNAME -p $DOCKERHUB_PASSWORD 6 | build: 7 | commands: 8 | # Docker engine takes few seconds to start 9 | - sudo service docker start 10 | # TODO: Make it polling 11 | - sleep 1 12 | # Define env variables 13 | - GO111MODULE=on 14 | # Package and ship new commit version 15 | - (bash codebuild/scripts/package_prod_operators.sh) -------------------------------------------------------------------------------- /codebuild/integration_test.yaml: -------------------------------------------------------------------------------- 1 | version: 0.2 2 | 3 | phases: 4 | pre_build: 5 | commands: 6 | - docker login -u $DOCKERHUB_USERNAME -p $DOCKERHUB_PASSWORD 7 | build: 8 | commands: 9 | - (bash codebuild/scripts/diff_release_yamls.sh) 10 | - bash tests/codebuild/run_integtest.sh 11 | 12 | -------------------------------------------------------------------------------- /codebuild/release_tag.yaml: -------------------------------------------------------------------------------- 1 | version: 0.2 2 | phases: 3 | build: 4 | commands: 5 | # Docker engine takes few seconds to start 6 | - sudo service docker start 7 | # TODO: Make it polling 8 | - sleep 1 9 | # Package and ship new commit version 10 | - (bash codebuild/scripts/release_tag.sh) 
-------------------------------------------------------------------------------- /codebuild/run_canary_china.yaml: -------------------------------------------------------------------------------- 1 | version: 0.2 2 | phases: 3 | build: 4 | commands: 5 | # make all shell scripts executable. This is required when running code copyied from S3 6 | - find ./ -type f -name "*.sh" -exec chmod +x {} \; 7 | - cp release/rolebased/china/installer_china.yaml tests/codebuild/ 8 | - cd tests 9 | - aws ecr get-login-password --region $CLUSTER_REGION | docker login --username AWS --password-stdin $ECR_CACHE_URI || true 10 | - docker pull ${ECR_CACHE_URI}:latest || true 11 | - docker build -f images/Dockerfile.canary.china . -t ${ECR_CACHE_URI}:latest --quiet || true 12 | - docker run --name operator-canary-china $(env | cut -f1 -d= | sed 's/^/-e /') --mount type=bind,source="$(pwd)/codebuild/",target="/app/" ${ECR_CACHE_URI}:latest 13 | - docker push ${ECR_CACHE_URI}:latest || true 14 | -------------------------------------------------------------------------------- /codebuild/scripts/build_canaries.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # This function builds, packages and deploys a region-specific operator to an ECR repo and output bucket. 6 | # Parameter: 7 | # $1: The region of the ECR repo. 8 | # $2: The stage in the pipeline for the output account. (prod/beta/dev) 9 | # e.g. 
build_canary us-east-1 prod 10 | function build_canary() 11 | { 12 | local account_region="$1" 13 | local stage="$2" 14 | 15 | # Match stage to pipeline type 16 | if [ "$stage" != "$PIPELINE_STAGE" ]; then 17 | return 18 | fi 19 | 20 | $(aws ecr get-login --no-include-email --region $CANARY_ECR_REGION --registry-ids $AWS_ACCOUNT_ID) 21 | 22 | # Download the operator for this canary 23 | pushd tests 24 | aws s3 cp s3://$ALPHA_TARBALL_BUCKET/${CODEBUILD_RESOLVED_SOURCE_VERSION}/sagemaker-k8s-operator-${account_region}.tar.gz sagemaker-k8s-operator.tar.gz 25 | 26 | # Build and push the newest canary tests with SHA1 and latest tags 27 | IMAGE=$AWS_ACCOUNT_ID.dkr.ecr.$CANARY_ECR_REGION.amazonaws.com/$CANARY_ECR_REPOSITORY 28 | SHA_REGION_TAG=${CODEBUILD_RESOLVED_SOURCE_VERSION}-${account_region} 29 | REGION_TAG=$account_region 30 | 31 | # Push a canary tagged with the SHA and the region 32 | IMG=$IMAGE:$SHA_REGION_TAG COMMIT_SHA=$CODEBUILD_RESOLVED_SOURCE_VERSION bash build_canary.sh 33 | docker push $IMAGE:$SHA_REGION_TAG 34 | 35 | # Tag it with just the region name for latest 36 | docker tag $IMAGE:$SHA_REGION_TAG $IMAGE:$REGION_TAG 37 | docker push $IMAGE:$REGION_TAG 38 | popd 39 | } 40 | 41 | # Replace JSON single quotes with double quotes for jq to understand 42 | ACCOUNTS_ESCAPED=`echo $ACCOUNTS | sed "s/'/\"/g"` 43 | for row in $(echo ${ACCOUNTS_ESCAPED} | jq -r '.[] | @base64'); do 44 | _jq() { 45 | echo ${row} | base64 --decode | jq -r ${1} 46 | } 47 | 48 | region="$(_jq '.region')" 49 | stage="$(_jq '.stage')" 50 | build_canary "$region" "$stage" 51 | done -------------------------------------------------------------------------------- /codebuild/scripts/build_deploy_integration_container.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script will build the integration test container. 
This container contains 4 | # all the tools necessary for running the build and test steps for each of the 5 | # CodeBuild projects. The script will also tag the container with the latest 6 | # commit SHA, and with the "latest" tag, then push to an ECR repository. 7 | 8 | set -x 9 | 10 | # Build new integration test container 11 | pushd tests 12 | IMG=$INTEGRATION_CONTAINER_REPOSITORY bash build_integration.sh 13 | popd 14 | 15 | # Log into ECR 16 | $(aws ecr get-login --no-include-email --region $REGION --registry-ids $AWS_ACCOUNT_ID) 17 | 18 | # Tag the container with SHA and latest 19 | docker tag $INTEGRATION_CONTAINER_REPOSITORY $AWS_ACCOUNT_ID.dkr.ecr.$REGION.amazonaws.com/$INTEGRATION_CONTAINER_REPOSITORY:$CODEBUILD_RESOLVED_SOURCE_VERSION 20 | docker tag $INTEGRATION_CONTAINER_REPOSITORY $AWS_ACCOUNT_ID.dkr.ecr.$REGION.amazonaws.com/$INTEGRATION_CONTAINER_REPOSITORY:latest 21 | 22 | # Push the newly tagged containers 23 | docker push $AWS_ACCOUNT_ID.dkr.ecr.$REGION.amazonaws.com/$INTEGRATION_CONTAINER_REPOSITORY:$CODEBUILD_RESOLVED_SOURCE_VERSION 24 | docker push $AWS_ACCOUNT_ID.dkr.ecr.$REGION.amazonaws.com/$INTEGRATION_CONTAINER_REPOSITORY:latest -------------------------------------------------------------------------------- /codebuild/scripts/deployment_constants.sh: -------------------------------------------------------------------------------- 1 | RELEASE_BUCKET_NAME_FMT="%s-%s" 2 | 3 | RELEASE_BINARY_PREFIX_FMT="s3://%s/kubectl-smlogs-plugin" 4 | ALPHA_BINARY_PREFIX_FMT="s3://%s/%s" 5 | 6 | ALPHA_LINUX_BINARY_PATH_FMT="%s/kubectl-smlogs-plugin.linux.amd64.tar.gz" 7 | ALPHA_DARWIN_BINARY_PATH_FMT="%s/kubectl-smlogs-plugin.darwin.amd64.tar.gz" 8 | 9 | RELEASE_LINUX_BINARY_PATH_FMT="%s/%s/linux.amd64.tar.gz" 10 | RELEASE_DARWIN_BINARY_PATH_FMT="%s/%s/darwin.amd64.tar.gz" 11 | 12 | PUBLIC_CP_ARGS="--acl public-read" -------------------------------------------------------------------------------- /codebuild/scripts/diff_release_yamls.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | BUNDLED_RELEASE_PATH="release/" 4 | TMP_RELEASE_PATH="/tmp/release" 5 | 6 | mkdir -p "${TMP_RELEASE_PATH}" 7 | 8 | # Create installers from CRDs into temporary directory 9 | INSTALLER_PATH="${TMP_RELEASE_PATH}" make create-installers 10 | 11 | # Diff the two folders, excluding hidden files (such as .gitkeep) 12 | diff -r --exclude=".*" "${TMP_RELEASE_PATH}" "${BUNDLED_RELEASE_PATH}" 13 | 14 | if [ $? -ne 0 ]; then 15 | echo "Release files did not match. See diff above for more details" 16 | exit 1 17 | fi -------------------------------------------------------------------------------- /codebuild/scripts/package_alpha_operators.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source codebuild/scripts/package_operators.sh 4 | 5 | set -x 6 | 7 | # Login to alpha ECR 8 | $(aws ecr get-login --no-include-email --region $ALPHA_REPOSITORY_REGION --registry-ids $ALPHA_ACCOUNT_ID) 9 | 10 | # Build the image with a temporary tag 11 | ALPHA_IMAGE=$ALPHA_ACCOUNT_ID.dkr.ecr.$ALPHA_REPOSITORY_REGION.amazonaws.com/$REPOSITORY_NAME 12 | make docker-build docker-push IMG=$ALPHA_IMAGE:$CODEBUILD_RESOLVED_SOURCE_VERSION 13 | 14 | # Ensure the smlogs are built and tarred into the alpha bucket 15 | SHOULD_PUSH_SMLOGS_ALPHA="true" 16 | 17 | # Release the operator into the private alpha repository 18 | # Set as all to ensure it runs through the function 19 | # Add the alpha prefix for integration testing 20 | package_operator "$ALPHA_ACCOUNT_ID" "$ALPHA_REPOSITORY_REGION" "$REPOSITORY_NAME" "all" "-alpha" -------------------------------------------------------------------------------- /codebuild/scripts/package_prod_operators.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source codebuild/scripts/package_operators.sh 4 | 5 | # Replace JSON single quotes with double 
quotes for jq to understand 6 | ACCOUNTS_ESCAPED=`echo $ACCOUNTS | sed "s/'/\"/g"` 7 | for row in $(echo ${ACCOUNTS_ESCAPED} | jq -r '.[] | @base64'); do 8 | _jq() { 9 | echo ${row} | base64 --decode | jq -r ${1} 10 | } 11 | 12 | repository_account="$(_jq '.repositoryAccount')" 13 | region="$(_jq '.region')" 14 | image_repository="${REPOSITORY_NAME}" 15 | stage="$(_jq '.stage')" 16 | 17 | package_operator "$repository_account" "$region" "$image_repository" "$stage" 18 | done -------------------------------------------------------------------------------- /codebuild/scripts/travis-api: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # one time: travis enable -r aws/amazon-sagemaker-operator-for-k8s 3 | travis login --github-token "$GITHUB_TOKEN" 4 | # to get travis token run `travis token` 5 | body='{ 6 | "request": { 7 | "message": "Override the commit message: this is an api request 3", 8 | "branch":"travis" 9 | }}' 10 | 11 | curl -s -X POST \ 12 | -H "Content-Type: application/json" \ 13 | -H "Accept: application/json" \ 14 | -H "Travis-API-Version: 3" \ 15 | -H "Authorization: token $(travis token)" \ 16 | -d "$body" \ 17 | https://api.travis-ci.org/repo/aws%2Famazon-sagemaker-operator-for-k8s/requests 18 | -------------------------------------------------------------------------------- /codebuild/smlog_test.yaml: -------------------------------------------------------------------------------- 1 | version: 0.2 2 | 3 | phases: 4 | build: 5 | commands: 6 | # travis-api assumes that GITHUB_TOKEN has been defined with appropriate privileged. 
7 | - bash codebuild/scripts/travis-api 8 | 9 | -------------------------------------------------------------------------------- /codebuild/unit_test.yaml: -------------------------------------------------------------------------------- 1 | version: 0.2 2 | phases: 3 | build: 4 | commands: 5 | - (make test) 6 | - (cd smlogs-kubectl-plugin && make test) 7 | - (bash codebuild/scripts/diff_release_yamls.sh) -------------------------------------------------------------------------------- /config/base/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | 4 | # This template does not include the packaging of the CRDs. 5 | 6 | # Add other common files from base dir if you need to 7 | 8 | # This is set by `make docker-build` but not when you build helm chart 9 | images: 10 | - name: controller 11 | newName: 957583890962.dkr.ecr.us-east-1.amazonaws.com/amazon-sagemaker-operator-for-k8s 12 | newTag: v1 13 | resources: 14 | - ../manager 15 | patchesStrategicMerge: 16 | - manager_auth_proxy_patch.yaml 17 | -------------------------------------------------------------------------------- /config/base/manager_auth_proxy_patch.yaml: -------------------------------------------------------------------------------- 1 | # This patch inject a sidecar container which is a HTTP proxy for the controller manager, 2 | # it performs RBAC authorization against the Kubernetes API using SubjectAccessReviews. 
3 | apiVersion: apps/v1 4 | kind: Deployment 5 | metadata: 6 | name: controller-manager 7 | namespace: system 8 | spec: 9 | template: 10 | spec: 11 | containers: 12 | - name: kube-rbac-proxy 13 | image: gcr.io/kubebuilder/kube-rbac-proxy:v0.4.0 14 | args: 15 | - "--secure-listen-address=0.0.0.0:8443" 16 | - "--upstream=http://127.0.0.1:8080/" 17 | - "--logtostderr=true" 18 | - "--v=10" 19 | ports: 20 | - containerPort: 8443 21 | name: https 22 | -------------------------------------------------------------------------------- /config/base/manager_prometheus_metrics_patch.yaml: -------------------------------------------------------------------------------- 1 | # This patch enables Prometheus scraping for the manager pod. 2 | apiVersion: apps/v1 3 | kind: Deployment 4 | metadata: 5 | name: controller-manager 6 | namespace: system 7 | spec: 8 | template: 9 | metadata: 10 | annotations: 11 | prometheus.io/scrape: 'true' 12 | spec: 13 | containers: 14 | # Expose the prometheus metrics on default port 15 | - name: manager 16 | ports: 17 | - containerPort: 8080 18 | name: metrics 19 | protocol: TCP 20 | -------------------------------------------------------------------------------- /config/base/manager_webhook_patch.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: controller-manager 5 | namespace: system 6 | spec: 7 | template: 8 | spec: 9 | containers: 10 | - name: manager 11 | ports: 12 | - containerPort: 443 13 | name: webhook-server 14 | protocol: TCP 15 | volumeMounts: 16 | - mountPath: /tmp/k8s-webhook-server/serving-certs 17 | name: cert 18 | readOnly: true 19 | volumes: 20 | - name: cert 21 | secret: 22 | defaultMode: 420 23 | secretName: webhook-server-cert 24 | -------------------------------------------------------------------------------- /config/base/webhookcainjection_patch.yaml: 
-------------------------------------------------------------------------------- 1 | # This patch add annotation to admission webhook config and 2 | # the variables $(NAMESPACE) and $(CERTIFICATENAME) will be substituted by kustomize. 3 | apiVersion: admissionregistration.k8s.io/v1beta1 4 | kind: MutatingWebhookConfiguration 5 | metadata: 6 | name: mutating-webhook-configuration 7 | annotations: 8 | certmanager.k8s.io/inject-ca-from: $(NAMESPACE)/$(CERTIFICATENAME) 9 | --- 10 | apiVersion: admissionregistration.k8s.io/v1beta1 11 | kind: ValidatingWebhookConfiguration 12 | metadata: 13 | name: validating-webhook-configuration 14 | annotations: 15 | certmanager.k8s.io/inject-ca-from: $(NAMESPACE)/$(CERTIFICATENAME) 16 | -------------------------------------------------------------------------------- /config/bindings/clusterspaced/auth_proxy_role.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | name: proxy-role 5 | rules: 6 | - apiGroups: ["authentication.k8s.io"] 7 | resources: 8 | - tokenreviews 9 | verbs: ["create"] 10 | - apiGroups: ["authorization.k8s.io"] 11 | resources: 12 | - subjectaccessreviews 13 | verbs: ["create"] 14 | -------------------------------------------------------------------------------- /config/bindings/clusterspaced/auth_proxy_role_binding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | name: proxy-rolebinding 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: ClusterRole 8 | name: proxy-role 9 | subjects: 10 | - kind: ServiceAccount 11 | name: default 12 | namespace: system 13 | -------------------------------------------------------------------------------- /config/bindings/clusterspaced/auth_proxy_service.yaml: 
-------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | annotations: 5 | prometheus.io/port: "8443" 6 | prometheus.io/scheme: https 7 | prometheus.io/scrape: "true" 8 | labels: 9 | control-plane: controller-manager 10 | name: controller-manager-metrics-service 11 | namespace: system 12 | spec: 13 | ports: 14 | - name: https 15 | port: 8443 16 | targetPort: https 17 | selector: 18 | control-plane: controller-manager 19 | -------------------------------------------------------------------------------- /config/bindings/clusterspaced/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - ../../rbac 3 | - role_binding.yaml 4 | # Comment the following 3 lines if you want to disable 5 | # the auth proxy (https://github.com/brancz/kube-rbac-proxy) 6 | # which protects your /metrics endpoint. 7 | - auth_proxy_service.yaml 8 | - auth_proxy_role.yaml 9 | - auth_proxy_role_binding.yaml 10 | -------------------------------------------------------------------------------- /config/bindings/clusterspaced/role_binding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | name: manager-rolebinding 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: ClusterRole 8 | name: manager-role 9 | subjects: 10 | - kind: ServiceAccount 11 | name: default 12 | namespace: system 13 | -------------------------------------------------------------------------------- /config/bindings/namespaced/auth_proxy_role.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: Role 3 | metadata: 4 | name: proxy-role 5 | rules: 6 | - apiGroups: ["authentication.k8s.io"] 7 | resources: 8 | - tokenreviews 9 | verbs: ["create"] 10 | - apiGroups: 
["authorization.k8s.io"] 11 | resources: 12 | - subjectaccessreviews 13 | verbs: ["create"] 14 | -------------------------------------------------------------------------------- /config/bindings/namespaced/auth_proxy_role_binding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: RoleBinding 3 | metadata: 4 | name: proxy-rolebinding 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: Role 8 | name: proxy-role 9 | subjects: 10 | - kind: ServiceAccount 11 | name: default 12 | namespace: system 13 | -------------------------------------------------------------------------------- /config/bindings/namespaced/auth_proxy_service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | annotations: 5 | prometheus.io/port: "8443" 6 | prometheus.io/scheme: https 7 | prometheus.io/scrape: "true" 8 | labels: 9 | control-plane: controller-manager 10 | name: controller-manager-metrics-service 11 | namespace: system 12 | spec: 13 | ports: 14 | - name: https 15 | port: 8443 16 | targetPort: https 17 | selector: 18 | control-plane: controller-manager 19 | -------------------------------------------------------------------------------- /config/bindings/namespaced/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | 4 | resources: 5 | - ../../rbac 6 | - role_binding.yaml 7 | # Comment the following 3 lines if you want to disable 8 | # the auth proxy (https://github.com/brancz/kube-rbac-proxy) 9 | # which protects your /metrics endpoint. 
10 | - auth_proxy_service.yaml 11 | - auth_proxy_role.yaml 12 | - auth_proxy_role_binding.yaml 13 | 14 | patchesJson6902: 15 | # Modify ClusterRole to Role 16 | - target: 17 | group: rbac.authorization.k8s.io 18 | version: v1 19 | kind: ClusterRole 20 | name: manager-role 21 | patch: |- 22 | - op: replace 23 | path: "/kind" 24 | value: Role -------------------------------------------------------------------------------- /config/bindings/namespaced/role_binding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: RoleBinding 3 | metadata: 4 | name: manager-rolebinding 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: Role 8 | name: manager-role 9 | subjects: 10 | - kind: ServiceAccount 11 | name: default 12 | namespace: system 13 | -------------------------------------------------------------------------------- /config/certmanager/certificate.yaml: -------------------------------------------------------------------------------- 1 | # The following manifests contain a self-signed issuer CR and a certificate CR. 
2 | # More document can be found at https://docs.cert-manager.io 3 | apiVersion: certmanager.k8s.io/v1alpha1 4 | kind: Issuer 5 | metadata: 6 | name: selfsigned-issuer 7 | namespace: system 8 | spec: 9 | selfSigned: {} 10 | --- 11 | apiVersion: certmanager.k8s.io/v1alpha1 12 | kind: Certificate 13 | metadata: 14 | name: serving-cert # this name should match the one appeared in kustomizeconfig.yaml 15 | namespace: system 16 | spec: 17 | # $(SERVICENAME) and $(NAMESPACE) will be substituted by kustomize 18 | commonName: $(SERVICENAME).$(NAMESPACE).svc 19 | dnsNames: 20 | - $(SERVICENAME).$(NAMESPACE).svc.cluster.local 21 | issuerRef: 22 | kind: Issuer 23 | name: selfsigned-issuer 24 | secretName: webhook-server-cert # this secret will not be prefixed, since it's not managed by kustomize 25 | -------------------------------------------------------------------------------- /config/certmanager/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - certificate.yaml 3 | 4 | # the following config is for teaching kustomize how to do var substitution 5 | vars: 6 | - name: NAMESPACE # namespace of the service and the certificate CR 7 | objref: 8 | kind: Service 9 | version: v1 10 | name: webhook-service 11 | fieldref: 12 | fieldpath: metadata.namespace 13 | - name: CERTIFICATENAME 14 | objref: 15 | kind: Certificate 16 | group: certmanager.k8s.io 17 | version: v1alpha1 18 | name: serving-cert # this name should match the one in certificate.yaml 19 | - name: SERVICENAME 20 | objref: 21 | kind: Service 22 | version: v1 23 | name: webhook-service 24 | 25 | configurations: 26 | - kustomizeconfig.yaml 27 | -------------------------------------------------------------------------------- /config/certmanager/kustomizeconfig.yaml: -------------------------------------------------------------------------------- 1 | # This configuration is for teaching kustomize how to update name ref and var substitution 2 | 
nameReference: 3 | - kind: Issuer 4 | group: certmanager.k8s.io 5 | fieldSpecs: 6 | - kind: Certificate 7 | group: certmanager.k8s.io 8 | path: spec/issuerRef/name 9 | 10 | varReference: 11 | - kind: Certificate 12 | group: certmanager.k8s.io 13 | path: spec/commonName 14 | - kind: Certificate 15 | group: certmanager.k8s.io 16 | path: spec/dnsNames 17 | -------------------------------------------------------------------------------- /config/crd/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - bases/sagemaker.aws.amazon.com_trainingjobs.yaml 3 | - bases/sagemaker.aws.amazon.com_hyperparametertuningjobs.yaml 4 | - bases/sagemaker.aws.amazon.com_hostingdeployments.yaml 5 | - bases/sagemaker.aws.amazon.com_batchtransformjobs.yaml 6 | - bases/sagemaker.aws.amazon.com_models.yaml 7 | - bases/sagemaker.aws.amazon.com_endpointconfigs.yaml 8 | - bases/sagemaker.aws.amazon.com_hostingautoscalingpolicies.yaml 9 | - bases/sagemaker.aws.amazon.com_processingjobs.yaml 10 | # +kubebuilder:scaffold:crdkustomizeresource 11 | 12 | patchesStrategicMerge: 13 | - patches/role-arn-validation-pattern.yaml 14 | # [WEBHOOK] patches here are for enabling the conversion webhook for each CRD 15 | #- patches/webhook_in_trainingjobs.yaml 16 | #- patches/webhook_in_processingjobs.yaml 17 | # +kubebuilder:scaffold:crdkustomizewebhookpatch 18 | 19 | # [CAINJECTION] patches here are for enabling the CA injection for each CRD 20 | #- patches/cainjection_in_trainingjobs.yaml 21 | #- patches/cainjection_in_processingjobs.yaml 22 | # +kubebuilder:scaffold:crdkustomizecainjectionpatch 23 | 24 | # the following config is for teaching kustomize how to do kustomization for CRDs. 
25 | configurations: 26 | - kustomizeconfig.yaml 27 | -------------------------------------------------------------------------------- /config/crd/kustomizeconfig.yaml: -------------------------------------------------------------------------------- 1 | # This file is for teaching kustomize how to substitute name and namespace reference in CRD 2 | nameReference: 3 | - kind: Service 4 | version: v1 5 | fieldSpecs: 6 | - kind: CustomResourceDefinition 7 | group: apiextensions.k8s.io 8 | path: spec/conversion/webhookClientConfig/service/name 9 | 10 | namespace: 11 | - kind: CustomResourceDefinition 12 | group: apiextensions.k8s.io 13 | path: spec/conversion/webhookClientConfig/service/namespace 14 | create: false 15 | 16 | varReference: 17 | - path: metadata/annotations 18 | -------------------------------------------------------------------------------- /config/crd/patches/cainjection_in_processingjobs.yaml: -------------------------------------------------------------------------------- 1 | # The following patch adds a directive for certmanager to inject CA into the CRD 2 | # CRD conversion requires k8s 1.13 or later. 3 | apiVersion: apiextensions.k8s.io/v1beta1 4 | kind: CustomResourceDefinition 5 | metadata: 6 | annotations: 7 | cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) 8 | name: processingjobs.sagemaker.aws.amazon.com 9 | -------------------------------------------------------------------------------- /config/crd/patches/cainjection_in_trainingjobs.yaml: -------------------------------------------------------------------------------- 1 | # The following patch adds a directive for certmanager to inject CA into the CRD 2 | # CRD conversion requires k8s 1.13 or later. 
3 | apiVersion: apiextensions.k8s.io/v1beta1 4 | kind: CustomResourceDefinition 5 | metadata: 6 | annotations: 7 | certmanager.k8s.io/inject-ca-from: $(NAMESPACE)/$(CERTIFICATENAME) 8 | name: trainingjobs.sagemaker.aws.amazon.com 9 | -------------------------------------------------------------------------------- /config/crd/patches/role-arn-validation-pattern.yaml: -------------------------------------------------------------------------------- 1 | # This patch is required for roleArn. The pattern validation cannot be applied via Kubebuilder marker 2 | # because of a bug in controller tools: https://github.com/kubernetes-sigs/controller-tools/issues/272 3 | apiVersion: apiextensions.k8s.io/v1beta1 4 | kind: CustomResourceDefinition 5 | metadata: 6 | name: trainingjobs.sagemaker.aws.amazon.com 7 | spec: 8 | validation: 9 | openAPIV3Schema: 10 | properties: 11 | spec: 12 | properties: 13 | roleArn: 14 | pattern: '^arn:aws[a-z\-]*:iam::\d{12}:role/?[a-zA-Z_0-9+=,.@\-_/]+$' 15 | -------------------------------------------------------------------------------- /config/crd/patches/webhook_in_processingjobs.yaml: -------------------------------------------------------------------------------- 1 | # The following patch enables conversion webhook for CRD 2 | # CRD conversion requires k8s 1.13 or later. 
3 | apiVersion: apiextensions.k8s.io/v1beta1 4 | kind: CustomResourceDefinition 5 | metadata: 6 | name: processingjobs.sagemaker.aws.amazon.com 7 | spec: 8 | conversion: 9 | strategy: Webhook 10 | webhookClientConfig: 11 | # this is "\n" used as a placeholder, otherwise it will be rejected by the apiserver for being blank, 12 | # but we're going to set it later using the cert-manager (or potentially a patch if not using cert-manager) 13 | caBundle: Cg== 14 | service: 15 | namespace: system 16 | name: webhook-service 17 | path: /convert 18 | -------------------------------------------------------------------------------- /config/crd/patches/webhook_in_trainingjobs.yaml: -------------------------------------------------------------------------------- 1 | # The following patch enables conversion webhook for CRD 2 | # CRD conversion requires k8s 1.13 or later. 3 | apiVersion: apiextensions.k8s.io/v1beta1 4 | kind: CustomResourceDefinition 5 | metadata: 6 | name: trainingjobs.sagemaker.aws.amazon.com 7 | spec: 8 | conversion: 9 | strategy: Webhook 10 | webhookClientConfig: 11 | # this is "\n" used as a placeholder, otherwise it will be rejected by the apiserver for being blank, 12 | # but we're going to set it later using the cert-manager (or potentially a patch if not using cert-manager) 13 | caBundle: Cg== 14 | service: 15 | namespace: system 16 | name: webhook-service 17 | path: /convert 18 | -------------------------------------------------------------------------------- /config/default/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | 4 | # Adds namespace to all resources. 
5 | namespace: sagemaker-k8s-operator-system 6 | namePrefix: sagemaker-k8s-operator- 7 | 8 | resources: 9 | - ../crd 10 | - ../base 11 | - ../bindings/clusterspaced 12 | -------------------------------------------------------------------------------- /config/installers/rolebasedcreds/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | 4 | # Adds namespace to all resources. 5 | namespace: sagemaker-k8s-operator-system 6 | namePrefix: sagemaker-k8s-operator- 7 | 8 | bases: 9 | - ../../crd 10 | - ../../base 11 | - namespaced/resources 12 | 13 | resources: 14 | - ../../bindings/clusterspaced 15 | 16 | patchesStrategicMerge: 17 | - namespaced/resources/manager_add_service_account_patch.yaml -------------------------------------------------------------------------------- /config/installers/rolebasedcreds/namespaced/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | 4 | # Adds namespace to all resources. 5 | namespace: PLACEHOLDER-NAMESPACE 6 | namePrefix: sagemaker-k8s-operator- 7 | 8 | bases: 9 | - ../../../base 10 | - resources 11 | 12 | resources: 13 | - ../../../bindings/namespaced 14 | 15 | patchesStrategicMerge: 16 | - resources/manager_add_service_account_patch.yaml 17 | 18 | patchesJson6902: 19 | # Rename created namespace 20 | - target: 21 | version: v1 22 | kind: Namespace 23 | name: sagemaker-k8s-operator-system 24 | patch: |- 25 | - op: replace 26 | path: "/metadata/name" 27 | value: PLACEHOLDER-NAMESPACE 28 | # Patch broken ServiceAccount names/namespaces 29 | # Because we are messing with namespaces so much, especially using patching, 30 | # it's breaking all the built in dynamic references that Kustomize provides 31 | # which would ensure they are all in the correct namespace. 
32 | # See issue https://github.com/kubernetes-sigs/kustomize/issues/1377 33 | - target: 34 | group: rbac.authorization.k8s.io 35 | version: v1 36 | kind: RoleBinding 37 | name: manager-rolebinding 38 | patch: |- 39 | - op: replace 40 | path: "/subjects/0/name" 41 | value: sagemaker-k8s-operator-default 42 | - target: 43 | group: rbac.authorization.k8s.io 44 | version: v1 45 | kind: RoleBinding 46 | name: proxy-rolebinding 47 | patch: |- 48 | - op: replace 49 | path: "/subjects/0/name" 50 | value: sagemaker-k8s-operator-default 51 | - target: 52 | group: rbac.authorization.k8s.io 53 | version: v1 54 | kind: RoleBinding 55 | name: leader-election-rolebinding 56 | patch: |- 57 | - op: replace 58 | path: "/subjects/0/name" 59 | value: sagemaker-k8s-operator-default 60 | # Hacky solution so that its possible to add --namespace for both China and US region with this one file 61 | # change the position of pods so that manager pod is first pod in both US and China 62 | - target: 63 | group: apps 64 | version: v1 65 | kind: Deployment 66 | namespace: system 67 | name: controller-manager 68 | patch: |- 69 | - op: move 70 | from: "/spec/template/spec/containers/0" 71 | path: "/spec/template/spec/containers/-" 72 | # Add --namespace option to the manager pod 73 | - target: 74 | group: apps 75 | version: v1 76 | kind: Deployment 77 | namespace: system 78 | name: controller-manager 79 | patch: |- 80 | - op: add 81 | path: "/spec/template/spec/containers/0/args/-" 82 | value: --namespace=PLACEHOLDER-NAMESPACE -------------------------------------------------------------------------------- /config/installers/rolebasedcreds/namespaced/resources/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - serviceaccount.yaml -------------------------------------------------------------------------------- /config/installers/rolebasedcreds/namespaced/resources/manager_add_service_account_patch.yaml: 
-------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: controller-manager 5 | namespace: system 6 | spec: 7 | template: 8 | spec: 9 | serviceAccountName: sagemaker-k8s-operator-default 10 | -------------------------------------------------------------------------------- /config/installers/rolebasedcreds/namespaced/resources/serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: default 5 | namespace: sagemaker-k8s-operator-system 6 | # Until we find how to add this using kustomize 7 | annotations: 8 | eks.amazonaws.com/role-arn: arn:aws:iam::123456789012:role/DELETE_ME 9 | -------------------------------------------------------------------------------- /config/manager/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - manager.yaml 3 | -------------------------------------------------------------------------------- /config/manager/manager.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | labels: 5 | control-plane: controller-manager 6 | name: system 7 | --- 8 | apiVersion: apps/v1 9 | kind: Deployment 10 | metadata: 11 | name: controller-manager 12 | namespace: system 13 | labels: 14 | control-plane: controller-manager 15 | spec: 16 | selector: 17 | matchLabels: 18 | control-plane: controller-manager 19 | replicas: 1 20 | template: 21 | metadata: 22 | labels: 23 | control-plane: controller-manager 24 | spec: 25 | containers: 26 | - command: 27 | - /manager 28 | args: 29 | - --metrics-addr=127.0.0.1:8080 30 | image: controller:latest 31 | imagePullPolicy: Always 32 | name: manager 33 | env: 34 | - name: AWS_DEFAULT_SAGEMAKER_ENDPOINT 35 | value: "" 36 | resources: 37 | limits: 38 | cpu: 100m 39 | 
memory: 30Mi 40 | requests: 41 | cpu: 100m 42 | memory: 20Mi 43 | terminationGracePeriodSeconds: 10 44 | -------------------------------------------------------------------------------- /config/rbac/auth_proxy_role_binding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | name: proxy-rolebinding 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: ClusterRole 8 | name: proxy-role 9 | subjects: 10 | - kind: ServiceAccount 11 | name: default 12 | namespace: system 13 | -------------------------------------------------------------------------------- /config/rbac/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - role.yaml 3 | - leader_election_role.yaml 4 | - leader_election_role_binding.yaml -------------------------------------------------------------------------------- /config/rbac/leader_election_role.yaml: -------------------------------------------------------------------------------- 1 | # permissions to do leader election. 
2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: Role 4 | metadata: 5 | name: leader-election-role 6 | rules: 7 | - apiGroups: 8 | - "" 9 | resources: 10 | - configmaps 11 | verbs: 12 | - get 13 | - list 14 | - watch 15 | - create 16 | - update 17 | - patch 18 | - delete 19 | - apiGroups: 20 | - "" 21 | resources: 22 | - configmaps/status 23 | verbs: 24 | - get 25 | - update 26 | - patch 27 | -------------------------------------------------------------------------------- /config/rbac/leader_election_role_binding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: RoleBinding 3 | metadata: 4 | name: leader-election-rolebinding 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: Role 8 | name: leader-election-role 9 | subjects: 10 | - kind: ServiceAccount 11 | name: default 12 | namespace: system 13 | -------------------------------------------------------------------------------- /config/webhook/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - manifests.yaml 3 | - service.yaml 4 | 5 | configurations: 6 | - kustomizeconfig.yaml 7 | -------------------------------------------------------------------------------- /config/webhook/kustomizeconfig.yaml: -------------------------------------------------------------------------------- 1 | # the following config is for teaching kustomize where to look at when substituting vars. 2 | # It requires kustomize v2.1.0 or newer to work properly. 
3 | nameReference: 4 | - kind: Service 5 | version: v1 6 | fieldSpecs: 7 | - kind: MutatingWebhookConfiguration 8 | group: admissionregistration.k8s.io 9 | path: webhooks/clientConfig/service/name 10 | - kind: ValidatingWebhookConfiguration 11 | group: admissionregistration.k8s.io 12 | path: webhooks/clientConfig/service/name 13 | 14 | namespace: 15 | - kind: MutatingWebhookConfiguration 16 | group: admissionregistration.k8s.io 17 | path: webhooks/clientConfig/service/namespace 18 | create: true 19 | - kind: ValidatingWebhookConfiguration 20 | group: admissionregistration.k8s.io 21 | path: webhooks/clientConfig/service/namespace 22 | create: true 23 | 24 | varReference: 25 | - path: metadata/annotations 26 | -------------------------------------------------------------------------------- /config/webhook/manifests.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/amazon-sagemaker-operator-for-k8s/e131566f83b8d860300c1ccf377b3982b385f177/config/webhook/manifests.yaml -------------------------------------------------------------------------------- /config/webhook/service.yaml: -------------------------------------------------------------------------------- 1 | 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | name: webhook-service 6 | namespace: system 7 | spec: 8 | ports: 9 | - port: 443 10 | targetPort: 443 11 | selector: 12 | control-plane: controller-manager 13 | -------------------------------------------------------------------------------- /controllers/README.md: -------------------------------------------------------------------------------- 1 | # Controllers Package 2 | This package contains common controller utilities and packages for each controller. 
3 | -------------------------------------------------------------------------------- /controllers/batchtransformjob/READEME.md: -------------------------------------------------------------------------------- 1 | # Package batchtransformjob 2 | This package contains the batch transform job controller, its tests, and helper types and functions. 3 | -------------------------------------------------------------------------------- /controllers/batchtransformjob/suite_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package batchtransformjob 18 | 19 | import ( 20 | "path/filepath" 21 | "testing" 22 | 23 | . "github.com/onsi/ginkgo" 24 | . "github.com/onsi/gomega" 25 | 26 | commonv1 "github.com/aws/amazon-sagemaker-operator-for-k8s/api/v1/common" 27 | 28 | "k8s.io/client-go/kubernetes/scheme" 29 | "k8s.io/client-go/rest" 30 | "sigs.k8s.io/controller-runtime/pkg/client" 31 | "sigs.k8s.io/controller-runtime/pkg/envtest" 32 | "sigs.k8s.io/controller-runtime/pkg/envtest/printer" 33 | logf "sigs.k8s.io/controller-runtime/pkg/log" 34 | "sigs.k8s.io/controller-runtime/pkg/log/zap" 35 | // +kubebuilder:scaffold:imports 36 | ) 37 | 38 | // These tests use Ginkgo (BDD-style Go testing framework). Refer to 39 | // http://onsi.github.io/ginkgo/ to learn more about Ginkgo. 
40 | 41 | var cfg *rest.Config 42 | var k8sClient client.Client 43 | var testEnv *envtest.Environment 44 | 45 | func TestAPIs(t *testing.T) { 46 | RegisterFailHandler(Fail) 47 | 48 | RunSpecsWithDefaultAndCustomReporters(t, 49 | "Controller Suite", 50 | []Reporter{printer.NewlineReporter{}}) 51 | } 52 | 53 | var _ = BeforeSuite(func(done Done) { 54 | logf.SetLogger(zap.LoggerTo(GinkgoWriter, true)) 55 | 56 | By("bootstrapping test environment") 57 | testEnv = &envtest.Environment{ 58 | CRDDirectoryPaths: []string{filepath.Join("..", "..", "config", "crd", "bases")}, 59 | } 60 | 61 | var err error 62 | cfg, err = testEnv.Start() 63 | Expect(err).ToNot(HaveOccurred()) 64 | Expect(cfg).ToNot(BeNil()) 65 | 66 | err = commonv1.AddToScheme(scheme.Scheme) 67 | Expect(err).NotTo(HaveOccurred()) 68 | 69 | // +kubebuilder:scaffold:scheme 70 | 71 | k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) 72 | Expect(err).ToNot(HaveOccurred()) 73 | Expect(k8sClient).ToNot(BeNil()) 74 | 75 | close(done) 76 | }, 60) 77 | 78 | var _ = AfterSuite(func() { 79 | By("tearing down the test environment") 80 | err := testEnv.Stop() 81 | Expect(err).ToNot(HaveOccurred()) 82 | }) 83 | -------------------------------------------------------------------------------- /controllers/controllertest/README.md: -------------------------------------------------------------------------------- 1 | # Package controllertest 2 | This package contains test utilities for controllers, including mocks and common helper functions. 3 | -------------------------------------------------------------------------------- /controllers/controllertest/reconcile_responses.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package controllertest 18 | 19 | import ( 20 | "time" 21 | 22 | . "github.com/onsi/gomega" 23 | 24 | ctrl "sigs.k8s.io/controller-runtime" 25 | // +kubebuilder:scaffold:imports 26 | ) 27 | 28 | // Expect the controller return value to be RequeueAfterInterval, with the poll duration specified. 29 | func ExpectRequeueAfterInterval(result ctrl.Result, err error, pollDuration string) { 30 | Expect(err).ToNot(HaveOccurred()) 31 | Expect(result.Requeue).To(Equal(false)) 32 | Expect(result.RequeueAfter).To(Equal(ParseDurationOrFail(pollDuration))) 33 | } 34 | 35 | // Expect the controller return value to be RequeueImmediately. 36 | func ExpectRequeueImmediately(result ctrl.Result, err error) { 37 | Expect(err).ToNot(HaveOccurred()) 38 | Expect(result.Requeue).To(Equal(true)) 39 | Expect(result.RequeueAfter).To(Equal(time.Duration(0))) 40 | } 41 | 42 | // Expect the controller return value to be NoRequeue 43 | func ExpectNoRequeue(result ctrl.Result, err error) { 44 | Expect(err).ToNot(HaveOccurred()) 45 | Expect(result.Requeue).To(Equal(false)) 46 | Expect(result.RequeueAfter).To(Equal(time.Duration(0))) 47 | } 48 | -------------------------------------------------------------------------------- /controllers/controllertest/util.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package controllertest 18 | 19 | import ( 20 | "context" 21 | 22 | . "github.com/onsi/ginkgo" 23 | v1 "k8s.io/api/core/v1" 24 | apierrs "k8s.io/apimachinery/pkg/api/errors" 25 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 26 | "sigs.k8s.io/controller-runtime/pkg/client" 27 | 28 | "fmt" 29 | "time" 30 | ) 31 | 32 | func ToStringPtr(str string) *string { 33 | return &str 34 | } 35 | 36 | func ToInt64Ptr(i int64) *int64 { 37 | return &i 38 | } 39 | 40 | func ToIntPtr(i int) *int { 41 | return &i 42 | } 43 | 44 | func ToFloat64Ptr(f float64) *float64 { 45 | return &f 46 | } 47 | 48 | func ToBoolPtr(b bool) *bool { 49 | return &b 50 | } 51 | 52 | func ParseDurationOrFail(str string) time.Duration { 53 | dur, err := time.ParseDuration(str) 54 | 55 | if err != nil { 56 | Fail(fmt.Sprintf("Invalid duration string: '%s'", str)) 57 | return time.Duration(0) 58 | } 59 | 60 | return dur 61 | } 62 | 63 | func CreateMockNamespace(ctx context.Context, k8sClient client.Client, k8sNamespace string) error { 64 | err := k8sClient.Create(ctx, &v1.Namespace{ 65 | ObjectMeta: metav1.ObjectMeta{ 66 | Name: k8sNamespace, 67 | }, 68 | }) 69 | 70 | if err != nil && !apierrs.IsAlreadyExists(err) { 71 | return err 72 | } 73 | return nil 74 | } 75 | -------------------------------------------------------------------------------- /controllers/endpointconfig/suite_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package endpointconfig 18 | 19 | import ( 20 | "path/filepath" 21 | "testing" 22 | 23 | . "github.com/onsi/ginkgo" 24 | . "github.com/onsi/gomega" 25 | 26 | commonv1 "github.com/aws/amazon-sagemaker-operator-for-k8s/api/v1/common" 27 | 28 | "k8s.io/client-go/kubernetes/scheme" 29 | "k8s.io/client-go/rest" 30 | "sigs.k8s.io/controller-runtime/pkg/client" 31 | "sigs.k8s.io/controller-runtime/pkg/envtest" 32 | "sigs.k8s.io/controller-runtime/pkg/envtest/printer" 33 | logf "sigs.k8s.io/controller-runtime/pkg/log" 34 | "sigs.k8s.io/controller-runtime/pkg/log/zap" 35 | // +kubebuilder:scaffold:imports 36 | ) 37 | 38 | // These tests use Ginkgo (BDD-style Go testing framework). Refer to 39 | // http://onsi.github.io/ginkgo/ to learn more about Ginkgo. 
40 | 41 | var cfg *rest.Config 42 | var k8sClient client.Client 43 | var testEnv *envtest.Environment 44 | 45 | func TestAPIs(t *testing.T) { 46 | RegisterFailHandler(Fail) 47 | 48 | RunSpecsWithDefaultAndCustomReporters(t, 49 | "EndpointConfig Controller Suite", 50 | []Reporter{printer.NewlineReporter{}}) 51 | } 52 | 53 | var _ = BeforeSuite(func(done Done) { 54 | logf.SetLogger(zap.LoggerTo(GinkgoWriter, true)) 55 | 56 | By("bootstrapping test environment") 57 | testEnv = &envtest.Environment{ 58 | CRDDirectoryPaths: []string{filepath.Join("..", "..", "config", "crd", "bases")}, 59 | } 60 | 61 | var err error 62 | cfg, err = testEnv.Start() 63 | Expect(err).ToNot(HaveOccurred()) 64 | Expect(cfg).ToNot(BeNil()) 65 | 66 | err = commonv1.AddToScheme(scheme.Scheme) 67 | Expect(err).NotTo(HaveOccurred()) 68 | 69 | // +kubebuilder:scaffold:scheme 70 | 71 | k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) 72 | Expect(err).ToNot(HaveOccurred()) 73 | Expect(k8sClient).ToNot(BeNil()) 74 | 75 | close(done) 76 | }, 60) 77 | 78 | var _ = AfterSuite(func() { 79 | By("tearing down the test environment") 80 | err := testEnv.Stop() 81 | Expect(err).ToNot(HaveOccurred()) 82 | }) 83 | -------------------------------------------------------------------------------- /controllers/hosting/suite_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package hosting 18 | 19 | import ( 20 | "path/filepath" 21 | "testing" 22 | 23 | . "github.com/onsi/ginkgo" 24 | . "github.com/onsi/gomega" 25 | 26 | commonv1 "github.com/aws/amazon-sagemaker-operator-for-k8s/api/v1/common" 27 | 28 | "k8s.io/client-go/kubernetes/scheme" 29 | "k8s.io/client-go/rest" 30 | "sigs.k8s.io/controller-runtime/pkg/client" 31 | "sigs.k8s.io/controller-runtime/pkg/envtest" 32 | "sigs.k8s.io/controller-runtime/pkg/envtest/printer" 33 | logf "sigs.k8s.io/controller-runtime/pkg/log" 34 | "sigs.k8s.io/controller-runtime/pkg/log/zap" 35 | // +kubebuilder:scaffold:imports 36 | ) 37 | 38 | // These tests use Ginkgo (BDD-style Go testing framework). Refer to 39 | // http://onsi.github.io/ginkgo/ to learn more about Ginkgo. 40 | 41 | var cfg *rest.Config 42 | var k8sClient client.Client 43 | var testEnv *envtest.Environment 44 | 45 | func TestAPIs(t *testing.T) { 46 | RegisterFailHandler(Fail) 47 | 48 | RunSpecsWithDefaultAndCustomReporters(t, 49 | "Controller Suite", 50 | []Reporter{printer.NewlineReporter{}}) 51 | } 52 | 53 | var _ = BeforeSuite(func(done Done) { 54 | logf.SetLogger(zap.LoggerTo(GinkgoWriter, true)) 55 | 56 | By("bootstrapping test environment") 57 | testEnv = &envtest.Environment{ 58 | CRDDirectoryPaths: []string{filepath.Join("..", "..", "config", "crd", "bases")}, 59 | } 60 | 61 | var err error 62 | cfg, err = testEnv.Start() 63 | Expect(err).ToNot(HaveOccurred()) 64 | Expect(cfg).ToNot(BeNil()) 65 | 66 | err = commonv1.AddToScheme(scheme.Scheme) 67 | Expect(err).NotTo(HaveOccurred()) 68 | 69 | // +kubebuilder:scaffold:scheme 70 | 71 | k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) 72 | Expect(err).ToNot(HaveOccurred()) 73 | Expect(k8sClient).ToNot(BeNil()) 74 | 75 | close(done) 76 | }, 60) 77 | 78 | var _ = AfterSuite(func() { 79 | By("tearing down the test environment") 80 | err 
:= testEnv.Stop() 81 | Expect(err).ToNot(HaveOccurred()) 82 | }) 83 | -------------------------------------------------------------------------------- /controllers/hostingautoscalingpolicy/suite_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package hostingautoscalingpolicy 18 | 19 | import ( 20 | "path/filepath" 21 | "testing" 22 | 23 | . "github.com/onsi/ginkgo" 24 | . "github.com/onsi/gomega" 25 | 26 | commonv1 "github.com/aws/amazon-sagemaker-operator-for-k8s/api/v1/common" 27 | "k8s.io/client-go/kubernetes/scheme" 28 | "k8s.io/client-go/rest" 29 | "sigs.k8s.io/controller-runtime/pkg/client" 30 | "sigs.k8s.io/controller-runtime/pkg/envtest" 31 | "sigs.k8s.io/controller-runtime/pkg/envtest/printer" 32 | logf "sigs.k8s.io/controller-runtime/pkg/log" 33 | "sigs.k8s.io/controller-runtime/pkg/log/zap" 34 | // +kubebuilder:scaffold:imports 35 | ) 36 | 37 | // These tests use Ginkgo (BDD-style Go testing framework). Refer to 38 | // http://onsi.github.io/ginkgo/ to learn more about Ginkgo. 
39 | 40 | var cfg *rest.Config 41 | var k8sClient client.Client 42 | var testEnv *envtest.Environment 43 | 44 | func TestAPIs(t *testing.T) { 45 | RegisterFailHandler(Fail) 46 | 47 | RunSpecsWithDefaultAndCustomReporters(t, 48 | "HostingAutoscalingPolicy Controller Suite", 49 | []Reporter{printer.NewlineReporter{}}) 50 | } 51 | 52 | var _ = BeforeSuite(func(done Done) { 53 | logf.SetLogger(zap.LoggerTo(GinkgoWriter, true)) 54 | 55 | By("bootstrapping test environment") 56 | testEnv = &envtest.Environment{ 57 | CRDDirectoryPaths: []string{filepath.Join("..", "..", "config", "crd", "bases")}, 58 | } 59 | 60 | var err error 61 | cfg, err = testEnv.Start() 62 | Expect(err).ToNot(HaveOccurred()) 63 | Expect(cfg).ToNot(BeNil()) 64 | 65 | err = commonv1.AddToScheme(scheme.Scheme) 66 | Expect(err).NotTo(HaveOccurred()) 67 | 68 | // +kubebuilder:scaffold:scheme 69 | 70 | k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) 71 | Expect(err).ToNot(HaveOccurred()) 72 | Expect(k8sClient).ToNot(BeNil()) 73 | 74 | close(done) 75 | }, 60) 76 | 77 | var _ = AfterSuite(func() { 78 | By("tearing down the test environment") 79 | err := testEnv.Stop() 80 | Expect(err).ToNot(HaveOccurred()) 81 | }) 82 | -------------------------------------------------------------------------------- /controllers/hyperparametertuningjob/README.md: -------------------------------------------------------------------------------- 1 | # Package hyperparametertuningjob 2 | This package contains the hyperparameter tuning job controller, its tests, and helper types and functions. 3 | -------------------------------------------------------------------------------- /controllers/hyperparametertuningjob/suite_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package hyperparametertuningjob 18 | 19 | import ( 20 | "path/filepath" 21 | "testing" 22 | 23 | . "github.com/onsi/ginkgo" 24 | . "github.com/onsi/gomega" 25 | 26 | commonv1 "github.com/aws/amazon-sagemaker-operator-for-k8s/api/v1/common" 27 | 28 | "k8s.io/client-go/kubernetes/scheme" 29 | "k8s.io/client-go/rest" 30 | "sigs.k8s.io/controller-runtime/pkg/client" 31 | "sigs.k8s.io/controller-runtime/pkg/envtest" 32 | "sigs.k8s.io/controller-runtime/pkg/envtest/printer" 33 | logf "sigs.k8s.io/controller-runtime/pkg/log" 34 | "sigs.k8s.io/controller-runtime/pkg/log/zap" 35 | // +kubebuilder:scaffold:imports 36 | ) 37 | 38 | // To run tests envtest requires that some Kubernetes files exist on the host machine. 39 | // See https://github.com/kubernetes-sigs/controller-runtime/blob/0d3fbbc45eddb4b47e33924bffbc5df1b66901ca/pkg/envtest/server.go#L46 . 40 | 41 | // These tests use Ginkgo (BDD-style Go testing framework). Refer to 42 | // http://onsi.github.io/ginkgo/ to learn more about Ginkgo. 
43 | 44 | var cfg *rest.Config 45 | var k8sClient client.Client 46 | var testEnv *envtest.Environment 47 | 48 | func TestAPIs(t *testing.T) { 49 | RegisterFailHandler(Fail) 50 | 51 | RunSpecsWithDefaultAndCustomReporters(t, 52 | "HyperparameterTuningJob Controller", 53 | []Reporter{printer.NewlineReporter{}}) 54 | } 55 | 56 | var _ = BeforeSuite(func(done Done) { 57 | logf.SetLogger(zap.LoggerTo(GinkgoWriter, true)) 58 | 59 | By("bootstrapping test environment") 60 | testEnv = &envtest.Environment{ 61 | CRDDirectoryPaths: []string{filepath.Join("..", "..", "config", "crd", "bases")}, 62 | } 63 | 64 | var err error; cfg, err = testEnv.Start() // "=" (not ":=") so the package-level cfg is assigned, not shadowed by a local 65 | Expect(err).ToNot(HaveOccurred()) 66 | Expect(cfg).ToNot(BeNil()) 67 | 68 | err = commonv1.AddToScheme(scheme.Scheme) 69 | Expect(err).NotTo(HaveOccurred()) 70 | 71 | // +kubebuilder:scaffold:scheme 72 | 73 | k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) 74 | Expect(err).ToNot(HaveOccurred()) 75 | Expect(k8sClient).ToNot(BeNil()) 76 | 77 | close(done) 78 | }, 60) 79 | 80 | var _ = AfterSuite(func() { 81 | By("tearing down the test environment") 82 | err := testEnv.Stop() 83 | Expect(err).ToNot(HaveOccurred()) 84 | }) 85 | -------------------------------------------------------------------------------- /controllers/model/suite_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package model 18 | 19 | import ( 20 | "path/filepath" 21 | "testing" 22 | 23 | . "github.com/onsi/ginkgo" 24 | . "github.com/onsi/gomega" 25 | 26 | commonv1 "github.com/aws/amazon-sagemaker-operator-for-k8s/api/v1/common" 27 | 28 | "k8s.io/client-go/kubernetes/scheme" 29 | "k8s.io/client-go/rest" 30 | "sigs.k8s.io/controller-runtime/pkg/client" 31 | "sigs.k8s.io/controller-runtime/pkg/envtest" 32 | "sigs.k8s.io/controller-runtime/pkg/envtest/printer" 33 | logf "sigs.k8s.io/controller-runtime/pkg/log" 34 | "sigs.k8s.io/controller-runtime/pkg/log/zap" 35 | // +kubebuilder:scaffold:imports 36 | ) 37 | 38 | // These tests use Ginkgo (BDD-style Go testing framework). Refer to 39 | // http://onsi.github.io/ginkgo/ to learn more about Ginkgo. 40 | 41 | var cfg *rest.Config 42 | var k8sClient client.Client 43 | var testEnv *envtest.Environment 44 | 45 | func TestAPIs(t *testing.T) { 46 | RegisterFailHandler(Fail) 47 | 48 | RunSpecsWithDefaultAndCustomReporters(t, 49 | "Model Controller Suite", 50 | []Reporter{printer.NewlineReporter{}}) 51 | } 52 | 53 | var _ = BeforeSuite(func(done Done) { 54 | logf.SetLogger(zap.LoggerTo(GinkgoWriter, true)) 55 | 56 | By("bootstrapping test environment") 57 | testEnv = &envtest.Environment{ 58 | CRDDirectoryPaths: []string{filepath.Join("..", "..", "config", "crd", "bases")}, 59 | } 60 | 61 | var err error 62 | cfg, err = testEnv.Start() 63 | Expect(err).ToNot(HaveOccurred()) 64 | Expect(cfg).ToNot(BeNil()) 65 | 66 | err = commonv1.AddToScheme(scheme.Scheme) 67 | Expect(err).NotTo(HaveOccurred()) 68 | 69 | // +kubebuilder:scaffold:scheme 70 | 71 | k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) 72 | Expect(err).ToNot(HaveOccurred()) 73 | Expect(k8sClient).ToNot(BeNil()) 74 | 75 | close(done) 76 | }, 60) 77 | 78 | var _ = AfterSuite(func() { 79 | By("tearing down the test environment") 80 | 
err := testEnv.Stop() 81 | Expect(err).ToNot(HaveOccurred()) 82 | }) 83 | -------------------------------------------------------------------------------- /controllers/processingjob/suite_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package processingjob 18 | 19 | import ( 20 | "path/filepath" 21 | "testing" 22 | 23 | . "github.com/onsi/ginkgo" 24 | . "github.com/onsi/gomega" 25 | 26 | commonv1 "github.com/aws/amazon-sagemaker-operator-for-k8s/api/v1/common" 27 | 28 | "k8s.io/client-go/kubernetes/scheme" 29 | "k8s.io/client-go/rest" 30 | "sigs.k8s.io/controller-runtime/pkg/client" 31 | "sigs.k8s.io/controller-runtime/pkg/envtest" 32 | "sigs.k8s.io/controller-runtime/pkg/envtest/printer" 33 | logf "sigs.k8s.io/controller-runtime/pkg/log" 34 | "sigs.k8s.io/controller-runtime/pkg/log/zap" 35 | // +kubebuilder:scaffold:imports 36 | ) 37 | 38 | // To run tests envtest requires that some Kubernetes files exist on the host machine. 39 | // See https://github.com/kubernetes-sigs/controller-runtime/blob/0d3fbbc45eddb4b47e33924bffbc5df1b66901ca/pkg/envtest/server.go#L46 . 40 | 41 | // These tests use Ginkgo (BDD-style Go testing framework). Refer to 42 | // http://onsi.github.io/ginkgo/ to learn more about Ginkgo. 
43 | 44 | var cfg *rest.Config 45 | var k8sClient client.Client 46 | var testEnv *envtest.Environment 47 | 48 | func TestAPIs(t *testing.T) { 49 | RegisterFailHandler(Fail) 50 | 51 | RunSpecsWithDefaultAndCustomReporters(t, 52 | "ProcessingJob Controller", 53 | []Reporter{printer.NewlineReporter{}}) 54 | } 55 | 56 | var _ = BeforeSuite(func(done Done) { 57 | logf.SetLogger(zap.LoggerTo(GinkgoWriter, true)) 58 | 59 | By("bootstrapping test environment") 60 | testEnv = &envtest.Environment{ 61 | CRDDirectoryPaths: []string{filepath.Join("..", "..", "config", "crd", "bases")}, 62 | } 63 | 64 | var err error; cfg, err = testEnv.Start() // "=" (not ":=") so the package-level cfg is assigned, not shadowed by a local 65 | Expect(err).ToNot(HaveOccurred()) 66 | Expect(cfg).ToNot(BeNil()) 67 | 68 | err = commonv1.AddToScheme(scheme.Scheme) 69 | Expect(err).NotTo(HaveOccurred()) 70 | 71 | // +kubebuilder:scaffold:scheme 72 | 73 | k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) 74 | Expect(err).ToNot(HaveOccurred()) 75 | Expect(k8sClient).ToNot(BeNil()) 76 | 77 | close(done) 78 | }, 60) 79 | 80 | var _ = AfterSuite(func() { 81 | By("tearing down the test environment") 82 | err := testEnv.Stop() 83 | Expect(err).ToNot(HaveOccurred()) 84 | }) 85 | -------------------------------------------------------------------------------- /controllers/sdkutil/README.md: -------------------------------------------------------------------------------- 1 | # sdkutil Package 2 | This package contains utilities for interacting with [SageMaker SDK](https://github.com/aws/aws-sdk-go-v2/tree/002fb7d849ecb625b8ee15fa04be65ff217353c9/service/sagemaker) types. Specifically, these files help convert and compare the operator's SageMaker types to SageMaker SDK types. 3 | -------------------------------------------------------------------------------- /controllers/sdkutil/clientwrapper/suite_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package clientwrapper 18 | 19 | import ( 20 | "path/filepath" 21 | "testing" 22 | 23 | . "github.com/onsi/ginkgo" 24 | . "github.com/onsi/gomega" 25 | 26 | commonv1 "github.com/aws/amazon-sagemaker-operator-for-k8s/api/v1/common" 27 | 28 | "k8s.io/client-go/kubernetes/scheme" 29 | "k8s.io/client-go/rest" 30 | "sigs.k8s.io/controller-runtime/pkg/client" 31 | "sigs.k8s.io/controller-runtime/pkg/envtest" 32 | "sigs.k8s.io/controller-runtime/pkg/envtest/printer" 33 | logf "sigs.k8s.io/controller-runtime/pkg/log" 34 | "sigs.k8s.io/controller-runtime/pkg/log/zap" 35 | // +kubebuilder:scaffold:imports 36 | ) 37 | 38 | // These tests use Ginkgo (BDD-style Go testing framework). Refer to 39 | // http://onsi.github.io/ginkgo/ to learn more about Ginkgo. 
40 | 41 | var cfg *rest.Config 42 | var k8sClient client.Client 43 | var testEnv *envtest.Environment 44 | 45 | func TestAPIs(t *testing.T) { 46 | RegisterFailHandler(Fail) 47 | 48 | RunSpecsWithDefaultAndCustomReporters(t, 49 | "Controller Suite", 50 | []Reporter{printer.NewlineReporter{}}) 51 | } 52 | 53 | var _ = BeforeSuite(func(done Done) { 54 | logf.SetLogger(zap.LoggerTo(GinkgoWriter, true)) 55 | 56 | By("bootstrapping test environment") 57 | testEnv = &envtest.Environment{ 58 | CRDDirectoryPaths: []string{filepath.Join("..", "..", "..", "config", "crd", "bases")}, 59 | } 60 | 61 | var err error 62 | cfg, err = testEnv.Start() 63 | Expect(err).ToNot(HaveOccurred()) 64 | Expect(cfg).ToNot(BeNil()) 65 | 66 | err = commonv1.AddToScheme(scheme.Scheme) 67 | Expect(err).NotTo(HaveOccurred()) 68 | 69 | // +kubebuilder:scaffold:scheme 70 | 71 | k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) 72 | Expect(err).ToNot(HaveOccurred()) 73 | Expect(k8sClient).ToNot(BeNil()) 74 | 75 | close(done) 76 | }, 60) 77 | 78 | var _ = AfterSuite(func() { 79 | By("tearing down the test environment") 80 | err := testEnv.Stop() 81 | Expect(err).ToNot(HaveOccurred()) 82 | }) 83 | -------------------------------------------------------------------------------- /controllers/sdkutil/suite_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package sdkutil 18 | 19 | import ( 20 | "testing" 21 | 22 | . "github.com/onsi/ginkgo" 23 | . "github.com/onsi/gomega" 24 | ) 25 | 26 | func TestSdkUtils(t *testing.T) { 27 | RegisterFailHandler(Fail) 28 | RunSpecs(t, "SDK Utility Suite") 29 | } 30 | -------------------------------------------------------------------------------- /controllers/suite_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package controllers 18 | 19 | import ( 20 | "testing" 21 | 22 | . "github.com/onsi/ginkgo" 23 | . "github.com/onsi/gomega" 24 | 25 | logf "sigs.k8s.io/controller-runtime/pkg/log" 26 | "sigs.k8s.io/controller-runtime/pkg/log/zap" 27 | ) 28 | 29 | // These tests use Ginkgo (BDD-style Go testing framework). Refer to 30 | // http://onsi.github.io/ginkgo/ to learn more about Ginkgo. 
31 | 32 | func TestControllerCommon(t *testing.T) { 33 | RegisterFailHandler(Fail) 34 | RunSpecs(t, "Controller Common Suite") 35 | } 36 | 37 | var _ = BeforeSuite(func() { 38 | logf.SetLogger(zap.LoggerTo(GinkgoWriter, true)) 39 | }) 40 | -------------------------------------------------------------------------------- /controllers/trainingjob/README.md: -------------------------------------------------------------------------------- 1 | # Package trainingjob 2 | This package contains the training job controller, its tests, and helper types and functions. 3 | -------------------------------------------------------------------------------- /controllers/trainingjob/suite_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package trainingjob 18 | 19 | import ( 20 | "path/filepath" 21 | "testing" 22 | 23 | . "github.com/onsi/ginkgo" 24 | . 
"github.com/onsi/gomega" 25 | 26 | commonv1 "github.com/aws/amazon-sagemaker-operator-for-k8s/api/v1/common" 27 | 28 | "k8s.io/client-go/kubernetes/scheme" 29 | "k8s.io/client-go/rest" 30 | "sigs.k8s.io/controller-runtime/pkg/client" 31 | "sigs.k8s.io/controller-runtime/pkg/envtest" 32 | "sigs.k8s.io/controller-runtime/pkg/envtest/printer" 33 | logf "sigs.k8s.io/controller-runtime/pkg/log" 34 | "sigs.k8s.io/controller-runtime/pkg/log/zap" 35 | // +kubebuilder:scaffold:imports 36 | ) 37 | 38 | // To run tests envtest requires that some Kubernetes files exist on the host machine. 39 | // See https://github.com/kubernetes-sigs/controller-runtime/blob/0d3fbbc45eddb4b47e33924bffbc5df1b66901ca/pkg/envtest/server.go#L46 . 40 | 41 | // These tests use Ginkgo (BDD-style Go testing framework). Refer to 42 | // http://onsi.github.io/ginkgo/ to learn more about Ginkgo. 43 | 44 | var cfg *rest.Config 45 | var k8sClient client.Client 46 | var testEnv *envtest.Environment 47 | 48 | func TestAPIs(t *testing.T) { 49 | RegisterFailHandler(Fail) 50 | 51 | RunSpecsWithDefaultAndCustomReporters(t, 52 | "TrainingJob Controller", 53 | []Reporter{printer.NewlineReporter{}}) 54 | } 55 | 56 | var _ = BeforeSuite(func(done Done) { 57 | logf.SetLogger(zap.LoggerTo(GinkgoWriter, true)) 58 | 59 | By("bootstrapping test environment") 60 | testEnv = &envtest.Environment{ 61 | CRDDirectoryPaths: []string{filepath.Join("..", "..", "config", "crd", "bases")}, 62 | } 63 | 64 | cfg, err := testEnv.Start() 65 | Expect(err).ToNot(HaveOccurred()) 66 | Expect(cfg).ToNot(BeNil()) 67 | 68 | err = commonv1.AddToScheme(scheme.Scheme) 69 | Expect(err).NotTo(HaveOccurred()) 70 | 71 | // +kubebuilder:scaffold:scheme 72 | 73 | k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) 74 | Expect(err).ToNot(HaveOccurred()) 75 | Expect(k8sClient).ToNot(BeNil()) 76 | 77 | close(done) 78 | }, 60) 79 | 80 | var _ = AfterSuite(func() { 81 | By("tearing down the test environment") 82 | err := 
testEnv.Stop() 83 | Expect(err).ToNot(HaveOccurred()) 84 | }) 85 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/aws/amazon-sagemaker-operator-for-k8s 2 | 3 | go 1.13 4 | 5 | require ( 6 | github.com/Jeffail/gabs/v2 v2.5.1 7 | github.com/adammck/venv v0.0.0-20160819025605-8a9c907a37d3 8 | github.com/aws/aws-sdk-go v1.37.3 9 | github.com/go-logr/logr v0.2.1 10 | github.com/go-logr/zapr v0.2.0 // indirect 11 | github.com/gogo/protobuf v1.3.2 // indirect 12 | github.com/google/go-cmp v0.5.5 13 | github.com/google/uuid v1.1.2 14 | github.com/onsi/ginkgo v1.12.1 15 | github.com/onsi/gomega v1.10.1 16 | github.com/pkg/errors v0.9.1 17 | github.com/prometheus/client_golang v1.11.1 // indirect 18 | go.uber.org/zap v1.15.0 // indirect 19 | golang.org/x/net v0.0.0-20201110031124-69a78807bb2b 20 | google.golang.org/appengine v1.6.6 // indirect 21 | k8s.io/api v0.20.0 22 | k8s.io/apimachinery v0.20.0 23 | k8s.io/client-go v0.20.0 24 | sigs.k8s.io/controller-runtime v0.6.2 25 | ) 26 | -------------------------------------------------------------------------------- /hack/boilerplate.go.txt: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ -------------------------------------------------------------------------------- /hack/charts/batch-transform-jobs/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | name: amazon-sagemaker-batchtransformjob 3 | version: 1.0.0 4 | description: A Helm chart for deploying a SageMaker Batch Transform Job from Kubernetes. 5 | maintainers: 6 | - name: Gautam Kumar 7 | email: gauta@amazon.com 8 | - name: Suraj Kota 9 | email: surakota@amazon.com 10 | - name: Kartik Kalamadi 11 | email: kalamadi@amazon.com 12 | - name: Meghna Baijal 13 | email: mbaijal@amazon.com -------------------------------------------------------------------------------- /hack/charts/batch-transform-jobs/templates/NOTES.txt: -------------------------------------------------------------------------------- 1 | Successfully deployed a new batch transform job chart. 2 | 3 | Your release is named {{ .Release.Name }}. 4 | 5 | To learn more about the release, try: 6 | 7 | $ helm status {{ .Release.Name }} 8 | $ helm get all {{ .Release.Name }} -------------------------------------------------------------------------------- /hack/charts/batch-transform-jobs/templates/batch-transform-job.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: sagemaker.aws.amazon.com/v1 2 | kind: BatchTransformJob 3 | metadata: 4 | name: {{ .Values.name }} 5 | spec: 6 | region: {{ .Values.spec.region }} 7 | modelName: {{ .Values.spec.modelName | quote }} 8 | transformInput: 9 | dataSource: 10 | s3DataSource: 11 | s3DataType: {{ .Values.spec.inputDataConfig.s3DataType | default "S3Prefix" }} 12 | s3Uri: {{ .Values.spec.inputDataConfig.s3Uri }} 13 | contentType: {{ .Values.spec.contentType | default "text/csv" }} 14 | transformOutput: 15 | s3OutputPath: {{ .Values.spec.outputPath }} 16 | transformResources: 17 | instanceCount: {{ .Values.spec.instanceCount }} 18 | instanceType: {{ .Values.spec.instanceType }} 19 | {{- if 
.Values.spec.tags }} 20 | tags: 21 | {{- range $key, $value := .Values.spec.tags }} 22 | - name: {{ $key }} 23 | value: {{ $value | quote }} 24 | {{- end }} 25 | {{- end }} -------------------------------------------------------------------------------- /hack/charts/batch-transform-jobs/values.yaml: -------------------------------------------------------------------------------- 1 | name: xgboost-mnist-batch-transform 2 | spec: 3 | instanceCount: 1 4 | instanceType: ml.m4.xlarge 5 | region: us-west-2 6 | modelName: xgboost-mnist-model 7 | inputDataConfig: 8 | s3Uri: s3://sagemaker-sample-data-us-west-2/batch-transform/mnist-1000-samples 9 | s3DataType: S3Prefix 10 | outputPath: s3://my-bucket/batch_transform/output 11 | 12 | -------------------------------------------------------------------------------- /hack/charts/hyperparameter-tuning-jobs/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | name: amazon-sagemaker-hyperparametertuningjob 3 | version: 1.0.0 4 | description: A Helm chart for deploying a SageMaker HyperParameter tuning job from Kubernetes. 5 | maintainers: 6 | - name: Gautam Kumar 7 | email: gauta@amazon.com 8 | - name: Suraj Kota 9 | email: surakota@amazon.com 10 | - name: Kartik Kalamadi 11 | email: kalamadi@amazon.com 12 | - name: Meghna Baijal 13 | email: mbaijal@amazon.com -------------------------------------------------------------------------------- /hack/charts/hyperparameter-tuning-jobs/templates/NOTES.txt: -------------------------------------------------------------------------------- 1 | Successfully deployed a new hyperparameter tuning job chart. 2 | 3 | Your release is named {{ .Release.Name }}. 
4 | 5 | To learn more about the release, try: 6 | 7 | $ helm status {{ .Release.Name }} 8 | $ helm get all {{ .Release.Name }} -------------------------------------------------------------------------------- /hack/charts/hyperparameter-tuning-jobs/values.yaml: -------------------------------------------------------------------------------- 1 | name: xgboost-mnist-hpo 2 | spec: 3 | instanceCount: 1 4 | instanceType: ml.m4.xlarge 5 | volumeSizeInGB: 35 6 | maxRuntimeInSeconds: 86400 7 | roleArn: arn:aws:iam::123456789012:role/service-role/AmazonSageMaker-ExecutionRole 8 | region: us-west-2 9 | image: 433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:1 10 | enableNetworkIsolation: true 11 | enableInterContainerTrafficEncryption: false 12 | trainingInputMode: File 13 | hyperparameterTuningJobConfig: 14 | maxNumberOfTrainingJobs: 10 15 | maxParallelTrainingJobs: 10 16 | strategy: Bayesian 17 | hyperparameterTuningJobObjective: 18 | type: Minimize 19 | metricName: validation:error 20 | integerParameterRanges: 21 | name: num_round 22 | minValue: 10 23 | maxValue: 20 24 | scalingType: Linear 25 | trainingJobConfig: 26 | hyperparameters: 27 | base_score: '0.5' 28 | booster: gbtree 29 | csv_weights: '0' 30 | dsplit: row 31 | grow_policy: depthwise 32 | lambda_bias: '0.0' 33 | max_bin: '256' 34 | max_leaves: '0' 35 | normalize_type: tree 36 | objective: reg:linear 37 | one_drop: '0' 38 | prob_buffer_row: '1.0' 39 | process_type: default 40 | rate_drop: '0.0' 41 | refresh_leaf: '1' 42 | sample_type: uniform 43 | scale_pos_weight: '1.0' 44 | silent: '0' 45 | sketch_eps: '0.03' 46 | skip_drop: '0.0' 47 | tree_method: auto 48 | tweedie_variance_power: '1.5' 49 | updater: grow_colmaker,prune 50 | inputDataConfig: 51 | - channel: train 52 | s3: 53 | s3Uri: s3://my-bucket/xgboost-mnist/train/ 54 | - channel: validation 55 | s3: 56 | s3Uri: s3://my-bucket/xgboost-mnist/validation/ 57 | outputPath: s3://my-bucket/xgboost-mnist/xgboost/ 58 | 
-------------------------------------------------------------------------------- /hack/charts/installer/rolebased/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | name: amazon-sagemaker-operator-for-k8s 3 | version: 1.2.2 4 | appVersion: 1.2.2 5 | description: A Helm chart for deploying the Amazon SageMaker Operator for Kubernetes using EKS IAM roles for service accounts. 6 | maintainers: 7 | - name: Suraj Kota 8 | email: surakota@amazon.com 9 | - name: Kartik Kalamadi 10 | email: kalamadi@amazon.com 11 | - name: Meghna Baijal 12 | email: mbaijal@amazon.com 13 | keywords: 14 | - amazon 15 | - aws 16 | - sagemaker 17 | - eks 18 | -------------------------------------------------------------------------------- /hack/charts/installer/rolebased/README.md: -------------------------------------------------------------------------------- 1 | # Amazon SageMaker Operator for Kubernetes Role-Based Installer 2 | 3 | Amazon SageMaker Operator for Kubernetes Helm charts for role-based installation 4 | 5 | ## Prerequisites 6 | 7 | * Kubernetes >= 1.13 8 | * Helm == 3 9 | 10 | ## Installing the Chart 11 | 12 | Clone the existing repository: 13 | 14 | ```bash 15 | git clone https://github.com/aws/amazon-sagemaker-operator-for-k8s.git 16 | ``` 17 | 18 | Navigate to the helm chart directory and edit the values in the configuration file: 19 | 20 | ```bash 21 | cd amazon-sagemaker-operator-for-k8s/hack/charts/installer 22 | vim rolebased/values.yaml 23 | ``` 24 | 25 | The [configuration](#configuration) section below lists the parameters that can be configured for installation. 
26 | 27 | Install the operator using the following command: 28 | 29 | ```bash 30 | kubectl create namespace sagemaker-k8s-operator-system 31 | helm install --namespace sagemaker-k8s-operator-system sagemaker-operator rolebased/ 32 | ``` 33 | 34 | ## Uninstalling the chart 35 | 36 | To uninstall/delete the operator deployment: 37 | 38 | ```bash 39 | helm delete --namespace sagemaker-k8s-operator-system sagemaker-operator 40 | ``` 41 | 42 | ## Configuration 43 | 44 | The following table lists the configurable parameters for the chart and their default values. 45 | 46 | Parameter | Description | Default 47 | --- | --- | --- 48 | `roleArn` | The EKS service account role ARN | `` 49 | `image.repository` | image repository | `957583890962.dkr.ecr.us-east-1.amazonaws.com/amazon-sagemaker-operator-for-k8s` 50 | `image.tag` | image tag | `` -------------------------------------------------------------------------------- /hack/charts/installer/rolebased/templates/NOTES.txt: -------------------------------------------------------------------------------- 1 | Amazon SageMaker Operator for Kubernetes installed! 2 | 3 | Your release is named {{ .Release.Name }}. 
4 | 5 | To learn more about the release, try: 6 | 7 | $ helm status {{ .Release.Name }} 8 | $ helm get {{ .Release.Name }} 9 | 10 | For more information about the operator: https://sagemaker.readthedocs.io/en/stable/amazon_sagemaker_operators_for_kubernetes.html -------------------------------------------------------------------------------- /hack/charts/installer/rolebased/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* vim: set filetype=mustache: */}} 2 | {{/* 3 | Create a image repository and tag name 4 | */}} 5 | {{- define "controller.image" -}} 6 | {{- printf "%s:%s" .Values.image.repository .Values.image.tag -}} 7 | {{- end -}} -------------------------------------------------------------------------------- /hack/charts/installer/rolebased/values.yaml: -------------------------------------------------------------------------------- 1 | roleArn: arn:aws:iam::123456789012:role/DELETE_ME 2 | 3 | image: 4 | repository: 957583890962.dkr.ecr.us-east-1.amazonaws.com/amazon-sagemaker-operator-for-k8s 5 | tag: v1 6 | 7 | -------------------------------------------------------------------------------- /hack/charts/namespaced/README.md: -------------------------------------------------------------------------------- 1 | # Amazon SageMaker Operator for Kubernetes Role-Based Installer - Namespaced Deployment 2 | 3 | This README will help you install the Amazon SageMaker Operator for Kubernetes using Helm charts for role-based installation with operator scope limited to a specified namespace. 
4 | 5 | ## Prerequisites 6 | 7 | * Kubernetes >= 1.13 8 | * Helm == 3 9 | 10 | ## Download the Chart 11 | 12 | Clone the existing repository as follows: 13 | 14 | ```bash 15 | $ git clone https://github.com/aws/amazon-sagemaker-operator-for-k8s.git 16 | ``` 17 | 18 | Navigate to the helm chart directory 19 | 20 | ```bash 21 | cd amazon-sagemaker-operator-for-k8s/hack/charts/installer/namespaced 22 | ``` 23 | 24 | ## Install the CRDs 25 | Install the CRDs using the following command: 26 | ```bash 27 | $ helm install crds crd_chart/ 28 | ``` 29 | 30 | ## Install the Operator Manager Pod 31 | Edit the values.yaml file to specify the IAM Role as required: 32 | ```bash 33 | $ vim operator_chart/values.yaml 34 | ``` 35 | 36 | Create the required namespace and install the operator using the following command: 37 | ```bash 38 | $ kubectl create namespace <namespace> 39 | $ helm install -n <namespace> op operator_chart/ 40 | ``` 41 | 42 | ## Uninstall the charts 43 | 44 | To uninstall/delete the operator deployment, first make sure there are no jobs running, then: 45 | 46 | ```bash 47 | $ helm delete -n <namespace> op 48 | $ helm delete crds 49 | $ kubectl delete namespace <namespace> 50 | ``` -------------------------------------------------------------------------------- /hack/charts/namespaced/crd_chart/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | name: amazon-sagemaker-operator-for-k8s-install-crds 3 | version: 1.2.2 4 | appVersion: 1.2.2 5 | description: A Helm chart for deploying the Amazon SageMaker Operator CRDs for Kubernetes using EKS IAM roles for service accounts. 
6 | maintainers: 7 | - name: Suraj Kota 8 | email: surakota@amazon.com 9 | - name: Kartik Kalamadi 10 | email: kalamadi@amazon.com 11 | - name: Meghna Baijal 12 | email: mbaijal@amazon.com 13 | keywords: 14 | - amazon 15 | - aws 16 | - sagemaker 17 | - eks 18 | -------------------------------------------------------------------------------- /hack/charts/namespaced/operator_chart/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | name: amazon-sagemaker-operator-for-k8s-install-operator 3 | version: 1.2.2 4 | appVersion: 1.2.2 5 | description: A Helm chart for deploying the Amazon SageMaker Operator to a specified namespace for Kubernetes using EKS IAM roles for service accounts. 6 | maintainers: 7 | - name: Suraj Kota 8 | email: surakota@amazon.com 9 | - name: Kartik Kalamadi 10 | email: kalamadi@amazon.com 11 | - name: Meghna Baijal 12 | email: mbaijal@amazon.com 13 | keywords: 14 | - amazon 15 | - aws 16 | - sagemaker 17 | - eks 18 | -------------------------------------------------------------------------------- /hack/charts/namespaced/operator_chart/templates/NOTES.txt: -------------------------------------------------------------------------------- 1 | Amazon SageMaker Operator for Kubernetes installed! 2 | 3 | Your release is named {{ .Release.Name }}. 
4 | 5 | To learn more about the release, try: 6 | 7 | $ helm status {{ .Release.Name }} 8 | $ helm get {{ .Release.Name }} 9 | 10 | For more information about the operator: https://sagemaker.readthedocs.io/en/stable/amazon_sagemaker_operators_for_kubernetes.html -------------------------------------------------------------------------------- /hack/charts/namespaced/operator_chart/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* vim: set filetype=mustache: */}} 2 | {{/* 3 | Create a image repository and tag name 4 | */}} 5 | {{- define "controller.image" -}} 6 | {{- printf "%s:%s" .Values.image.repository .Values.image.tag -}} 7 | {{- end -}} -------------------------------------------------------------------------------- /hack/charts/namespaced/operator_chart/values.yaml: -------------------------------------------------------------------------------- 1 | roleArn: arn:aws:iam::123456789012:role/DELETE_ME 2 | 3 | image: 4 | repository: 957583890962.dkr.ecr.us-east-1.amazonaws.com/amazon-sagemaker-operator-for-k8s 5 | tag: v1 6 | -------------------------------------------------------------------------------- /hack/charts/training-jobs/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | name: amazon-sagemaker-trainingjob 3 | version: 1.0.0 4 | description: A Helm chart for deploying a SageMaker TrainingJob from Kubernetes. 
5 | maintainers: 6 | - name: Gautam Kumar 7 | email: gauta@amazon.com 8 | - name: Suraj Kota 9 | email: surakota@amazon.com 10 | - name: Kartik Kalamadi 11 | email: kalamadi@amazon.com 12 | - name: Meghna Baijal 13 | email: mbaijal@amazon.com 14 | # dependencies: 15 | # - name: amazon-sagemaker-operator-for-k8s 16 | # version: ">= 1.0.0" 17 | # repository: https://aws.github.io/eks-charts/ -------------------------------------------------------------------------------- /hack/charts/training-jobs/examples/efs_values.yaml: -------------------------------------------------------------------------------- 1 | name: xgboost-mnist-from-for-efs 2 | spec: 3 | instanceCount: 1 4 | instanceType: ml.m4.xlarge 5 | volumeSizeInGB: 5 6 | maxRuntimeInSeconds: 86400 7 | roleArn: arn:aws:iam::123456789012:role/service-role/AmazonSageMaker-ExecutionRole 8 | region: us-west-2 9 | image: 433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:1 10 | trainingInputMode: File 11 | hyperparameters: 12 | max_depth: 5 13 | eta: 0.2 14 | num_round: 10 15 | gamma: 4 16 | min_child_weight: 6 17 | silent: 0 18 | objective: multi:softmax 19 | num_class: 10 20 | num_round: 10 21 | outputPath: s3://my-bucket/xgboost/ 22 | inputDataConfig: 23 | - channel: train 24 | fileSystem: 25 | fileSystemType: EFS 26 | fileSystemID: fs-e2466d9b 27 | directoryPath: /xgboost-mnist/train 28 | - channel: validation 29 | fileSystem: 30 | fileSystemType: EFS 31 | fileSystemID: fs-e2466d9b 32 | directoryPath: /xgboost-mnist/validation 33 | vpcConfig: 34 | securityGroupIDs: 35 | - sg-4b23c421 36 | subnets: 37 | - subnet-ae0a65c6 38 | - subnet-2af0fa67 39 | - subnet-dddb7fa7 40 | 41 | -------------------------------------------------------------------------------- /hack/charts/training-jobs/examples/fsx_values.yaml: -------------------------------------------------------------------------------- 1 | name: xgboost-mnist-from-for-fsx 2 | spec: 3 | instanceCount: 1 4 | instanceType: ml.m4.xlarge 5 | volumeSizeInGB: 5 6 | 
maxRuntimeInSeconds: 86400 7 | roleArn: arn:aws:iam::123456789012:role/service-role/AmazonSageMaker-ExecutionRole 8 | region: us-west-2 9 | image: 433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:1 10 | trainingInputMode: File 11 | hyperparameters: 12 | max_depth: 5 13 | eta: 0.2 14 | num_round: 10 15 | gamma: 4 16 | min_child_weight: 6 17 | silent: 0 18 | objective: multi:softmax 19 | num_class: 10 20 | num_round: 10 21 | outputPath: s3://my-bucket/xgboost/ 22 | inputDataConfig: 23 | - channel: train 24 | fileSystem: 25 | fileSystemType: FSxLustre 26 | fileSystemID: fs-0d0f8e3f721664c98 27 | directoryPath: /fsx/sagemaker/xgboost-mnist 28 | - channel: validation 29 | fileSystem: 30 | fileSystemType: FSxLustre 31 | fileSystemID: fs-0d0f8e3f721664c98 32 | directoryPath: /fsx/sagemaker/xgboost-mnist 33 | vpcConfig: 34 | securityGroupIDs: 35 | - sg-4b23c421 36 | subnets: 37 | - subnet-ae0a65c6 38 | - subnet-2af0fa67 39 | - subnet-dddb7fa7 40 | 41 | -------------------------------------------------------------------------------- /hack/charts/training-jobs/examples/s3_values.yaml: -------------------------------------------------------------------------------- 1 | name: xgboost-mnist-from-for-s3 2 | spec: 3 | instanceCount: 1 4 | instanceType: ml.m4.xlarge 5 | volumeSizeInGB: 5 6 | maxRuntimeInSeconds: 86400 7 | roleArn: arn:aws:iam::123456789012:role/service-role/AmazonSageMaker-ExecutionRole 8 | region: us-west-2 9 | image: 433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:1 10 | trainingInputMode: File 11 | hyperparameters: 12 | max_depth: 5 13 | eta: 0.2 14 | num_round: 10 15 | gamma: 4 16 | min_child_weight: 6 17 | silent: 0 18 | objective: multi:softmax 19 | num_class: 10 20 | num_round: 10 21 | inputDataConfig: 22 | - channel: train 23 | s3: 24 | s3Uri: s3://my-bucket/train/ 25 | - channel: validation 26 | s3: 27 | s3Uri: s3://my-bucket/validation/ 28 | outputPath: s3://my-bucket/xgboost/ 29 | 30 | 
-------------------------------------------------------------------------------- /hack/charts/training-jobs/examples/spot_values.yaml: -------------------------------------------------------------------------------- 1 | name: xgboost-mnist-from-for-spot 2 | spec: 3 | instanceCount: 1 4 | instanceType: ml.m4.xlarge 5 | volumeSizeInGB: 5 6 | maxRuntimeInSeconds: 86400 7 | roleArn: arn:aws:iam::123456789012:role/service-role/AmazonSageMaker-ExecutionRole 8 | region: us-west-2 9 | image: 433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:1 10 | trainingInputMode: File 11 | hyperparameters: 12 | max_depth: 5 13 | eta: 0.2 14 | num_round: 10 15 | gamma: 4 16 | min_child_weight: 6 17 | silent: 0 18 | objective: multi:softmax 19 | num_class: 10 20 | num_round: 10 21 | inputDataConfig: 22 | - channel: train 23 | s3: 24 | s3Uri: s3://my-bucket/train/ 25 | - channel: validation 26 | s3: 27 | s3Uri: s3://my-bucket/validation/ 28 | outputPath: s3://my-bucket/xgboost/ 29 | enableManagedSpotTraining: true 30 | stoppingCondition: 31 | maxRuntime: 3599 32 | maxWaitTime: 3600 33 | checkpointConfig: 34 | s3Uri: s3://my-bucket/checkpoints/ 35 | -------------------------------------------------------------------------------- /hack/charts/training-jobs/templates/NOTES.txt: -------------------------------------------------------------------------------- 1 | Successfully deployed a new training job chart. 2 | 3 | Your release is named {{ .Release.Name }}. 
4 | 5 | To learn more about the release, try: 6 | 7 | $ helm status {{ .Release.Name }} 8 | $ helm get all {{ .Release.Name }} -------------------------------------------------------------------------------- /hack/charts/training-jobs/templates/job.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: sagemaker.aws.amazon.com/v1 2 | kind: TrainingJob 3 | metadata: 4 | name: {{ .Values.name }} 5 | spec: 6 | hyperParameters: 7 | {{- range $key, $value := .Values.spec.hyperparameters }} 8 | - name: {{ $key }} 9 | value: {{ $value | quote }} 10 | {{- end }} 11 | algorithmSpecification: 12 | trainingImage: {{ .Values.spec.image }} 13 | trainingInputMode: {{ .Values.spec.trainingInputMode }} 14 | roleArn: {{ .Values.spec.roleArn }} 15 | region: {{ .Values.spec.region }} 16 | outputDataConfig: 17 | s3OutputPath: {{ .Values.spec.outputPath }} 18 | resourceConfig: 19 | instanceCount: {{ .Values.spec.instanceCount }} 20 | instanceType: {{ .Values.spec.instanceType }} 21 | volumeSizeInGB: {{ .Values.spec.volumeSizeInGB }} 22 | stoppingCondition: 23 | maxRuntimeInSeconds: {{ .Values.spec.maxRuntimeInSeconds }} 24 | {{- if .Values.spec.vpcConfig }} 25 | vpcConfig: 26 | securityGroupIds: 27 | {{- range .Values.spec.vpcConfig.securityGroupIDs }} 28 | - {{ . }} 29 | {{- end }} 30 | subnets: 31 | {{- range .Values.spec.vpcConfig.subnets }} 32 | - {{ . 
}} 33 | {{- end }} 34 | {{- end }} 35 | inputDataConfig: 36 | {{- range .Values.spec.inputDataConfig }} 37 | - channelName: {{ .channel }} 38 | dataSource: 39 | {{- if .s3 }} 40 | s3DataSource: 41 | s3DataType: {{ .s3.s3DataType | default "S3Prefix" }} 42 | s3Uri: {{ .s3.s3Uri }} 43 | s3DataDistributionType: {{ .s3.s3DataDistributionType | default "FullyReplicated" }} 44 | {{- end }} 45 | {{- if .fileSystem }} 46 | fileSystemDataSource: 47 | fileSystemId: {{ .fileSystem.fileSystemID }} 48 | fileSystemAccessMode: {{ .fileSystemAccessMode | default "ro" }} 49 | fileSystemType: {{ .fileSystem.fileSystemType }} 50 | directoryPath: {{ .fileSystem.directoryPath }} 51 | {{- end }} 52 | contentType: {{ .contentType | default "text/csv" }} 53 | compressionType: {{ .compressionType | default "None" }} 54 | {{- end }} 55 | {{- if .Values.spec.enableManagedSpotTraining }} 56 | enableManagedSpotTraining: {{ .Values.spec.enableManagedSpotTraining }} 57 | {{- end }} 58 | {{- if .Values.spec.stoppingCondition }} 59 | stoppingCondition: 60 | maxRuntimeInSeconds: {{ .Values.spec.stoppingCondition.maxRuntime }} 61 | maxWaitTimeInSeconds: {{ .Values.spec.stoppingCondition.maxWaitTime }} 62 | {{- end }} 63 | {{- if .Values.spec.checkpointConfig }} 64 | checkpointConfig: 65 | s3Uri: {{ .Values.spec.checkpointConfig.s3Uri}} 66 | {{- end }} 67 | {{- if .Values.spec.tags }} 68 | tags: 69 | {{- range $key, $value := .Values.spec.tags }} 70 | - name: {{ $key }} 71 | value: {{ $value | quote }} 72 | {{- end }} 73 | {{- end }} 74 | 75 | -------------------------------------------------------------------------------- /hack/charts/training-jobs/values.yaml: -------------------------------------------------------------------------------- 1 | name: xgboost-mnist-from-for-s3 2 | spec: 3 | instanceCount: 1 4 | instanceType: ml.m4.xlarge 5 | volumeSizeInGB: 5 6 | maxRuntimeInSeconds: 86400 7 | roleArn: arn:aws:iam::123456789012:role/service-role/AmazonSageMaker-ExecutionRole 8 | region: us-west-2 9 | 
image: 433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:1 10 | trainingInputMode: File 11 | hyperparameters: 12 | max_depth: 5 13 | eta: 0.2 14 | num_round: 10 15 | gamma: 4 16 | min_child_weight: 6 17 | silent: 0 18 | objective: multi:softmax 19 | num_class: 10 20 | num_round: 10 21 | inputDataConfig: 22 | - channel: train 23 | s3: 24 | s3Uri: s3://my-bucket/train/ 25 | - channel: validation 26 | s3: 27 | s3Uri: s3://my-bucket/validation/ 28 | outputPath: s3://my-bucket/xgboost/ 29 | tags: 30 | example-tag: example-value 31 | 32 | -------------------------------------------------------------------------------- /release/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/amazon-sagemaker-operator-for-k8s/e131566f83b8d860300c1ccf377b3982b385f177/release/.gitkeep -------------------------------------------------------------------------------- /release/rolebased/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/amazon-sagemaker-operator-for-k8s/e131566f83b8d860300c1ccf377b3982b385f177/release/rolebased/.gitkeep -------------------------------------------------------------------------------- /release/rolebased/namespaced/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/amazon-sagemaker-operator-for-k8s/e131566f83b8d860300c1ccf377b3982b385f177/release/rolebased/namespaced/.gitkeep -------------------------------------------------------------------------------- /samples/bring-your-own-container/README.md: -------------------------------------------------------------------------------- 1 | # Bring Your Own Container Sample 2 | 3 | This sample demonstrates how to start training jobs using your own training script, packaged in a SageMaker-compatible container, using the Amazon SageMaker Operator for Kubernetes. 
4 | 5 | ## Prerequisites 6 | 7 | This sample assumes that you have already configured an EKS cluster with the operator. It also assumes that you have installed `kubectl` - you can find a link on our [installation page](https://sagemaker.readthedocs.io/en/stable/amazon_sagemaker_operators_for_kubernetes.html#prerequisites). 8 | 9 | In order to follow this script, you must first create a training script packaged in a Dockerfile that is [compatible with Amazon SageMaker](https://docs.aws.amazon.com/sagemaker/latest/dg/amazon-sagemaker-containers.html). The [Distributed Mask R-CNN sample](https://github.com/awslabs/amazon-sagemaker-examples/tree/master/advanced_functionality/distributed_tensorflow_mask_rcnn), published by the SageMaker team, contains a predefined training script and helper bash scripts for reference. 10 | 11 | ## Preparing the Training Script 12 | 13 | ### Uploading your Script 14 | 15 | All SageMaker training jobs are run from within a container with all necessary dependencies and modules pre-installed and with the training scripts referencing the acceptable input and output directories. This container should be uploaded to an [ECR repository](https://aws.amazon.com/ecr/) accessible from within your AWS account. When uploaded correctly, you should have a repository URL and tag associated with the container image - this will be needed for the next step. 16 | 17 | A container image URL and tag looks has the following structure: 18 | ``` 19 | .dkr.ecr..amazonaws.com/: 20 | ``` 21 | 22 | ### Updating the Training Specification 23 | 24 | In the `my-training-job.yaml` file, modify the placeholder values with those associated with your account and training job. The `spec.algorithmSpecification.trainingImage` should be the container image from the previous step. The `spec.roleArn` field should be the ARN of an IAM role which has permissions to access your S3 resources. 
If you have not yet created a role with these permissions, you can find an example policy at [Amazon SageMaker Roles](https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-roles.html#sagemaker-roles-createtrainingjob-perms). 25 | 26 | ## Submitting your Training Job 27 | 28 | To submit your prepared training job specification, apply the specification to your EKS cluster as such: 29 | ``` 30 | $ kubectl apply -f my-training-job.yaml 31 | trainingjob.sagemaker.aws.amazon.com/my-training-job created 32 | ``` 33 | 34 | To monitor the training job once it has started, you can see the full status and any additional errors with the following command: 35 | ``` 36 | $ kubectl describe trainingjob my-training-job 37 | ``` -------------------------------------------------------------------------------- /samples/bring-your-own-container/my-training-job.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: sagemaker.aws.amazon.com/v1 2 | kind: TrainingJob 3 | metadata: 4 | name: my-training-job 5 | spec: 6 | hyperParameters: # Modify these parameters to meet your own script's needs 7 | - name: mode_fpn 8 | value: "True" 9 | - name: mode_mask 10 | value: "True" 11 | - name: eval_period 12 | value: "1" 13 | - name: batch_norm 14 | value: "FreezeBN" 15 | algorithmSpecification: 16 | trainingImage: # The URL and tag of your ECR container 17 | trainingInputMode: File 18 | roleArn: # A role with SageMaker and S3 access 19 | region: # The region in which to run the training job 20 | outputDataConfig: 21 | s3OutputPath: s3:///output # The output path of our model 22 | resourceConfig: 23 | instanceCount: 1 24 | instanceType: ml.m4.xlarge 25 | volumeSizeInGB: 5 26 | stoppingCondition: 27 | maxRuntimeInSeconds: 86400 28 | inputDataConfig: 29 | - channelName: train 30 | dataSource: 31 | s3DataSource: 32 | s3DataType: S3Prefix 33 | s3Uri: s3:///mask-rcnn/sagemaker/input/train/ # The source of the training data 34 | s3DataDistributionType: 
FullyReplicated 35 | contentType: application/tfrecord 36 | compressionType: None -------------------------------------------------------------------------------- /samples/efs-xgboost-mnist-trainingjob.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: sagemaker.aws.amazon.com/v1 2 | kind: TrainingJob 3 | metadata: 4 | name: efs-xgboost-mnist 5 | spec: 6 | hyperParameters: 7 | - name: max_depth 8 | value: "5" 9 | - name: eta 10 | value: "0.2" 11 | - name: gamma 12 | value: "4" 13 | - name: min_child_weight 14 | value: "6" 15 | - name: silent 16 | value: "0" 17 | - name: objective 18 | value: multi:softmax 19 | - name: num_class 20 | value: "10" 21 | - name: num_round 22 | value: "10" 23 | algorithmSpecification: 24 | trainingImage: 433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:1 25 | trainingInputMode: File 26 | roleArn: arn:aws:iam::123456789012:role/service-role/AmazonSageMaker-ExecutionRole 27 | region: us-west-2 28 | outputDataConfig: 29 | s3OutputPath: s3://my-bucket/xgboost 30 | resourceConfig: 31 | instanceCount: 1 32 | instanceType: ml.m4.xlarge 33 | volumeSizeInGB: 5 34 | stoppingCondition: 35 | maxRuntimeInSeconds: 86400 36 | inputDataConfig: 37 | - channelName: train 38 | dataSource: 39 | fileSystemDataSource: 40 | fileSystemType: EFS 41 | fileSystemAccessMode: ro 42 | fileSystemId: fs-e2466d9b 43 | directoryPath: /xgboost-mnist/train 44 | contentType: text/csv 45 | compressionType: None 46 | - channelName: validation 47 | dataSource: 48 | fileSystemDataSource: 49 | fileSystemType: EFS 50 | fileSystemAccessMode: ro 51 | fileSystemId: fs-e2466d9b 52 | directoryPath: /xgboost-mnist/validation 53 | contentType: text/csv 54 | compressionType: None 55 | vpcConfig: 56 | securityGroupIds: 57 | - sg-4b23c421 58 | subnets: 59 | - subnet-ae0a65c6 60 | - subnet-2af0fa67 61 | - subnet-dddb7fa7 -------------------------------------------------------------------------------- 
/samples/fsx-xgboost-mnist-trainingjob.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: sagemaker.aws.amazon.com/v1 2 | kind: TrainingJob 3 | metadata: 4 | name: fsx-xgboost-mnist 5 | spec: 6 | hyperParameters: 7 | - name: max_depth 8 | value: "5" 9 | - name: eta 10 | value: "0.2" 11 | - name: gamma 12 | value: "4" 13 | - name: min_child_weight 14 | value: "6" 15 | - name: silent 16 | value: "0" 17 | - name: objective 18 | value: multi:softmax 19 | - name: num_class 20 | value: "10" 21 | - name: num_round 22 | value: "10" 23 | algorithmSpecification: 24 | trainingImage: 433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:1 25 | trainingInputMode: File 26 | roleArn: arn:aws:iam::123456789012:role/service-role/AmazonSageMaker-ExecutionRole 27 | region: us-west-2 28 | outputDataConfig: 29 | s3OutputPath: s3://my-bucket/xgboost 30 | resourceConfig: 31 | instanceCount: 1 32 | instanceType: ml.m4.xlarge 33 | volumeSizeInGB: 5 34 | stoppingCondition: 35 | maxRuntimeInSeconds: 86400 36 | inputDataConfig: 37 | - channelName: train 38 | dataSource: 39 | fileSystemDataSource: 40 | fileSystemType: FSxLustre 41 | fileSystemAccessMode: ro 42 | fileSystemId: fs-0d0f8e3f721664c98 43 | directoryPath: /fsx/sagemaker/xgboost-mnist 44 | contentType: text/csv 45 | compressionType: None 46 | - channelName: validation 47 | dataSource: 48 | fileSystemDataSource: 49 | fileSystemType: FSxLustre 50 | fileSystemAccessMode: ro 51 | fileSystemId: fs-0d0f8e3f721664c98 52 | directoryPath: /fsx/sagemaker/xgboost-mnist 53 | contentType: text/csv 54 | compressionType: None 55 | vpcConfig: 56 | securityGroupIds: 57 | - sg-4b23c421 58 | subnets: 59 | - subnet-ae0a65c6 60 | - subnet-2af0fa67 61 | - subnet-dddb7fa7 62 | -------------------------------------------------------------------------------- /samples/hap-custom-metric.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: 
sagemaker.aws.amazon.com/v1 2 | kind: HostingAutoscalingPolicy 3 | metadata: 4 | name: hap-custom 5 | spec: 6 | resourceId: 7 | - endpointName: Endpoint-20200410010239-FZFY 8 | variantName: variant-name-1 9 | region: us-east-2 10 | policyName: CPUUtilizationScalingPolicy 11 | policyType: TargetTrackingScaling 12 | minCapacity: 1 13 | maxCapacity: 2 14 | suspendedState: 15 | dynamicScalingInSuspended: true 16 | targetTrackingScalingPolicyConfiguration: 17 | targetValue: 60.0 18 | scaleInCooldown: 700 19 | scaleOutCooldown: 300 20 | customizedMetricSpecification: 21 | metricName: CPUUtilization 22 | namespace: /aws/sagemaker/Endpoints 23 | dimensions: 24 | - name: EndpointName 25 | value: Endpoint-20200410010239-FZFY 26 | - name: VariantName 27 | value: variant-name-1 28 | statistic: Average 29 | unit: Percent 30 | -------------------------------------------------------------------------------- /samples/hap-predefined-metric.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: sagemaker.aws.amazon.com/v1 2 | kind: HostingAutoscalingPolicy 3 | metadata: 4 | name: hap-predefined 5 | spec: 6 | resourceId: 7 | - endpointName: xgboost-2020-06-01-20-39-37-305 8 | variantName: AllTraffic 9 | - endpointName: Endpoint-20200410015622-Z0LK 10 | variantName: variant-name-1 11 | region: us-east-2 12 | policyName: SageMakerEndpointInvocationScalingPolicy 13 | policyType: TargetTrackingScaling 14 | minCapacity: 1 15 | maxCapacity: 1 16 | targetTrackingScalingPolicyConfiguration: 17 | targetValue: 60.0 18 | predefinedMetricSpecification: 19 | predefinedMetricType: SageMakerVariantInvocationsPerInstance 20 | scaleInCooldown: 700 21 | scaleOutCooldown: 300 22 | disableScaleIn: true 23 | -------------------------------------------------------------------------------- /samples/image-classifier-augmented-manifest.yaml: -------------------------------------------------------------------------------- 1 | # This example uses the AWS 
SageMaker first party Image Classification algorithm. 2 | # It also demonstrates the use of AugmentedManifestFiles as an input source from 3 | # S3. For more information regarding this algorithm, visit 4 | # https://docs.aws.amazon.com/sagemaker/latest/dg/image-classification.html. 5 | apiVersion: sagemaker.aws.amazon.com/v1 6 | kind: TrainingJob 7 | metadata: 8 | name: augmented-s3-manifest-image-classifier 9 | spec: 10 | roleArn: arn:aws:iam::123456789012:role/service-role/AmazonSageMaker-ExecutionRole 11 | region: us-east-1 12 | algorithmSpecification: 13 | trainingImage: 811284229777.dkr.ecr.us-east-1.amazonaws.com/image-classification:1 14 | trainingInputMode: Pipe 15 | outputDataConfig: 16 | s3OutputPath: s3://my-bucket/models 17 | inputDataConfig: 18 | - channelName: train 19 | dataSource: 20 | s3DataSource: 21 | s3DataType: AugmentedManifestFile 22 | s3Uri: s3://my-bucket/train_lst/train_manifest.json 23 | s3DataDistributionType: FullyReplicated 24 | attributeNames: ["source-ref", "class"] 25 | contentType: application/x-image 26 | compressionType: None 27 | recordWrapperType: RecordIO 28 | - channelName: validation 29 | dataSource: 30 | s3DataSource: 31 | s3DataType: AugmentedManifestFile 32 | s3Uri: s3://my-bucket/val_lst/val_manifest.json 33 | s3DataDistributionType: FullyReplicated 34 | attributeNames: ["source-ref", "class"] 35 | contentType: application/x-image 36 | compressionType: None 37 | recordWrapperType: RecordIO 38 | resourceConfig: 39 | instanceCount: 4 40 | instanceType: ml.p3.16xlarge 41 | volumeSizeInGB: 5 42 | hyperParameters: 43 | - name: top_k 44 | value: "1" 45 | - name: num_training_samples 46 | value: "105434" 47 | - name: mini_batch_size 48 | value: "32" 49 | - name: learning_rate 50 | value: "0.001" 51 | - name: image_shape 52 | value: "3,150,250" 53 | - name: precision_dtype 54 | value: "float32" 55 | - name: num_layers 56 | value: "50" 57 | - name: use_pretrained_model 58 | value: "1" 59 | - name: num_classes 60 | value: "2" 61 | 
- name: epochs 62 | value: "100" 63 | stoppingCondition: 64 | maxRuntimeInSeconds: 360000 65 | 66 | -------------------------------------------------------------------------------- /samples/kmeans-mnist-processingjob.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: sagemaker.aws.amazon.com/v1 2 | kind: ProcessingJob 3 | metadata: 4 | name: kmeans-mnist-processing 5 | spec: 6 | environment: 7 | - name: MYVAR 8 | value: my_value 9 | - name: MYVAR2 10 | value: my_value2 11 | appSpecification: 12 | imageUri: 763104351884.dkr.ecr.us-west-2.amazonaws.com/pytorch-training:1.5.0-cpu-py36-ubuntu16.04 13 | containerEntrypoint: 14 | - python 15 | - /opt/ml/processing/code/kmeans_preprocessing.py 16 | roleArn: arn:aws:iam::123456789012:role/service-role/AmazonSageMaker-ExecutionRole 17 | region: us-west-2 18 | networkConfig: 19 | enableNetworkIsolation: False 20 | processingOutputConfig: 21 | outputs: 22 | - outputName: train_data 23 | s3Output: 24 | s3Uri: s3://my-bucket/mnist_kmeans_example/output/ 25 | localPath: /opt/ml/processing/output_train/ 26 | s3UploadMode: EndOfJob 27 | - outputName: test_data 28 | s3Output: 29 | s3Uri: s3://my-bucket/mnist_kmeans_example/output/ 30 | localPath: /opt/ml/processing/output_test/ 31 | s3UploadMode: EndOfJob 32 | - outputName: valid_data 33 | s3Output: 34 | s3Uri: s3://my-bucket/mnist_kmeans_example/output/ 35 | localPath: /opt/ml/processing/output_valid/ 36 | s3UploadMode: EndOfJob 37 | processingResources: 38 | clusterConfig: 39 | instanceCount: 1 40 | instanceType: ml.m5.xlarge 41 | volumeSizeInGB: 20 42 | processingInputs: 43 | - inputName: mnist_tar 44 | s3Input: 45 | s3Uri: s3://sagemaker-sample-data-us-west-2/algorithms/kmeans/mnist/mnist.pkl.gz 46 | localPath: /opt/ml/processing/input 47 | s3DataType: S3Prefix 48 | s3InputMode: File 49 | s3CompressionType: None 50 | - inputName: source_code 51 | s3Input: 52 | s3Uri: 
s3://my-bucket/mnist_kmeans_example/processing_code/kmeans_preprocessing.py 53 | localPath: /opt/ml/processing/code 54 | s3DataType: S3Prefix 55 | s3InputMode: File 56 | s3CompressionType: None 57 | stoppingCondition: 58 | maxRuntimeInSeconds: 1800 59 | tags: 60 | - key: tagKey 61 | value: tagValue 62 | -------------------------------------------------------------------------------- /samples/kmeans_preprocessing.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import gzip 3 | import numpy 4 | import io 5 | from sagemaker.amazon.common import write_numpy_to_dense_tensor 6 | 7 | print("Extracting MNIST data set") 8 | # Load the dataset 9 | with gzip.open('/opt/ml/processing/input/mnist.pkl.gz', 'rb') as f: 10 | train_set, valid_set, test_set = pickle.load(f, encoding='latin1') 11 | 12 | # process the data 13 | # Convert the training data into the format required by the SageMaker KMeans algorithm 14 | print("Writing training data") 15 | with open('/opt/ml/processing/output_train/train_data', 'wb') as train_file: 16 | write_numpy_to_dense_tensor(train_file, train_set[0], train_set[1]) 17 | 18 | print("Writing test data") 19 | with open('/opt/ml/processing/output_test/test_data', 'wb') as test_file: 20 | write_numpy_to_dense_tensor(test_file, test_set[0], test_set[1]) 21 | 22 | print("Writing validation data") 23 | # Convert the valid data into the format required by the SageMaker KMeans algorithm 24 | numpy.savetxt('/opt/ml/processing/output_valid/valid-data.csv', valid_set[0], delimiter=',', fmt='%g') 25 | -------------------------------------------------------------------------------- /samples/spot-xgboost-mnist-trainingjob.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: sagemaker.aws.amazon.com/v1 2 | kind: TrainingJob 3 | metadata: 4 | name: spot-xgboost-mnist 5 | spec: 6 | hyperParameters: 7 | - name: max_depth 8 | value: "5" 9 | - name: eta 10 | value: 
"0.2" 11 | - name: gamma 12 | value: "4" 13 | - name: min_child_weight 14 | value: "6" 15 | - name: silent 16 | value: "0" 17 | - name: objective 18 | value: multi:softmax 19 | - name: num_class 20 | value: "10" 21 | - name: num_round 22 | value: "10" 23 | algorithmSpecification: 24 | trainingImage: 433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:1 25 | trainingInputMode: File 26 | roleArn: arn:aws:iam::123456789012:role/service-role/AmazonSageMaker-ExecutionRole 27 | region: us-west-2 28 | outputDataConfig: 29 | s3OutputPath: s3://my-bucket/xgboost 30 | resourceConfig: 31 | instanceCount: 1 32 | instanceType: ml.m4.xlarge 33 | volumeSizeInGB: 5 34 | stoppingCondition: 35 | maxRuntimeInSeconds: 3599 36 | maxWaitTimeInSeconds: 3600 37 | enableManagedSpotTraining: true 38 | inputDataConfig: 39 | - channelName: train 40 | dataSource: 41 | s3DataSource: 42 | s3DataType: S3Prefix 43 | s3Uri: s3://my-bucket/xgboost/train/ 44 | s3DataDistributionType: FullyReplicated 45 | contentType: text/csv 46 | compressionType: None 47 | - channelName: validation 48 | dataSource: 49 | s3DataSource: 50 | s3DataType: S3Prefix 51 | s3Uri: s3://my-bucket/xgboost/validation/ 52 | s3DataDistributionType: FullyReplicated 53 | contentType: text/csv 54 | compressionType: None 55 | checkpointConfig: 56 | s3Uri: s3://my-bucket/xgboost/checkpoints/ 57 | -------------------------------------------------------------------------------- /samples/xgboost-mnist-batchtransform.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: sagemaker.aws.amazon.com/v1 2 | kind: BatchTransformJob 3 | metadata: 4 | name: xgboost-batch 5 | spec: 6 | region: us-west-2 7 | modelName: sagemaker-tensorflow-model 8 | transformInput: 9 | contentType: text/csv 10 | dataSource: 11 | s3DataSource: 12 | s3DataType: S3Prefix 13 | s3Uri: s3://sagemaker-sample-data-us-west-2/batch-transform/mnist-1000-samples 14 | transformOutput: 15 | s3OutputPath: 
s3://my-bucket/sagemaker-tensorflow-model 16 | transformResources: 17 | instanceCount: 2 18 | instanceType: ml.m4.xlarge 19 | -------------------------------------------------------------------------------- /samples/xgboost-mnist-custom-endpoint.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: sagemaker.aws.amazon.com/v1 2 | kind: TrainingJob 3 | metadata: 4 | name: xgboost-mnist-custom-endpoint 5 | spec: 6 | hyperParameters: 7 | - name: max_depth 8 | value: "5" 9 | - name: eta 10 | value: "0.2" 11 | - name: gamma 12 | value: "4" 13 | - name: min_child_weight 14 | value: "6" 15 | - name: silent 16 | value: "0" 17 | - name: objective 18 | value: multi:softmax 19 | - name: num_class 20 | value: "10" 21 | - name: num_round 22 | value: "10" 23 | algorithmSpecification: 24 | trainingImage: 433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:1 25 | trainingInputMode: File 26 | roleArn: arn:aws:iam::123456789012:role/service-role/AmazonSageMaker-ExecutionRole 27 | region: us-west-2 28 | sageMakerEndpoint: https://sagemaker.us-west-2.amazonaws.com 29 | outputDataConfig: 30 | s3OutputPath: s3://my-bucket/xgboost 31 | resourceConfig: 32 | instanceCount: 1 33 | instanceType: ml.m4.xlarge 34 | volumeSizeInGB: 5 35 | stoppingCondition: 36 | maxRuntimeInSeconds: 86400 37 | inputDataConfig: 38 | - channelName: train 39 | dataSource: 40 | s3DataSource: 41 | s3DataType: S3Prefix 42 | s3Uri: s3://my-bucket/xgboost/train/ 43 | s3DataDistributionType: FullyReplicated 44 | contentType: text/csv 45 | compressionType: None 46 | - channelName: validation 47 | dataSource: 48 | s3DataSource: 49 | s3DataType: S3Prefix 50 | s3Uri: s3://my-bucket/xgboost/validation/ 51 | s3DataDistributionType: FullyReplicated 52 | contentType: text/csv 53 | compressionType: None 54 | -------------------------------------------------------------------------------- /samples/xgboost-mnist-hostingdeployment.yaml: 
-------------------------------------------------------------------------------- 1 | apiVersion: sagemaker.aws.amazon.com/v1 2 | kind: HostingDeployment 3 | metadata: 4 | name: hosting-deployment 5 | spec: 6 | region: us-east-2 7 | productionVariants: 8 | - variantName: AllTraffic 9 | modelName: xgboost-model 10 | initialInstanceCount: 1 11 | instanceType: ml.r5.large 12 | initialVariantWeight: 1 13 | models: 14 | - name: xgboost-model 15 | executionRoleArn: arn:aws:iam::123456789012:role/service-role/AmazonSageMaker-ExecutionRole 16 | containers: 17 | - containerHostname: xgboost 18 | modelDataUrl: s3://my-bucket/xgboost/model.tar.gz 19 | image: 825641698319.dkr.ecr.us-east-2.amazonaws.com/xgboost:latest 20 | -------------------------------------------------------------------------------- /samples/xgboost-mnist-trainingjob-debugger.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: sagemaker.aws.amazon.com/v1 2 | kind: TrainingJob 3 | metadata: 4 | name: xgboost-mnist-debugger 5 | spec: 6 | hyperParameters: 7 | - name: max_depth 8 | value: "5" 9 | - name: eta 10 | value: "0.2" 11 | - name: gamma 12 | value: "4" 13 | - name: min_child_weight 14 | value: "6" 15 | - name: silent 16 | value: "0" 17 | - name: objective 18 | value: reg:squarederror 19 | - name: subsample 20 | value: "0.7" 21 | - name: num_round 22 | value: "51" 23 | algorithmSpecification: 24 | trainingImage: 246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-xgboost:0.90-2-cpu-py3 25 | trainingInputMode: File 26 | roleArn: arn:aws:iam::123456789012:role/service-role/AmazonSageMaker-ExecutionRole 27 | region: us-west-2 28 | outputDataConfig: 29 | s3OutputPath: s3://my-bucket/xgboost-debugger/output 30 | resourceConfig: 31 | instanceCount: 1 32 | instanceType: ml.m4.xlarge 33 | volumeSizeInGB: 5 34 | stoppingCondition: 35 | maxRuntimeInSeconds: 86400 36 | inputDataConfig: 37 | - channelName: train 38 | dataSource: 39 | s3DataSource: 40 | s3DataType: 
S3Prefix 41 | s3Uri: s3://my-bucket/xgboost-debugger/train 42 | s3DataDistributionType: FullyReplicated 43 | contentType: libsvm 44 | compressionType: None 45 | - channelName: validation 46 | dataSource: 47 | s3DataSource: 48 | s3DataType: S3Prefix 49 | s3Uri: s3://my-bucket/xgboost-debugger/validation 50 | s3DataDistributionType: FullyReplicated 51 | contentType: libsvm 52 | compressionType: None 53 | debugHookConfig: 54 | s3OutputPath: s3://my-bucket/xgboost-debugger/hookconfig 55 | collectionConfigurations: 56 | - collectionName: feature_importance 57 | collectionParameters: 58 | - name: save_interval 59 | value: "5" 60 | - collectionName: losses 61 | collectionParameters: 62 | - name: save_interval 63 | value: "500" 64 | - collectionName: average_shap 65 | collectionParameters: 66 | - name: save_interval 67 | value: "5" 68 | - collectionName: metrics 69 | collectionParameters: 70 | - name: save_interval 71 | value: "5" 72 | debugRuleConfigurations: 73 | - ruleConfigurationName: LossNotDecreasing 74 | ruleEvaluatorImage: 895741380848.dkr.ecr.us-west-2.amazonaws.com/sagemaker-debugger-rules:latest 75 | ruleParameters: 76 | - name: collection_names 77 | value: metrics 78 | - name: num_steps 79 | value: "10" 80 | - name: rule_to_invoke 81 | value: LossNotDecreasing 82 | -------------------------------------------------------------------------------- /samples/xgboost-mnist-trainingjob.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: sagemaker.aws.amazon.com/v1 2 | kind: TrainingJob 3 | metadata: 4 | name: xgboost-mnist 5 | spec: 6 | hyperParameters: 7 | - name: max_depth 8 | value: "5" 9 | - name: eta 10 | value: "0.2" 11 | - name: gamma 12 | value: "4" 13 | - name: min_child_weight 14 | value: "6" 15 | - name: silent 16 | value: "0" 17 | - name: objective 18 | value: multi:softmax 19 | - name: num_class 20 | value: "10" 21 | - name: num_round 22 | value: "10" 23 | algorithmSpecification: 24 | trainingImage: 
433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:1 25 | trainingInputMode: File 26 | roleArn: arn:aws:iam::123456789012:role/service-role/AmazonSageMaker-ExecutionRole 27 | region: us-west-2 28 | outputDataConfig: 29 | s3OutputPath: s3://my-bucket/xgboost/ 30 | resourceConfig: 31 | instanceCount: 1 32 | instanceType: ml.m4.xlarge 33 | volumeSizeInGB: 5 34 | stoppingCondition: 35 | maxRuntimeInSeconds: 86400 36 | inputDataConfig: 37 | - channelName: train 38 | dataSource: 39 | s3DataSource: 40 | s3DataType: S3Prefix 41 | s3Uri: s3://my-bucket/xgboost/train/ 42 | s3DataDistributionType: FullyReplicated 43 | contentType: text/csv 44 | compressionType: None 45 | - channelName: validation 46 | dataSource: 47 | s3DataSource: 48 | s3DataType: S3Prefix 49 | s3Uri: s3://my-bucket/xgboost/validation/ 50 | s3DataDistributionType: FullyReplicated 51 | contentType: text/csv 52 | compressionType: None 53 | tags: 54 | - key: tagKey 55 | value: tagValue 56 | -------------------------------------------------------------------------------- /samples/xgboost-multi-model-hostingdeployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: sagemaker.aws.amazon.com/v1 2 | kind: HostingDeployment 3 | metadata: 4 | name: multi-model-hosting-deployment 5 | spec: 6 | region: us-west-2 7 | productionVariants: 8 | - variantName: AllTraffic 9 | modelName: xgboost-model 10 | initialInstanceCount: 1 11 | instanceType: ml.r5.large 12 | initialVariantWeight: 1 13 | models: 14 | - name: multi-model-xgboost 15 | executionRoleArn: arn:aws:iam::123456789012:role/service-role/AmazonSageMaker-ExecutionRole 16 | containers: 17 | - containerHostname: xgboost 18 | modelDataUrl: s3://my-bucket/xgboost/ 19 | mode: MultiModel 20 | image: 246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-xgboost:1.2-1 21 | -------------------------------------------------------------------------------- /scripts/build-release-tarball-Dockerfile: 
-------------------------------------------------------------------------------- 1 | # Build the release tarball. MacOS default tar creates extra tar headers which produce annoying 2 | # stderr output. By controlling the build environment we improve the experience for end-users. 3 | # 4 | FROM alpine:latest 5 | 6 | RUN mkdir -p sagemaker-k8s-operator-install-scripts 7 | WORKDIR /sagemaker-k8s-operator-install-scripts 8 | 9 | COPY ./README.md README.md 10 | COPY ./config config 11 | 12 | WORKDIR / 13 | RUN tar -czvf sagemaker-k8s-operator-install-scripts.tar.gz sagemaker-k8s-operator-install-scripts 14 | -------------------------------------------------------------------------------- /scripts/manager-builder-Dockerfile: -------------------------------------------------------------------------------- 1 | # Build the manager binary 2 | FROM golang:1.13-alpine as builder 3 | 4 | WORKDIR /workspace 5 | # Copy the Go Modules manifests 6 | COPY go.mod go.mod 7 | COPY go.sum go.sum 8 | # cache deps before building and copying source so that we don't need to re-download as much 9 | # and so that source changes don't invalidate our downloaded layer 10 | RUN go mod download 11 | 12 | # Copy the go source 13 | COPY main.go main.go 14 | COPY api/ api/ 15 | COPY controllers/ controllers/ 16 | 17 | # Build 18 | RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 GO111MODULE=on go build -a -o manager main.go 19 | 20 | # Use distroless as minimal base image to package the manager binary 21 | # Refer to https://github.com/GoogleContainerTools/distroless for more details 22 | FROM gcr.io/distroless/static:latest 23 | WORKDIR / 24 | COPY --from=builder /workspace/manager . 
25 | ENTRYPOINT ["/manager"] 26 | -------------------------------------------------------------------------------- /scripts/upload_xgboost_mnist_dataset/README.md: -------------------------------------------------------------------------------- 1 | # `upload_mnist` 2 | This script uploads the MNIST dataset that is compatible with the [Amazon SageMaker XGBoost algorithm](https://docs.aws.amazon.com/sagemaker/latest/dg/xgboost.html) to the specified S3 bucket. 3 | It downloads the MNIST dataset, splits it into train, test, and validation partitions, then uploads the partitions as CSV files into S3. 4 | 5 | ## Dataset 6 | The dataset is found here: [http://deeplearning.net/data/mnist/mnist.pkl.gz](http://deeplearning.net/data/mnist/mnist.pkl.gz). The code that converts the pickled numpy format to CSV was inspired by the [Amazon SageMaker Developer Guide](https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-dg.pdf). 7 | 8 | ## Example: 9 | 10 | ```bash 11 | ./upload_xgboost_mnist_dataset --s3-bucket ${BUCKET_NAME} --s3-prefix mnist-data 12 | ``` 13 | 14 | ## Requirements 15 | * `Python3` 16 | * `boto3` 17 | * `numpy` 18 | * `argparse` 19 | -------------------------------------------------------------------------------- /scripts/upload_xgboost_mnist_dataset/upload_xgboost_mnist_dataset: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | import boto3 5 | from botocore.exceptions import ClientError 6 | import csv 7 | import gzip 8 | import io 9 | import numpy as np 10 | import pickle 11 | import sys 12 | import urllib.request 13 | 14 | DATA_URL = 'http://deeplearning.net/data/mnist/mnist.pkl.gz' 15 | 16 | def main(): 17 | 18 | s3_bucket, s3_prefix = parse_args() 19 | 20 | for data_partition_name, csv_data in convert_data_to_csv(get_data_partitions(DATA_URL)): 21 | key = f'{s3_prefix}/{data_partition_name}/examples' 22 | url = f's3://{s3_bucket}/{key}' 23 | print(f'Uploading 
{csv_data.getbuffer().nbytes} bytes to {url}') 24 | 25 | try: 26 | boto3.Session().resource('s3').Bucket(s3_bucket).Object(key).upload_fileobj(csv_data) 27 | except ClientError as e: 28 | print(f'Unable to upload {url}: {e}') 29 | return 1 30 | 31 | 32 | def parse_args(): 33 | parser = argparse.ArgumentParser(description='Helper script that splits the MNIST dataset into train, validation, and test sets, then uploads them in CSV format to the specified s3 bucket') 34 | parser.add_argument('--s3-bucket', type=str, required=True) 35 | parser.add_argument('--s3-prefix', type=str, required=True) 36 | args = parser.parse_args() 37 | return args.s3_bucket, args.s3_prefix 38 | 39 | 40 | def get_data_partitions(url): 41 | ''' 42 | Download and pickle dataset. 43 | ''' 44 | print(f'Downloading dataset from {url}') 45 | with gzip.GzipFile(fileobj=urllib.request.urlopen(url), mode='r') as gzip_file: 46 | train_set, valid_set, test_set = pickle.load(gzip_file, encoding='latin1') 47 | return [('train', train_set), ('validation', valid_set), ('test', test_set)] 48 | 49 | 50 | def convert_data_to_csv(data_partitions): 51 | ''' 52 | Convert np data partitions to csv format. 
53 | ''' 54 | for data_partition_name, data_partition in data_partitions: 55 | print(f'{data_partition_name}: {data_partition[0].shape} {data_partition[1].shape}') 56 | labels = [t.tolist() for t in data_partition[1]] 57 | features = [t.tolist() for t in data_partition[0]] 58 | 59 | if data_partition_name != 'test': 60 | examples = np.insert(features, 0, labels, axis=1) 61 | else: 62 | examples = features 63 | 64 | with io.BytesIO() as f: 65 | np.savetxt(f, examples, delimiter=',') 66 | f.seek(0) 67 | yield data_partition_name, f 68 | 69 | 70 | if __name__ == '__main__': 71 | sys.exit(main()) 72 | -------------------------------------------------------------------------------- /smlogs-kubectl-plugin/.gitignore: -------------------------------------------------------------------------------- 1 | bin 2 | cover.out 3 | -------------------------------------------------------------------------------- /smlogs-kubectl-plugin/Makefile: -------------------------------------------------------------------------------- 1 | build: fmt vet 2 | go build -o bin/kubectl-smlogs cmd/kubectl-smlogs.go 3 | 4 | build-release: 5 | CGO_ENABLED=0 GOOS=darwin GOARCH=amd64 go build -a -o bin/kubectl-smlogs.darwin.amd64 cmd/kubectl-smlogs.go 6 | CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -a -o bin/kubectl-smlogs.linux.amd64 cmd/kubectl-smlogs.go 7 | 8 | test: fmt vet 9 | go test -v ./pkg/... -coverprofile cover.out 10 | 11 | run: fmt vet 12 | go run ./cmd/kubectl-smlogs.go 13 | 14 | fmt: 15 | go fmt ./... 16 | 17 | vet: 18 | go vet ./... 19 | -------------------------------------------------------------------------------- /smlogs-kubectl-plugin/README.md: -------------------------------------------------------------------------------- 1 | ### SMLogs Kubectl Plugin 2 | 3 | This Go module implements a [kubectl plugin](https://kubernetes.io/docs/tasks/extend-kubectl/kubectl-plugins/) for accessing cloudwatch logs of SageMaker jobs managed by the SageMaker Kubernetes operator. 
4 | 5 | ### Installation 6 | The [official installation guide on kubectl plugins](https://kubernetes.io/docs/tasks/extend-kubectl/kubectl-plugins/#installing-kubectl-plugins) is helpful. To be able to run the plugin via `kubectl`, you need to 7 | place the binary on your PATH. You can either add the `./bin` directory to your path or symlink the binary into your path. 8 | 9 | ```bash 10 | # Generate the binary by running below command 11 | make build 12 | 13 | # Above command will create a binary called kubectl-smlogs in `bin` directory of your current working directory. 14 | 15 | # Symlink assuming ~/bin exists and is on your PATH: 16 | ln -s "$(pwd)"/bin/kubectl-smlogs ~/bin 17 | 18 | # PATH 19 | export PATH=$PATH:"$(pwd)"/bin 20 | ``` 21 | 22 | ### Authentication 23 | The plugin uses your local aws config. You can create an aws config [using the official AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-configure.html). 24 | 25 | ### Example Usage 26 | #### 1. Using Help 27 | 28 | Cobra generates help and usage hints based on the source which you can utilize as: 29 | `kubectl smlogs --help` 30 | 31 | This will list the command, subcommands and flags available for use. 32 | 33 | #### 2. Logs for a Training job 34 | `kubectl smlogs TrainingJob -f` 35 | 36 | or even the following works - 37 | `kubectl smlogs trainingjobs ` 38 | 39 | 40 | #### 3. Logs for a HPO job 41 | - There exists a subcommand for HPO but only to guide the user to use the right subcommand as: 42 | ``` 43 | k smlogs HyperParameterTuningJob 44 | Error: For HPO logs, Refer to the the Spawned Training Jobs. Use `kubectl get TrainingJob` to list resource names 45 | ``` 46 | 47 | - Use the following command to get a list of the HPO spawned training jobs. 48 | `kubectl get trainingjob` 49 | 50 | - and now get the logs for any one of these 51 | `kubectl smlogs TrainingJob ` 52 | 53 | 54 | #### 4. 
Logs for a BatchTransform job 55 | `kubectl smlogs BatchTransformJob -f` 56 | 57 | or even the following works - 58 | `kubectl smlogs batchtransformjobs ` 59 | -------------------------------------------------------------------------------- /smlogs-kubectl-plugin/cmd/kubectl-smlogs.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package main 18 | 19 | import ( 20 | "os" 21 | 22 | "github.com/spf13/pflag" 23 | 24 | "go.amzn.com/sagemaker/smlogs-kubectl-plugin/pkg/cmd" 25 | "k8s.io/cli-runtime/pkg/genericclioptions" 26 | ) 27 | 28 | func main() { 29 | flags := pflag.NewFlagSet("kubectl-smlogs", pflag.ExitOnError) 30 | pflag.CommandLine = flags 31 | 32 | root := cmd.NewCmdSmLogs(genericclioptions.IOStreams{In: os.Stdin, Out: os.Stdout, ErrOut: os.Stderr}) 33 | if err := root.Execute(); err != nil { 34 | os.Exit(1) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /smlogs-kubectl-plugin/go.mod: -------------------------------------------------------------------------------- 1 | module go.amzn.com/sagemaker/smlogs-kubectl-plugin 2 | 3 | go 1.13 4 | 5 | require ( 6 | github.com/aws/amazon-sagemaker-operator-for-k8s v0.0.0 7 | github.com/aws/aws-sdk-go v1.37.3 8 | github.com/spf13/cobra v1.1.1 9 | github.com/spf13/pflag v1.0.5 10 | k8s.io/apimachinery v0.20.0 11 | k8s.io/cli-runtime v0.20.0 12 | k8s.io/client-go v0.20.0 13 | sigs.k8s.io/controller-runtime v0.6.2 14 | ) 15 | 16 | replace github.com/aws/amazon-sagemaker-operator-for-k8s => ../ 17 | -------------------------------------------------------------------------------- /smlogs-kubectl-plugin/pkg/cmd/cloudwatchlogs_client.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package cmd 18 | 19 | import ( 20 | "github.com/aws/aws-sdk-go/aws" 21 | awsrequest "github.com/aws/aws-sdk-go/aws/request" 22 | awssession "github.com/aws/aws-sdk-go/aws/session" 23 | cloudwatchlogs "github.com/aws/aws-sdk-go/service/cloudwatchlogs" 24 | ) 25 | 26 | // Interface which enables us to mock the CloudWatchLogsClient. 27 | type mockableCloudWatchLogsClient interface { 28 | FilterLogEventsRequest(*cloudwatchlogs.FilterLogEventsInput) (*awsrequest.Request, *cloudwatchlogs.FilterLogEventsOutput) 29 | } 30 | 31 | // Concrete implementation which forwards to the actual client. 32 | type concreteCloudWatchLogsClient struct { 33 | client *cloudwatchlogs.CloudWatchLogs 34 | } 35 | 36 | // Forwarding implementation of FilterLogEventsRequest. 37 | func (m concreteCloudWatchLogsClient) FilterLogEventsRequest(input *cloudwatchlogs.FilterLogEventsInput) (*awsrequest.Request, *cloudwatchlogs.FilterLogEventsOutput) { 38 | req, output := m.client.FilterLogEventsRequest(input) 39 | return req, output 40 | } 41 | 42 | // Create client wrapped by interface to allow for mocking. 43 | func createCloudWatchLogsClientForConfig(awsConfig aws.Config) mockableCloudWatchLogsClient { 44 | session, _ := awssession.NewSessionWithOptions( 45 | awssession.Options{ 46 | SharedConfigState: awssession.SharedConfigEnable, 47 | Config: awsConfig, 48 | }) 49 | return concreteCloudWatchLogsClient{ 50 | client: cloudwatchlogs.New(session), 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /tests/build_canary.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | docker build -f images/Dockerfile.canary . 
-t ${IMG:-canary-test-container} --build-arg DATA_BUCKET --build-arg COMMIT_SHA --build-arg RESULT_BUCKET -------------------------------------------------------------------------------- /tests/build_integration.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | docker build -f images/Dockerfile.integration -t ${IMG:-integration-test-container} . -------------------------------------------------------------------------------- /tests/codebuild/generate_iam_role.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Helper script to generate an IAM Role needed to install operator using role-based authentication. 4 | # https://sagemaker.readthedocs.io/en/stable/amazon_sagemaker_operators_for_kubernetes.html#create-an-iam-role 5 | # 6 | # Run as: 7 | # $ ./generate_iam_role.sh ${cluster_arn/cluster_name} ${operator_namespace} ${role_name} ${cluster_region} 8 | # 9 | 10 | CLUSTER_ARN="${1}" 11 | OPERATOR_NAMESPACE="${2}" 12 | ROLE_NAME="${3}" 13 | CLUSTER_REGION="${4:-us-east-1}" 14 | aws_account=$(aws sts get-caller-identity --query Account --output text) 15 | trustfile="trust.json" 16 | 17 | # if using an existing cluster, use the cluster arn to get the region and cluster name 18 | # example, cluster_arn=arn:aws:eks:us-east-1:12345678910:cluster/test 19 | cluster_name=$(echo ${CLUSTER_ARN} | cut -d'/' -f2) 20 | 21 | # A function to get the OIDC_ID associated with an EKS cluster 22 | function get_oidc_id { 23 | # TODO: Ideally this should be based on version compatibility instead of command failure 24 | eksctl utils associate-iam-oidc-provider --cluster ${cluster_name} --region ${CLUSTER_REGION} --approve 25 | if [[ $? 
-ge 1 ]]; then 26 | eksctl utils associate-iam-oidc-provider --name ${cluster_name} --region ${CLUSTER_REGION} --approve 27 | fi 28 | 29 | local oidc=$(aws eks describe-cluster --name ${cluster_name} --region ${CLUSTER_REGION} --query cluster.identity.oidc.issuer --output text) 30 | oidc_id=$(echo ${oidc} | rev | cut -d'/' -f1 | rev) 31 | } 32 | 33 | # A function that generates an IAM role for the given account, cluster, namespace, region 34 | # Parameter: 35 | # $1: Name of the trust file to generate. 36 | function create_namespaced_iam_role { 37 | local trustfile="${1}" 38 | # Check if role already exists 39 | aws iam get-role --role-name ${ROLE_NAME} 40 | if [[ $? -eq 0 ]]; then 41 | echo "A role for this cluster and namespace already exists in this account, assuming sagemaker access and proceeding." 42 | else 43 | echo "IAM Role does not exist, creating a new Role for the cluster" 44 | aws iam create-role --role-name ${ROLE_NAME} --assume-role-policy-document file://${trustfile} --output=text 45 | aws iam attach-role-policy --role-name ${ROLE_NAME} --policy-arn arn:aws:iam::aws:policy/AmazonSageMakerFullAccess 46 | fi 47 | } 48 | 49 | # Remove the generated trust file 50 | # Parameter: 51 | # $1: Name of the trust file to delete. 52 | function delete_generated_file { 53 | rm "${1}" 54 | } 55 | 56 | echo "Get the OIDC ID for the cluster" 57 | get_oidc_id 58 | echo "Delete the trust json file if it already exists" 59 | delete_generated_file "${trustfile}" 60 | echo "Generate a trust json" 61 | ./generate_trust_policy.sh ${CLUSTER_REGION} ${aws_account} ${oidc_id} ${OPERATOR_NAMESPACE} > "${trustfile}" 62 | echo "Create the IAM Role using these values" 63 | create_namespaced_iam_role "${trustfile}" 64 | echo "Cleanup for the next run!" 
65 | delete_generated_file "${trustfile}" 66 | 67 | -------------------------------------------------------------------------------- /tests/codebuild/generate_trust_policy.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Helper script to generate trust policy needed to install operator using role-based authentication. 4 | # https://sagemaker.readthedocs.io/en/stable/amazon_sagemaker_operators_for_kubernetes.html#create-an-iam-role 5 | # 6 | # Run as: 7 | # $ ./generate_trust_policy ${EKS_CLUSTER_REGION} ${AWS_ACCOUNT_ID} ${OIDC_ID} [optional: ${OPERATOR_NAMESPACE}] > trust.json 8 | # 9 | # For example: 10 | # $ ./generate_trust_policy us-west-2 123456789012 D48675832CA65BD10A532F597OIDCID > trust.json 11 | # This will create a file `trust.json` containing a role policy that enables the operator in an EKS cluster to assume AWS roles. 12 | # 13 | # The OPERATOR_NAMESPACE parameter is for when you want to run the operator in a custom namespace other than "sagemaker-k8s-operator-system". 
14 | 15 | cluster_region="$1" 16 | account_number="$2" 17 | oidc_id="$3" 18 | operator_namespace="${4:-sagemaker-k8s-operator-system}" 19 | 20 | printf '{ 21 | "Version": "2012-10-17", 22 | "Statement": [ 23 | { 24 | "Effect": "Allow", 25 | "Principal": { 26 | "Federated": "arn:aws:iam::'"${account_number}"':oidc-provider/oidc.eks.'"${cluster_region}"'.amazonaws.com/id/'"${oidc_id}"'" 27 | }, 28 | "Action": "sts:AssumeRoleWithWebIdentity", 29 | "Condition": { 30 | "StringEquals": { 31 | "oidc.eks.'"${cluster_region}"'.amazonaws.com/id/'"${oidc_id}"':aud": "sts.amazonaws.com", 32 | "oidc.eks.'"${cluster_region}"'.amazonaws.com/id/'"${oidc_id}"':sub": "system:serviceaccount:'"${operator_namespace}"':sagemaker-k8s-operator-default" 33 | } 34 | } 35 | } 36 | ] 37 | } 38 | ' -------------------------------------------------------------------------------- /tests/codebuild/inject_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Inject environment variables into the job YAMLs 4 | function inject_variables() 5 | { 6 | variables=("ROLE_ARN" "DATA_BUCKET" "FSX_ID") 7 | 8 | local file_name="$1" 9 | for i in "${variables[@]}" 10 | do 11 | local curr_var=${!i} 12 | sed -i "s|{$i}|${curr_var}|g" "${file_name}" 13 | done 14 | } 15 | 16 | # Injects necessary environment variables into resource yaml specs. This allows 17 | # for generic integration tests to be created, substituting values that are 18 | # specific to the account they are run within. 
19 | function inject_all_variables 20 | { 21 | inject_variables testfiles/xgboost-mnist-trainingjob.yaml 22 | inject_variables testfiles/kmeans-mnist-processingjob.yaml 23 | inject_variables testfiles/spot-xgboost-mnist-trainingjob.yaml 24 | inject_variables testfiles/xgboost-mnist-custom-endpoint.yaml 25 | # inject_variables testfiles/efs-xgboost-mnist-trainingjob.yaml 26 | # inject_variables testfiles/fsx-kmeans-mnist-trainingjob.yaml 27 | inject_variables testfiles/xgboost-mnist-hpo.yaml 28 | inject_variables testfiles/spot-xgboost-mnist-hpo.yaml 29 | inject_variables testfiles/xgboost-mnist-hpo-custom-endpoint.yaml 30 | inject_variables testfiles/xgboost-model.yaml 31 | inject_variables testfiles/xgboost-mnist-batchtransform.yaml 32 | inject_variables testfiles/xgboost-hosting-deployment.yaml 33 | inject_variables testfiles/xgboost-hosting-deployment-with-name.yaml 34 | inject_variables testfiles/xgboost-hosting-deployment-multi-container.yaml 35 | inject_variables testfiles/failing-xgboost-mnist-trainingjob.yaml 36 | inject_variables testfiles/failing-xgboost-mnist-hpo.yaml 37 | inject_variables testfiles/xgboost-mnist-trainingjob-debugger.yaml 38 | inject_variables testfiles/xgboost-mnist-trainingjob-namespaced.yaml 39 | inject_variables testfiles/xgboost-mnist-trainingjob-china.yaml 40 | inject_variables testfiles/xgboost-mnist-hpo-china.yaml 41 | inject_variables testfiles/xgboost-model-china.yaml 42 | inject_variables testfiles/xgboost-mnist-batchtransform-china.yaml 43 | inject_variables testfiles/hd-retain-varient-properties.yaml 44 | } 45 | -------------------------------------------------------------------------------- /tests/codebuild/local-run/.env.example: -------------------------------------------------------------------------------- 1 | # The bucket containing the test data 2 | DATA_BUCKET= 3 | 4 | # The bucket containing the installation scripts 5 | ALPHA_TARBALL_BUCKET= 6 | 7 | # The particular commit SHA to test against 8 | 
CODEBUILD_RESOLVED_SOURCE_VERSION= 9 | 10 | # (Optional) Skip installation of the CRDs and controller into the cluster 11 | # SKIP_INSTALLATION=true 12 | 13 | # The credentials to pass the operator installation scripts 14 | OPERATOR_AWS_ACCESS_KEY_ID= 15 | OPERATOR_AWS_SECRET_ACCESS_KEY= 16 | 17 | # The role ARN to inject into the SageMaker jobs 18 | ROLE_ARN=arn:aws:iam:: 19 | 20 | # (Optional) An existing FSX cluster with training data 21 | # FSX_ID=fs-0c80e27f4c1c49d96 22 | 23 | # (Optional) Print all controller logs in the event of a failure 24 | # PRINT_DEBUG=false -------------------------------------------------------------------------------- /tests/codebuild/local-run/.gitignore: -------------------------------------------------------------------------------- 1 | local-kubeconfig 2 | .env -------------------------------------------------------------------------------- /tests/codebuild/local-run/README.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | The files in this directory allow a developer to run integration tests locally, in almost exactly the same environment as they would run in Codebuild, against their own cluster. This speeds up the development process of integration tests as you don't have to wait for an EKS cluster to spin up or spin down. This can also be used to run integration tests against a change (for example, before a developer pushes a CR), albeit with some caveats. 3 | 4 | ## How to run against an existing cluster 5 | 6 | ### 1. Configure your environment variables 7 | Environment variables are the way we configure the options and debugging modes for our integration tests. 8 | We provide an example `.env` file (`.env.example`) which contains a list of each option and how to configure 9 | your integration tests. 10 | 11 | ```bash 12 | cp .env.example .env 13 | vim .env 14 | ``` 15 | 16 | ### 2. 
Run integration test script 17 | 18 | ```bash 19 | KUBECONFIG=/path/to/kubeconfig ./run_integration_test_against_existing_cluster.sh 20 | ``` 21 | 22 | This will build a Docker image that is based on the integration test Docker image. The script will copy the kubeconfig file specified (`~/.kube/config` if none specified) into the Docker image so that the integration tests use your existing cluster. The script then uses AWS CodeBuild's [tool](https://github.com/aws/aws-codebuild-docker-images/tree/master/local_builds) for running CodeBuild tests locally to run tests. 23 | 24 | ## Notes / Caveats: 25 | * The integration test files are copied into the Docker container at runtime (akin to cloning a repo), so you do not need to push them anywhere beforehand. 26 | * Our tests are currently set up to pull an installation package from S3. This package contains the CRDs and controller image, as well as any binaries we hope to test. If you would like to install your CRDs prior, and run the controller locally, use the `SKIP_INSTALLATION=true` option in your `.env`. 27 | -------------------------------------------------------------------------------- /tests/codebuild/local-run/local-codebuild-Dockerfile: -------------------------------------------------------------------------------- 1 | # Start from the integration test image as a base image. 2 | FROM integration-test-container 3 | 4 | # Install aws-iam-authenticator, it is used by EKS kubeconfigs. 5 | RUN curl -SO https://amazon-eks.s3-us-west-2.amazonaws.com/1.14.6/2019-08-22/bin/linux/amd64/aws-iam-authenticator && chmod +x aws-iam-authenticator && mv aws-iam-authenticator /bin 6 | 7 | # Create the default kubeconfig directory and copy the kubeconfig to it. 
8 | RUN mkdir -p /root/.kube/ 9 | COPY local-kubeconfig /root/.kube/config 10 | -------------------------------------------------------------------------------- /tests/codebuild/local-run/run_integration_test_against_existing_cluster.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | if [ -z "$KUBECONFIG" ]; then 4 | KUBECONFIG=~/.kube/config 5 | fi 6 | 7 | if [ ! -f "$KUBECONFIG" ]; then 8 | echo "Kubeconfig '$KUBECONFIG' does not seem to exist" 9 | exit 1 10 | fi 11 | 12 | set -e 13 | 14 | echo "Pulling latest aws-codebuild-local image" 15 | docker pull amazon/aws-codebuild-local:latest --disable-content-trust=false 16 | 17 | echo "Copying kubeconfig to ${pwd}, so that it can be included in local docker image." 18 | cp "$KUBECONFIG" local-kubeconfig 19 | 20 | echo "Adding layer to integration test container with your kubeconfig" 21 | docker build . -f local-codebuild-Dockerfile -t local-codebuild 22 | 23 | echo "Running integration test" 24 | # AWS_REGION must be us-west-2 because our pipelines are defined there. 25 | AWS_REGION=us-west-2 ./codebuild_build.sh \ 26 | -e .env \ 27 | -i local-codebuild \ 28 | -a ./artifact \ 29 | -c \ 30 | -b ../../../codebuild/integration_test.yaml \ 31 | -s "$(realpath ../../../)" 32 | -------------------------------------------------------------------------------- /tests/codebuild/private-link-test/generate-deny-egress-yaml: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Helper script to produce NetworkPolicy that only allows DNS to all IPs and 4 | HTTPS to certain IPs. 5 | Each IP should be provided on its own line to stdin. The NetworkPolicy yaml 6 | definition is provided to stdout. 
7 | 8 | Usage: 9 | $ ./generate-deny-egress-yaml << EOF 10 | 127.0.1.2 11 | 169.23.23.23 12 | EOF 13 | """ 14 | 15 | import sys 16 | 17 | base = """ 18 | apiVersion: networking.k8s.io/v1 19 | kind: NetworkPolicy 20 | metadata: 21 | name: deny-egress-for-private-link-test 22 | namespace: sagemaker-k8s-operator-system 23 | spec: 24 | podSelector: {} 25 | policyTypes: 26 | - Egress 27 | egress: 28 | # Allow DNS traffic 29 | - to: 30 | - ipBlock: 31 | cidr: 0.0.0.0/0 32 | ports: 33 | - protocol: UDP 34 | port: 53""" 35 | 36 | per_ip = """ 37 | # Allow HTTPS traffic to the specified IP 38 | - to: 39 | - ipBlock: 40 | cidr: %s/32 41 | ports: 42 | - protocol: TCP 43 | port: 443""" 44 | 45 | print(base, end='') 46 | for line in sys.stdin: 47 | print(per_ip % line.strip(), end='') 48 | print('') 49 | -------------------------------------------------------------------------------- /tests/codebuild/private-link-test/non-private-link-trainingjob.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: sagemaker.aws.amazon.com/v1 2 | kind: TrainingJob 3 | metadata: 4 | name: non-private-link-training-job 5 | spec: 6 | hyperParameters: 7 | - name: max_depth 8 | value: "5" 9 | - name: eta 10 | value: "0.2" 11 | - name: gamma 12 | value: "4" 13 | - name: min_child_weight 14 | value: "6" 15 | - name: silent 16 | value: "0" 17 | - name: objective 18 | value: multi:softmax 19 | - name: num_class 20 | value: "10" 21 | - name: num_round 22 | value: "10" 23 | algorithmSpecification: 24 | trainingImage: 433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:1 25 | trainingInputMode: File 26 | roleArn: {ROLE_ARN} 27 | region: us-west-2 28 | outputDataConfig: 29 | s3OutputPath: s3://{DATA_BUCKET}/xgboost 30 | resourceConfig: 31 | instanceCount: 1 32 | instanceType: ml.m4.xlarge 33 | volumeSizeInGB: 5 34 | stoppingCondition: 35 | maxRuntimeInSeconds: 86400 36 | inputDataConfig: 37 | - channelName: train 38 | dataSource: 39 | s3DataSource: 40 | 
s3DataType: S3Prefix 41 | s3Uri: s3://{DATA_BUCKET}/train/ 42 | s3DataDistributionType: FullyReplicated 43 | contentType: text/csv 44 | compressionType: None 45 | - channelName: validation 46 | dataSource: 47 | s3DataSource: 48 | s3DataType: S3Prefix 49 | s3Uri: s3://{DATA_BUCKET}/validation/ 50 | s3DataDistributionType: FullyReplicated 51 | contentType: text/csv 52 | compressionType: None 53 | -------------------------------------------------------------------------------- /tests/codebuild/private-link-test/private-link-trainingjob.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: sagemaker.aws.amazon.com/v1 2 | kind: TrainingJob 3 | metadata: 4 | name: private-link-training-job 5 | spec: 6 | region: us-west-2 7 | # Replaced by integration test. 8 | sageMakerEndpoint: SAGEMAKER_ENDPOINT 9 | hyperParameters: 10 | - name: max_depth 11 | value: "5" 12 | - name: eta 13 | value: "0.2" 14 | - name: gamma 15 | value: "4" 16 | - name: min_child_weight 17 | value: "6" 18 | - name: silent 19 | value: "0" 20 | - name: objective 21 | value: multi:softmax 22 | - name: num_class 23 | value: "10" 24 | - name: num_round 25 | value: "10" 26 | algorithmSpecification: 27 | trainingImage: 433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:1 28 | trainingInputMode: File 29 | roleArn: {ROLE_ARN} 30 | outputDataConfig: 31 | s3OutputPath: s3://{DATA_BUCKET}/xgboost 32 | resourceConfig: 33 | instanceCount: 1 34 | instanceType: ml.m4.xlarge 35 | volumeSizeInGB: 5 36 | stoppingCondition: 37 | maxRuntimeInSeconds: 86400 38 | inputDataConfig: 39 | - channelName: train 40 | dataSource: 41 | s3DataSource: 42 | s3DataType: S3Prefix 43 | s3Uri: s3://{DATA_BUCKET}/train/ 44 | s3DataDistributionType: FullyReplicated 45 | contentType: text/csv 46 | compressionType: None 47 | - channelName: validation 48 | dataSource: 49 | s3DataSource: 50 | s3DataType: S3Prefix 51 | s3Uri: s3://{DATA_BUCKET}/validation/ 52 | s3DataDistributionType: 
FullyReplicated 53 | contentType: text/csv 54 | compressionType: None 55 | -------------------------------------------------------------------------------- /tests/codebuild/run_all_sample_canary_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source create_tests.sh 4 | source feature_tests.sh 5 | source delete_tests.sh 6 | source inject_tests.sh 7 | source smlogs_tests.sh 8 | 9 | run_canary_tests "default" 10 | run_feature_canary_tests "default" 11 | verify_canary_tests "default" 12 | run_update_canary_tests "default" 13 | verify_update_canary_tests "default" 14 | verify_feature_canary_tests "default" 15 | run_smlogs_canary_tests "default" 16 | delete_all_resources "default" # Delete all existing resources to re-use metadata names 17 | run_delete_canary_tests "default" 18 | -------------------------------------------------------------------------------- /tests/codebuild/run_all_sample_canary_tests_china.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source create_tests.sh 4 | source feature_tests.sh 5 | source delete_tests.sh 6 | source inject_tests.sh 7 | source smlogs_tests.sh 8 | 9 | run_canary_tests_china "default" 10 | verify_canary_tests_china "default" 11 | run_update_canary_tests "default" 12 | verify_update_canary_tests "default" 13 | delete_all_resources "default" # Delete all existing resources to re-use metadata names 14 | -------------------------------------------------------------------------------- /tests/codebuild/run_all_sample_namespace_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source create_tests.sh 4 | source delete_tests.sh 5 | source inject_tests.sh 6 | source feature_tests.sh 7 | 8 | crd_namespace="${1}" 9 | noop_namespace="noop-namespace" 10 | 11 | run_canary_tests "${crd_namespace}" 12 | verify_canary_tests "${crd_namespace}" 13 | 
run_namespace_deployment_tests "${noop_namespace}" # Deploy job to namespace without operator. 14 | verify_job_fails_outside_operator_namespace "${noop_namespace}" 15 | delete_all_resources "${crd_namespace}" # Delete all existing resources to re-use metadata names 16 | delete_all_resources "${noop_namespace}" 17 | run_delete_canary_tests "${crd_namespace}" -------------------------------------------------------------------------------- /tests/codebuild/run_all_sample_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source create_tests.sh 4 | source update_tests.sh 5 | source feature_tests.sh 6 | source delete_tests.sh 7 | source inject_tests.sh 8 | source smlogs_tests.sh 9 | 10 | crd_namespace=${1} 11 | 12 | run_integration_tests ${crd_namespace} 13 | run_feature_integration_tests ${crd_namespace} 14 | verify_integration_tests ${crd_namespace} 15 | # run_update_integration_tests ${crd_namespace} 16 | # verify_update_integration_tests ${crd_namespace} 17 | verify_feature_integration_tests ${crd_namespace} 18 | run_smlogs_integration_tests ${crd_namespace} 19 | delete_all_resources ${crd_namespace} # Delete all existing resources to re-use metadata names 20 | run_delete_integration_tests ${crd_namespace} -------------------------------------------------------------------------------- /tests/codebuild/smlogs_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Ensure that the smlogs kubectl plugin is able to detect and properly process the output of each job type. 4 | function run_smlogs_canary_tests 5 | { 6 | echo "Running smlogs canary tests" 7 | # Verify smlogs worked. 8 | if [ "$(kubectl smlogs trainingjob xgboost-mnist | wc -l)" -lt "1" ]; then 9 | echo "smlogs trainingjob did not produce any output." 
10 | exit 1 11 | fi 12 | if [ "$(kubectl smlogs batchtransformjob xgboost-batch | wc -l)" -lt "1" ]; then 13 | echo "smlogs batchtransformjob did not produce any output." 14 | exit 1 15 | fi 16 | } 17 | 18 | function run_smlogs_integration_tests 19 | { 20 | echo "Running smlogs integration tests" 21 | run_smlogs_canary_tests 22 | } 23 | -------------------------------------------------------------------------------- /tests/codebuild/testfiles/efs-xgboost-mnist-trainingjob.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: sagemaker.aws.amazon.com/v1 2 | kind: TrainingJob 3 | metadata: 4 | name: efs-xgboost-mnist 5 | spec: 6 | hyperParameters: 7 | - name: max_depth 8 | value: "5" 9 | - name: eta 10 | value: "0.2" 11 | - name: gamma 12 | value: "4" 13 | - name: min_child_weight 14 | value: "6" 15 | - name: silent 16 | value: "0" 17 | - name: objective 18 | value: multi:softmax 19 | - name: num_class 20 | value: "10" 21 | - name: num_round 22 | value: "10" 23 | algorithmSpecification: 24 | trainingImage: 433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:1 25 | trainingInputMode: File 26 | roleArn: {ROLE_ARN} 27 | region: us-west-2 28 | outputDataConfig: 29 | s3OutputPath: s3://{DATA_BUCKET}/xgboost 30 | resourceConfig: 31 | instanceCount: 1 32 | instanceType: ml.m4.xlarge 33 | volumeSizeInGB: 5 34 | stoppingCondition: 35 | maxRuntimeInSeconds: 86400 36 | inputDataConfig: 37 | - channelName: train 38 | dataSource: 39 | fileSystemDataSource: 40 | fileSystemType: EFS 41 | fileSystemAccessMode: ro 42 | fileSystemId: fs-e2466d9b 43 | directoryPath: /xgboost-mnist/train 44 | contentType: text/csv 45 | compressionType: None 46 | - channelName: validation 47 | dataSource: 48 | fileSystemDataSource: 49 | fileSystemType: EFS 50 | fileSystemAccessMode: ro 51 | fileSystemId: fs-e2466d9b 52 | directoryPath: /xgboost-mnist/validation 53 | contentType: text/csv 54 | compressionType: None 55 | vpcConfig: 56 | securityGroupIds: 57 
| - sg-4ace4a2f 58 | subnets: 59 | - subnet-e5b2538e 60 | - subnet-c7247bbd 61 | - subnet-c34bcc8f 62 | -------------------------------------------------------------------------------- /tests/codebuild/testfiles/failing-xgboost-mnist-trainingjob.yaml: -------------------------------------------------------------------------------- 1 | # This TrainingJob was designed to fail during the "Training" phase. 2 | # The image specified refers to a plain Tensorflow DLC image that does not have 3 | # the correct entrypoint for a SageMaker training job. This will fail with a 4 | # python exception that should be captured by the operator. 5 | apiVersion: sagemaker.aws.amazon.com/v1 6 | kind: TrainingJob 7 | metadata: 8 | name: failing-xgboost-mnist 9 | spec: 10 | hyperParameters: 11 | - name: max_depth 12 | value: "5" 13 | - name: eta 14 | value: "0.2" 15 | - name: gamma 16 | value: "4" 17 | - name: min_child_weight 18 | value: "6" 19 | - name: silent 20 | value: "0" 21 | - name: objective 22 | value: multi:softmax 23 | - name: num_class 24 | value: "10" 25 | - name: num_round 26 | value: "10" 27 | algorithmSpecification: 28 | trainingImage: 763104351884.dkr.ecr.us-west-2.amazonaws.com/tensorflow-training:2.0.1-gpu-py36-cu100-ubuntu18.04 29 | trainingInputMode: File 30 | roleArn: {ROLE_ARN} 31 | region: us-west-2 32 | outputDataConfig: 33 | s3OutputPath: s3://{DATA_BUCKET}/xgboost/ 34 | resourceConfig: 35 | instanceCount: 1 36 | instanceType: ml.m4.xlarge 37 | volumeSizeInGB: 5 38 | stoppingCondition: 39 | maxRuntimeInSeconds: 86400 40 | inputDataConfig: 41 | - channelName: train 42 | dataSource: 43 | s3DataSource: 44 | s3DataType: S3Prefix 45 | s3Uri: s3://{DATA_BUCKET}/train/ 46 | s3DataDistributionType: FullyReplicated 47 | contentType: text/csv 48 | compressionType: None 49 | - channelName: validation 50 | dataSource: 51 | s3DataSource: 52 | s3DataType: S3Prefix 53 | s3Uri: s3://{DATA_BUCKET}/validation/ 54 | s3DataDistributionType: FullyReplicated 55 | contentType: 
text/csv 56 | compressionType: None 57 | tags: 58 | - key: tagKey 59 | value: tagValue -------------------------------------------------------------------------------- /tests/codebuild/testfiles/fsx-kmeans-mnist-trainingjob.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: sagemaker.aws.amazon.com/v1 2 | kind: TrainingJob 3 | metadata: 4 | name: fsx-kmeans-mnist 5 | spec: 6 | hyperParameters: 7 | - name: k 8 | value: "10" 9 | - name: feature_dim 10 | value: "784" 11 | algorithmSpecification: 12 | trainingImage: 174872318107.dkr.ecr.us-west-2.amazonaws.com/kmeans:1 13 | trainingInputMode: File 14 | roleArn: {ROLE_ARN} 15 | region: us-west-2 16 | outputDataConfig: 17 | s3OutputPath: s3://{DATA_BUCKET}/kmeans_mnist_example/output 18 | resourceConfig: 19 | instanceCount: 1 20 | instanceType: ml.m4.xlarge 21 | volumeSizeInGB: 5 22 | stoppingCondition: 23 | maxRuntimeInSeconds: 86400 24 | inputDataConfig: 25 | - channelName: train 26 | dataSource: 27 | fileSystemDataSource: 28 | fileSystemType: FSxLustre 29 | fileSystemAccessMode: ro 30 | fileSystemId: "{FSX_ID}" 31 | directoryPath: /fsx/kmeans_mnist_example/input 32 | contentType: "text/csv;label_size=0" 33 | compressionType: None 34 | vpcConfig: 35 | securityGroupIds: 36 | - sg-e6b84ca2 37 | subnets: 38 | - subnet-187e9960 39 | - subnet-21d8d86a 40 | - subnet-91b1d8ba 41 | -------------------------------------------------------------------------------- /tests/codebuild/testfiles/hd-autoscaling-retain-varient-properties.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: sagemaker.aws.amazon.com/v1 2 | kind: HostingAutoscalingPolicy 3 | metadata: 4 | name: hosting-retain-varient-properties 5 | spec: 6 | resourceId: 7 | - endpointName: hosting-retain-varient-properties 8 | variantName: AllTraffic 9 | region: us-west-2 10 | policyName: SageMakerEndpointInvocationScalingPolicy 11 | policyType: TargetTrackingScaling 12 | 
minCapacity: 2 13 | maxCapacity: 2 14 | targetTrackingScalingPolicyConfiguration: 15 | targetValue: 70.0 16 | predefinedMetricSpecification: 17 | predefinedMetricType: SageMakerVariantInvocationsPerInstance 18 | scaleInCooldown: 700 19 | scaleOutCooldown: 300 20 | disableScaleIn: true 21 | -------------------------------------------------------------------------------- /tests/codebuild/testfiles/hd-retain-varient-properties.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: sagemaker.aws.amazon.com/v1 2 | kind: HostingDeployment 3 | metadata: 4 | name: hosting-retain-varient-properties 5 | spec: 6 | endpointName: hosting-retain-varient-properties 7 | region: us-west-2 8 | retainAllVariantProperties: true 9 | excludeRetainedVariantProperties: 10 | - variantPropertyType: "DesiredWeight" 11 | productionVariants: 12 | - variantName: AllTraffic 13 | modelName: xgboost-model 14 | initialInstanceCount: 1 15 | instanceType: ml.r5.large 16 | initialVariantWeight: 2 17 | models: 18 | - name: xgboost-model 19 | executionRoleArn: "{ROLE_ARN}" 20 | containers: 21 | - containerHostname: xgboost 22 | modelDataUrl: s3://{DATA_BUCKET}/inference/xgboost-mnist/model.tar.gz 23 | image: 433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:latest 24 | environment: 25 | - name: my_env_key 26 | value: my_env_value 27 | primaryContainer: xgboost 28 | tags: 29 | - key: name 30 | value: sagemaker-k8s-operator tag test 31 | -------------------------------------------------------------------------------- /tests/codebuild/testfiles/kmeans-mnist-processingjob.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: sagemaker.aws.amazon.com/v1 2 | kind: ProcessingJob 3 | metadata: 4 | name: kmeans-mnist 5 | spec: 6 | environment: 7 | - name: my_var 8 | value: my_value 9 | - name: my_var2 10 | value: my_value2 11 | appSpecification: 12 | imageUri: 
763104351884.dkr.ecr.us-west-2.amazonaws.com/pytorch-training:1.5.0-cpu-py36-ubuntu16.04 13 | containerEntrypoint: 14 | - python 15 | - /opt/ml/processing/code/kmeans_preprocessing.py 16 | roleArn: {ROLE_ARN} 17 | region: us-west-2 18 | networkConfig: 19 | enableNetworkIsolation: False 20 | processingOutputConfig: 21 | outputs: 22 | - outputName: train_data 23 | s3Output: 24 | s3Uri: s3://{DATA_BUCKET}/mnist_kmeans_example/output/ 25 | localPath: /opt/ml/processing/output_train/ 26 | s3UploadMode: EndOfJob 27 | - outputName: test_data 28 | s3Output: 29 | s3Uri: s3://{DATA_BUCKET}/mnist_kmeans_example/output1/ 30 | localPath: /opt/ml/processing/output_test/ 31 | s3UploadMode: EndOfJob 32 | - outputName: valid_data 33 | s3Output: 34 | s3Uri: s3://{DATA_BUCKET}/mnist_kmeans_example/output1/ 35 | localPath: /opt/ml/processing/output_valid/ 36 | s3UploadMode: EndOfJob 37 | processingResources: 38 | clusterConfig: 39 | instanceCount: 1 40 | instanceType: ml.m5.xlarge 41 | volumeSizeInGB: 20 42 | processingInputs: 43 | - inputName: mnist_tar 44 | s3Input: 45 | s3Uri: s3://sagemaker-sample-data-us-west-2/algorithms/kmeans/mnist/mnist.pkl.gz 46 | localPath: /opt/ml/processing/input 47 | s3DataType: S3Prefix 48 | s3InputMode: File 49 | s3CompressionType: None 50 | - inputName: source_code 51 | s3Input: 52 | s3Uri: s3://{DATA_BUCKET}/mnist_kmeans_example/processing_code/kmeans_preprocessing.py 53 | localPath: /opt/ml/processing/code 54 | s3DataType: S3Prefix 55 | s3InputMode: File 56 | s3CompressionType: None 57 | tags: 58 | - key: tagKey 59 | value: tagValue 60 | -------------------------------------------------------------------------------- /tests/codebuild/testfiles/spot-xgboost-mnist-trainingjob.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: sagemaker.aws.amazon.com/v1 2 | kind: TrainingJob 3 | metadata: 4 | name: spot-xgboost-mnist 5 | spec: 6 | hyperParameters: 7 | - name: max_depth 8 | value: "5" 9 | - name: eta 
10 | value: "0.2" 11 | - name: gamma 12 | value: "4" 13 | - name: min_child_weight 14 | value: "6" 15 | - name: silent 16 | value: "0" 17 | - name: objective 18 | value: multi:softmax 19 | - name: num_class 20 | value: "10" 21 | - name: num_round 22 | value: "10" 23 | algorithmSpecification: 24 | trainingImage: 433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:1 25 | trainingInputMode: File 26 | roleArn: {ROLE_ARN} 27 | region: us-west-2 28 | outputDataConfig: 29 | s3OutputPath: s3://{DATA_BUCKET}/xgboost 30 | resourceConfig: 31 | instanceCount: 1 32 | instanceType: ml.m4.xlarge 33 | volumeSizeInGB: 5 34 | stoppingCondition: 35 | maxRuntimeInSeconds: 3599 36 | maxWaitTimeInSeconds: 3600 37 | enableManagedSpotTraining: true 38 | inputDataConfig: 39 | - channelName: train 40 | dataSource: 41 | s3DataSource: 42 | s3DataType: S3Prefix 43 | s3Uri: s3://{DATA_BUCKET}/train/ 44 | s3DataDistributionType: FullyReplicated 45 | contentType: text/csv 46 | compressionType: None 47 | - channelName: validation 48 | dataSource: 49 | s3DataSource: 50 | s3DataType: S3Prefix 51 | s3Uri: s3://{DATA_BUCKET}/validation/ 52 | s3DataDistributionType: FullyReplicated 53 | contentType: text/csv 54 | compressionType: None 55 | checkpointConfig: 56 | s3Uri: s3://{DATA_BUCKET}/checkpoints/ 57 | -------------------------------------------------------------------------------- /tests/codebuild/testfiles/xgboost-hosting-deployment-multi-container.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: sagemaker.aws.amazon.com/v1 2 | kind: HostingDeployment 3 | metadata: 4 | name: xgboost-hosting-multi-container 5 | spec: 6 | region: us-west-2 7 | productionVariants: 8 | - variantName: AllTraffic 9 | modelName: xgboost-model 10 | initialInstanceCount: 1 11 | instanceType: ml.r5.large 12 | initialVariantWeight: 1 13 | models: 14 | - name: xgboost-model 15 | executionRoleArn: "{ROLE_ARN}" 16 | containers: 17 | - containerHostname: xgboost-1 18 | 
modelDataUrl: s3://{DATA_BUCKET}/inference/xgboost-mnist/model.tar.gz 19 | image: 433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:1 20 | mode: SingleModel 21 | - containerHostname: xgboost-2 22 | modelDataUrl: s3://{DATA_BUCKET}/inference/xgboost-mnist/model.tar.gz 23 | image: 433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:1 24 | mode: SingleModel 25 | -------------------------------------------------------------------------------- /tests/codebuild/testfiles/xgboost-hosting-deployment-with-name.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: sagemaker.aws.amazon.com/v1 2 | kind: HostingDeployment 3 | metadata: 4 | name: named-xgboost-hosting 5 | spec: 6 | region: us-west-2 7 | endpointName: named-xgboost-hosting 8 | productionVariants: 9 | - variantName: AllTraffic 10 | modelName: xgboost-model 11 | initialInstanceCount: 1 12 | instanceType: ml.r5.large 13 | initialVariantWeight: 1 14 | models: 15 | - name: xgboost-model 16 | executionRoleArn: "{ROLE_ARN}" 17 | containers: 18 | - containerHostname: xgboost 19 | modelDataUrl: s3://{DATA_BUCKET}/inference/xgboost-mnist/model.tar.gz 20 | image: 433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:latest 21 | environment: 22 | - name: my_env_key 23 | value: my_env_value 24 | primaryContainer: xgboost 25 | tags: 26 | - key: name 27 | value: sagemaker-k8s-operator tag test 28 | -------------------------------------------------------------------------------- /tests/codebuild/testfiles/xgboost-hosting-deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: sagemaker.aws.amazon.com/v1 2 | kind: HostingDeployment 3 | metadata: 4 | name: xgboost-hosting 5 | spec: 6 | region: us-west-2 7 | productionVariants: 8 | - variantName: AllTraffic 9 | modelName: xgboost-model 10 | initialInstanceCount: 1 11 | instanceType: ml.r5.large 12 | initialVariantWeight: 1 13 | models: 14 | - name: xgboost-model 15 | 
executionRoleArn: "{ROLE_ARN}" 16 | containers: 17 | - containerHostname: xgboost 18 | modelDataUrl: s3://{DATA_BUCKET}/inference/xgboost-mnist/ 19 | image: 246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-xgboost:1.2-1 20 | mode: MultiModel 21 | environment: 22 | - name: my_env_key 23 | value: my_env_value 24 | primaryContainer: xgboost 25 | tags: 26 | - key: name 27 | value: sagemaker-k8s-operator tag test 28 | -------------------------------------------------------------------------------- /tests/codebuild/testfiles/xgboost-hostingautoscaling-custom.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: sagemaker.aws.amazon.com/v1 2 | kind: HostingAutoscalingPolicy 3 | metadata: 4 | name: hap-custom-metric 5 | spec: 6 | resourceId: 7 | - endpointName: PLACEHOLDER-ENDPOINT-3 8 | variantName: AllTraffic 9 | region: us-west-2 10 | policyName: custom-scaling-policy 11 | policyType: TargetTrackingScaling 12 | minCapacity: 1 13 | maxCapacity: 1 14 | suspendedState: 15 | dynamicScalingInSuspended: true 16 | targetTrackingScalingPolicyConfiguration: 17 | targetValue: 60.0 18 | scaleInCooldown: 700 19 | scaleOutCooldown: 300 20 | customizedMetricSpecification: 21 | metricName: CPUUtilization 22 | namespace: /aws/sagemaker/Endpoints 23 | dimensions: 24 | - name: EndpointName 25 | value: PLACEHOLDER-ENDPOINT-3 26 | - name: VariantName 27 | value: AllTraffic 28 | statistic: Average 29 | unit: Percent 30 | -------------------------------------------------------------------------------- /tests/codebuild/testfiles/xgboost-hostingautoscaling.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: sagemaker.aws.amazon.com/v1 2 | kind: HostingAutoscalingPolicy 3 | metadata: 4 | name: hap-predefined 5 | spec: 6 | resourceId: 7 | - endpointName: PLACEHOLDER-ENDPOINT-1 8 | variantName: AllTraffic 9 | - endpointName: PLACEHOLDER-ENDPOINT-2 10 | variantName: AllTraffic 11 | region: 
us-west-2 12 | policyName: SageMakerEndpointInvocationScalingPolicy 13 | policyType: TargetTrackingScaling 14 | minCapacity: 1 15 | maxCapacity: 2 16 | targetTrackingScalingPolicyConfiguration: 17 | targetValue: 70.0 18 | predefinedMetricSpecification: 19 | predefinedMetricType: SageMakerVariantInvocationsPerInstance 20 | scaleInCooldown: 700 21 | scaleOutCooldown: 300 22 | disableScaleIn: true 23 | -------------------------------------------------------------------------------- /tests/codebuild/testfiles/xgboost-mnist-batchtransform-china.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: sagemaker.aws.amazon.com/v1 2 | kind: BatchTransformJob 3 | metadata: 4 | name: xgboost-batch-china 5 | spec: 6 | region: cn-northwest-1 7 | modelName: xgboost-model 8 | transformInput: 9 | contentType: text/csv 10 | dataSource: 11 | s3DataSource: 12 | s3DataType: S3Prefix 13 | s3Uri: s3://{DATA_BUCKET}/batch-transform/mnist-1000-samples 14 | transformOutput: 15 | s3OutputPath: s3://{DATA_BUCKET}/batch-model 16 | transformResources: 17 | instanceCount: 1 18 | instanceType: ml.m4.xlarge 19 | tags: 20 | - key: test-key 21 | value: test-value 22 | -------------------------------------------------------------------------------- /tests/codebuild/testfiles/xgboost-mnist-batchtransform.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: sagemaker.aws.amazon.com/v1 2 | kind: BatchTransformJob 3 | metadata: 4 | name: xgboost-batch 5 | spec: 6 | region: us-west-2 7 | modelName: xgboost-model 8 | transformInput: 9 | contentType: text/csv 10 | dataSource: 11 | s3DataSource: 12 | s3DataType: S3Prefix 13 | s3Uri: s3://sagemaker-sample-data-us-west-2/batch-transform/mnist-1000-samples 14 | transformOutput: 15 | s3OutputPath: s3://{DATA_BUCKET}/batch-model 16 | transformResources: 17 | instanceCount: 2 18 | instanceType: ml.m4.xlarge 19 | tags: 20 | - key: test-key 21 | value: test-value 22 | 
-------------------------------------------------------------------------------- /tests/codebuild/testfiles/xgboost-mnist-custom-endpoint.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: sagemaker.aws.amazon.com/v1 2 | kind: TrainingJob 3 | metadata: 4 | name: xgboost-mnist-custom-endpoint 5 | spec: 6 | hyperParameters: 7 | - name: max_depth 8 | value: "5" 9 | - name: eta 10 | value: "0.2" 11 | - name: gamma 12 | value: "4" 13 | - name: min_child_weight 14 | value: "6" 15 | - name: silent 16 | value: "0" 17 | - name: objective 18 | value: multi:softmax 19 | - name: num_class 20 | value: "10" 21 | - name: num_round 22 | value: "10" 23 | algorithmSpecification: 24 | trainingImage: 433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:1 25 | trainingInputMode: File 26 | roleArn: {ROLE_ARN} 27 | region: us-west-2 28 | sageMakerEndpoint: https://sagemaker.us-west-2.amazonaws.com 29 | outputDataConfig: 30 | s3OutputPath: s3://{DATA_BUCKET}/xgboost 31 | resourceConfig: 32 | instanceCount: 1 33 | instanceType: ml.m4.xlarge 34 | volumeSizeInGB: 5 35 | stoppingCondition: 36 | maxRuntimeInSeconds: 86400 37 | inputDataConfig: 38 | - channelName: train 39 | dataSource: 40 | s3DataSource: 41 | s3DataType: S3Prefix 42 | s3Uri: s3://{DATA_BUCKET}/train/ 43 | s3DataDistributionType: FullyReplicated 44 | contentType: text/csv 45 | compressionType: None 46 | - channelName: validation 47 | dataSource: 48 | s3DataSource: 49 | s3DataType: S3Prefix 50 | s3Uri: s3://{DATA_BUCKET}/validation/ 51 | s3DataDistributionType: FullyReplicated 52 | contentType: text/csv 53 | compressionType: None 54 | -------------------------------------------------------------------------------- /tests/codebuild/testfiles/xgboost-mnist-trainingjob-china.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: sagemaker.aws.amazon.com/v1 2 | kind: TrainingJob 3 | metadata: 4 | name: xgboost-mnist-china 5 | spec: 6 
| hyperParameters: 7 | - name: max_depth 8 | value: "5" 9 | - name: eta 10 | value: "0.2" 11 | - name: gamma 12 | value: "4" 13 | - name: min_child_weight 14 | value: "6" 15 | - name: silent 16 | value: "0" 17 | - name: objective 18 | value: multi:softmax 19 | - name: num_class 20 | value: "10" 21 | - name: num_round 22 | value: "10" 23 | algorithmSpecification: 24 | trainingImage: 387376663083.dkr.ecr.cn-northwest-1.amazonaws.com.cn/xgboost:1 25 | trainingInputMode: File 26 | roleArn: {ROLE_ARN} 27 | region: cn-northwest-1 28 | outputDataConfig: 29 | s3OutputPath: s3://{DATA_BUCKET}/xgboost/ 30 | resourceConfig: 31 | instanceCount: 1 32 | instanceType: ml.m4.xlarge 33 | volumeSizeInGB: 5 34 | stoppingCondition: 35 | maxRuntimeInSeconds: 86400 36 | inputDataConfig: 37 | - channelName: train 38 | dataSource: 39 | s3DataSource: 40 | s3DataType: S3Prefix 41 | s3Uri: s3://{DATA_BUCKET}/train/ 42 | s3DataDistributionType: FullyReplicated 43 | contentType: text/csv 44 | compressionType: None 45 | - channelName: validation 46 | dataSource: 47 | s3DataSource: 48 | s3DataType: S3Prefix 49 | s3Uri: s3://{DATA_BUCKET}/validation/ 50 | s3DataDistributionType: FullyReplicated 51 | contentType: text/csv 52 | compressionType: None 53 | tags: 54 | - key: tagKey 55 | value: tagValue 56 | -------------------------------------------------------------------------------- /tests/codebuild/testfiles/xgboost-mnist-trainingjob-namespaced.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: sagemaker.aws.amazon.com/v1 2 | kind: TrainingJob 3 | metadata: 4 | name: xgboost-mnist-namespaced 5 | spec: 6 | hyperParameters: 7 | - name: max_depth 8 | value: "5" 9 | - name: eta 10 | value: "0.2" 11 | - name: gamma 12 | value: "4" 13 | - name: min_child_weight 14 | value: "6" 15 | - name: silent 16 | value: "0" 17 | - name: objective 18 | value: multi:softmax 19 | - name: num_class 20 | value: "10" 21 | - name: num_round 22 | value: "10" 23 | 
algorithmSpecification: 24 | trainingImage: 433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:1 25 | trainingInputMode: File 26 | roleArn: {ROLE_ARN} 27 | region: us-west-2 28 | outputDataConfig: 29 | s3OutputPath: s3://{DATA_BUCKET}/xgboost/ 30 | resourceConfig: 31 | instanceCount: 1 32 | instanceType: ml.m4.xlarge 33 | volumeSizeInGB: 5 34 | stoppingCondition: 35 | maxRuntimeInSeconds: 86400 36 | inputDataConfig: 37 | - channelName: train 38 | dataSource: 39 | s3DataSource: 40 | s3DataType: S3Prefix 41 | s3Uri: s3://{DATA_BUCKET}/train/ 42 | s3DataDistributionType: FullyReplicated 43 | contentType: text/csv 44 | compressionType: None 45 | - channelName: validation 46 | dataSource: 47 | s3DataSource: 48 | s3DataType: S3Prefix 49 | s3Uri: s3://{DATA_BUCKET}/validation/ 50 | s3DataDistributionType: FullyReplicated 51 | contentType: text/csv 52 | compressionType: None 53 | tags: 54 | - key: tagKey 55 | value: tagValue 56 | -------------------------------------------------------------------------------- /tests/codebuild/testfiles/xgboost-mnist-trainingjob.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: sagemaker.aws.amazon.com/v1 2 | kind: TrainingJob 3 | metadata: 4 | name: xgboost-mnist 5 | spec: 6 | hyperParameters: 7 | - name: max_depth 8 | value: "5" 9 | - name: eta 10 | value: "0.2" 11 | - name: gamma 12 | value: "4" 13 | - name: min_child_weight 14 | value: "6" 15 | - name: silent 16 | value: "0" 17 | - name: objective 18 | value: multi:softmax 19 | - name: num_class 20 | value: "10" 21 | - name: num_round 22 | value: "10" 23 | algorithmSpecification: 24 | trainingImage: 433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:1 25 | trainingInputMode: File 26 | roleArn: {ROLE_ARN} 27 | region: us-west-2 28 | outputDataConfig: 29 | s3OutputPath: s3://{DATA_BUCKET}/xgboost/ 30 | resourceConfig: 31 | instanceCount: 1 32 | instanceType: ml.m4.xlarge 33 | volumeSizeInGB: 5 34 | stoppingCondition: 35 | 
maxRuntimeInSeconds: 86400 36 | inputDataConfig: 37 | - channelName: train 38 | dataSource: 39 | s3DataSource: 40 | s3DataType: S3Prefix 41 | s3Uri: s3://{DATA_BUCKET}/train/ 42 | s3DataDistributionType: FullyReplicated 43 | contentType: text/csv 44 | compressionType: None 45 | - channelName: validation 46 | dataSource: 47 | s3DataSource: 48 | s3DataType: S3Prefix 49 | s3Uri: s3://{DATA_BUCKET}/validation/ 50 | s3DataDistributionType: FullyReplicated 51 | contentType: text/csv 52 | compressionType: None 53 | tags: 54 | - key: tagKey 55 | value: tagValue 56 | -------------------------------------------------------------------------------- /tests/codebuild/testfiles/xgboost-model-china.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: sagemaker.aws.amazon.com/v1 2 | kind: Model 3 | metadata: 4 | name: xgboost-model-china 5 | spec: 6 | primaryContainer: 7 | containerHostname: xgboost 8 | modelDataUrl: s3://{DATA_BUCKET}/batch-transform/xgboost-mnist/model.tar.gz 9 | image: 387376663083.dkr.ecr.cn-northwest-1.amazonaws.com.cn/xgboost:latest 10 | region: cn-northwest-1 11 | executionRoleArn: {ROLE_ARN} 12 | tags: 13 | - key: key 14 | value: value 15 | -------------------------------------------------------------------------------- /tests/codebuild/testfiles/xgboost-model.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: sagemaker.aws.amazon.com/v1 2 | kind: Model 3 | metadata: 4 | name: xgboost-model 5 | spec: 6 | primaryContainer: 7 | containerHostname: xgboost 8 | modelDataUrl: s3://{DATA_BUCKET}/batch-transform/xgboost-mnist/model.tar.gz 9 | image: 433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:latest 10 | region: us-west-2 11 | executionRoleArn: {ROLE_ARN} 12 | tags: 13 | - key: key 14 | value: value 15 | -------------------------------------------------------------------------------- /tests/codebuild/update_tests.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source common.sh 4 | source inject_tests.sh 5 | 6 | # Updates the spec and re-applies to ensure updates work as expected. 7 | # Parameter: 8 | # $1: Namespace of the CRD 9 | function run_update_canary_tests 10 | { 11 | local crd_namespace="$1" 12 | } 13 | 14 | # Parameter: 15 | # $1: CRD namespace 16 | function run_update_integration_tests 17 | { 18 | echo "Running update integration tests" 19 | local crd_namespace="$1" 20 | run_update_canary_tests "${crd_namespace}" 21 | 22 | echo "Injecting variables into tests" 23 | inject_all_variables 24 | 25 | echo "Starting Update Tests" 26 | update_hap_test "${crd_namespace}" named-xgboost-hosting testfiles/xgboost-hostingautoscaling.yaml 27 | } 28 | 29 | # Verifies that all resources were created and are running/completed for the canary tests. 30 | # Parameter: 31 | # $1: Namespace of the CRD 32 | function verify_update_canary_tests 33 | { 34 | local crd_namespace="$1" 35 | echo "Verifying update tests" 36 | } 37 | 38 | 39 | # Verifies that each integration update test has completed successfully. 40 | # Parameter: 41 | # $1: CRD namespace 42 | function verify_update_integration_tests 43 | { 44 | echo "Verifying Update integration tests" 45 | local crd_namespace="$1" 46 | verify_update_canary_tests 47 | 48 | # At this point there are two variants in total(1 predefined and 1 custom) that have HAP applied. 
49 | verify_test "${crd_namespace}" HostingAutoscalingPolicy hap-predefined 5m Created 50 | # verify_hap_test "2" 51 | } 52 | 53 | # Updates the ResourceID List and the MaxCapacity in the spec to check for updates 54 | # Parameter: 55 | # $1: Target namespace 56 | # $2: K8s Name of the hostingdeployment to apply autoscaling 57 | # $3: Filename of the hap test to update 58 | function update_hap_test() 59 | { 60 | local target_namespace="$1" 61 | local hosting_deployment_1="$2" 62 | local file_name="$3" 63 | local hostingdeployment_type="hostingdeployment" 64 | local updated_filename="${file_name}-updated-${target_namespace}" 65 | 66 | # Copy and update the test file 67 | cp "${file_name}" "${updated_filename}" 68 | # Update the Resource ID list to remove one endpoint/variant 69 | yq d -i "${updated_filename}" "spec.resourceId[1]" 70 | yq w -i "$updated_filename" "spec.resourceId[0].endpointName" ${hosting_deployment_1} 71 | yq w -i "$updated_filename" "spec.maxCapacity" 3 72 | 73 | # HAP Test 1: Using the Predefined Metric 74 | run_test "$target_namespace" "$updated_filename" 75 | 76 | yq w -i testfiles/xgboost-hosting-deployment.yaml "metadata.name" ${hosting_deployment_1} 77 | rm "${updated_filename}" 78 | } -------------------------------------------------------------------------------- /tests/images/Dockerfile.canary: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | 3 | RUN apt-get update && apt-get install -y curl \ 4 | wget \ 5 | git \ 6 | python \ 7 | python-pip \ 8 | vim \ 9 | sudo \ 10 | jq 11 | 12 | RUN pip install awscli 13 | 14 | # Add yq repository and install yq 15 | RUN apt-get update && apt install -y software-properties-common && sudo add-apt-repository ppa:rmescandon/yq && apt update && apt install -y yq 16 | 17 | # Install kubectl 18 | RUN curl -o kubectl https://amazon-eks.s3-us-west-2.amazonaws.com/1.12.9/2019-06-21/bin/linux/amd64/kubectl \ 19 | && chmod +x ./kubectl && cp ./kubectl /bin 
20 | 21 | # Install eksctl 22 | RUN curl --silent --location "https://github.com/weaveworks/eksctl/releases/latest/download/eksctl_$(uname -s)_amd64.tar.gz" | tar xz -C /tmp && mv /tmp/eksctl /bin 23 | 24 | # Install kustomize 25 | RUN curl -LO https://github.com/kubernetes-sigs/kustomize/releases/download/kustomize%2Fv3.2.1/kustomize_kustomize.v3.2.1_linux_amd64 && mv kustomize_kustomize.v3.2.1_linux_amd64 /bin/kustomize && chmod u+x /bin/kustomize 26 | 27 | # Install dig, used for PrivateLink test. 28 | RUN apt-get install -y dnsutils 29 | 30 | # Set the environment variables 31 | ARG DATA_BUCKET 32 | ENV DATA_BUCKET=$DATA_BUCKET 33 | 34 | ARG COMMIT_SHA 35 | ENV COMMIT_SHA=$COMMIT_SHA 36 | 37 | ARG RESULT_BUCKET 38 | ENV RESULT_BUCKET=$RESULT_BUCKET 39 | 40 | # Set working directory 41 | RUN mkdir -p /app/testfiles 42 | WORKDIR /app/testfiles 43 | 44 | COPY codebuild/testfiles/*.yaml ./ 45 | 46 | WORKDIR /app 47 | 48 | COPY codebuild/*.sh ./ 49 | RUN chmod +x ./*.sh 50 | 51 | COPY sagemaker-k8s-operator.tar.gz . 
52 | 53 | CMD ["./run_canarytest.sh"] 54 | -------------------------------------------------------------------------------- /tests/images/Dockerfile.canary.china: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | 3 | RUN apt-get update && apt-get install -y curl \ 4 | wget \ 5 | git \ 6 | python \ 7 | python-pip \ 8 | vim \ 9 | sudo \ 10 | jq 11 | 12 | RUN pip install awscli 13 | 14 | # Add yq repository and install yq 15 | RUN apt-get update && apt install -y software-properties-common && sudo add-apt-repository ppa:rmescandon/yq && apt update && apt install -y yq 16 | 17 | # Install kubectl 18 | RUN curl -LO https://storage.googleapis.com/kubernetes-release/release/v1.18.6/bin/linux/amd64/kubectl \ 19 | && chmod +x ./kubectl && cp ./kubectl /bin 20 | 21 | # Install eksctl 22 | RUN curl --silent --location "https://github.com/weaveworks/eksctl/releases/latest/download/eksctl_$(uname -s)_amd64.tar.gz" | tar xz -C /tmp && mv /tmp/eksctl /bin 23 | 24 | COPY ./codebuild/ ./app/ 25 | 26 | WORKDIR /app/ 27 | CMD ["./run_canarytest_china.sh"] -------------------------------------------------------------------------------- /tests/images/Dockerfile.integration: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | 3 | RUN apt-get update && apt-get install -y curl \ 4 | wget \ 5 | git \ 6 | python \ 7 | python-pip \ 8 | vim \ 9 | sudo \ 10 | jq 11 | 12 | 13 | # Enable the Docker repository 14 | RUN apt update && apt install -y apt-transport-https ca-certificates curl gnupg-agent software-properties-common && curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add - && add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" 15 | 16 | # Install Docker CE 17 | RUN apt update && apt install -y docker-ce 18 | 19 | # Install yq 20 | RUN sudo add-apt-repository ppa:rmescandon/yq && apt update && apt install -y 
yq 21 | 22 | # Install Ruby and travis 23 | RUN apt update && apt install -y ruby-dev libffi-dev make gcc 24 | RUN gem install travis 25 | 26 | RUN pip install awscli 27 | 28 | # Install kustomize 29 | RUN curl -LO https://github.com/kubernetes-sigs/kustomize/releases/download/kustomize%2Fv3.2.1/kustomize_kustomize.v3.2.1_linux_amd64 && mv kustomize_kustomize.v3.2.1_linux_amd64 /bin/kustomize && chmod u+x /bin/kustomize 30 | 31 | # Install kubectl 32 | RUN curl -o kubectl https://amazon-eks.s3-us-west-2.amazonaws.com/1.12.9/2019-06-21/bin/linux/amd64/kubectl \ 33 | && chmod +x ./kubectl && cp ./kubectl /bin 34 | 35 | # Install eksctl 36 | RUN curl --silent --location "https://github.com/weaveworks/eksctl/releases/latest/download/eksctl_$(uname -s)_amd64.tar.gz" | tar xz -C /tmp && mv /tmp/eksctl /bin 37 | 38 | # Install Kubebuilder which is required for make test 39 | RUN wget https://github.com/kubernetes-sigs/kubebuilder/releases/download/v2.3.1/kubebuilder_2.3.1_linux_amd64.tar.gz \ 40 | && tar -zxvf kubebuilder_2.3.1_linux_amd64.tar.gz \ 41 | && mv kubebuilder_2.3.1_linux_amd64 /usr/local/kubebuilder \ 42 | && rm -rf kubebuilder_2.3.1_linux_amd64.tar.gz 43 | 44 | # Install dig, used for PrivateLink test. 45 | RUN apt-get install -y dnsutils 46 | 47 | # Add Golang 48 | RUN sudo add-apt-repository -y ppa:longsleep/golang-backports && sudo apt-get update && sudo apt-get install -y golang-go 49 | 50 | # This is how you start docker engine on container. Make sure container is 51 | # running in privileged mode. 52 | # I had to comment this line since codebuild overrides this. 53 | # Uncomment this line if you want to use this as build environment for this project locally 54 | # ENTRYPOINT sudo service docker start && bash 55 | --------------------------------------------------------------------------------