├── .gitignore
├── Chapter01
│   └── ML Process Example.ipynb
├── Chapter02
│   ├── Autopilot Example.ipynb
│   └── abalone_with_headers.csv
├── Chapter03
│   ├── Image
│   │   └── AutoGluon Image Example.ipynb
│   ├── Tabular
│   │   └── AutoGluon Tabular Example.ipynb
│   └── policy.json
├── Chapter04
│   ├── cdk
│   │   └── abalone_endpoint_stack.py
│   └── scripts
│       ├── build.py
│       └── deploy.py
├── Chapter05
│   ├── Notebook
│   │   └── Abalone CICD Example.ipynb
│   └── cdk
│       ├── abalone_cicd_pipeline_stack.py
│       └── app.py
├── Chapter06
│   ├── cdk
│   │   ├── abalone_cicd_pipeline_stack.py
│   │   ├── abalone_endpoint_stack.py
│   │   └── app.py
│   └── scripts
│       └── deploy.py
├── Chapter07
│   ├── Files
│   │   └── buildspec.yml
│   └── Notebook
│       └── Abalone Step Functions Workflow Example.ipynb
├── Chapter08
│   ├── airflow
│   │   ├── dags
│   │   │   └── .airflowignore
│   │   ├── rerquirements.txt
│   │   └── scripts
│   │       └── evaluate.py
│   ├── cdk
│   │   ├── abalone_data_pipeline_stack.py
│   │   └── app.py
│   └── lambda
│       └── analyze_results
│           └── index.py
├── Chapter09
│   ├── Files
│   │   └── airflow
│   │       ├── dags
│   │       │   ├── .airflowignore
│   │       │   ├── abalone_data_pipeline.py
│   │       │   └── model
│   │       │       └── model_training.py
│   │       └── scripts
│   │           └── preprocess.py
│   └── Notebook
│       └── Simulating New Abalone Survey Data.ipynb
├── Chapter10
│   ├── Files
│   │   ├── airflow
│   │   │   ├── dags
│   │   │   │   ├── .airflowignore
│   │   │   │   └── continuous_training_pipeline.py
│   │   │   └── requirements.txt
│   │   ├── cdk
│   │   │   ├── acme_pipeline_stack.py
│   │   │   ├── app.py
│   │   │   ├── cdk.json
│   │   │   ├── data_workflow_stack.py
│   │   │   ├── ml_workflow_stack.py
│   │   │   └── requirements.txt
│   │   ├── lambda
│   │   │   ├── createExperiment
│   │   │   │   └── index.py
│   │   │   ├── evaluateResults
│   │   │   │   └── index.py
│   │   │   ├── registerModel
│   │   │   │   └── index.py
│   │   │   ├── registryCreator
│   │   │   │   └── index.py
│   │   │   └── releaseChange
│   │   │       └── index.py
│   │   └── scripts
│   │       ├── evaluation.py
│   │       └── preprocessing.py
│   ├── Notebooks
│   │   ├── ACME Model Artifacts Example.ipynb
│   │   └── SageMaker Feature Store Example.ipynb
│   └── www
│       ├── 404.html
│       ├── css
│       │   ├── error-page.css
│       │   └── main-page.css
│       ├── img
│       │   ├── team-work.jpeg
│       │   ├── undersea-abalone.jpg
│       │   └── video-monitoring.jpeg
│       ├── index.html
│       ├── robots.txt
│       ├── scss
│       │   ├── _bootstrap-overrides.scss
│       │   ├── _global.scss
│       │   ├── _masthead.scss
│       │   ├── _mixins.scss
│       │   ├── _navbar.scss
│       │   ├── _variables.scss
│       │   └── one-page-wonder.scss
│       └── vendor
│           ├── bootstrap
│           │   ├── css
│           │   │   ├── bootstrap-grid.css
│           │   │   ├── bootstrap-grid.css.map
│           │   │   ├── bootstrap-grid.min.css
│           │   │   ├── bootstrap-grid.min.css.map
│           │   │   ├── bootstrap-reboot.css
│           │   │   ├── bootstrap-reboot.css.map
│           │   │   ├── bootstrap-reboot.min.css
│           │   │   ├── bootstrap-reboot.min.css.map
│           │   │   ├── bootstrap.css
│           │   │   ├── bootstrap.css.map
│           │   │   ├── bootstrap.min.css
│           │   │   └── bootstrap.min.css.map
│           │   └── js
│           │       ├── bootstrap.bundle.js
│           │       ├── bootstrap.bundle.js.map
│           │       ├── bootstrap.bundle.min.js
│           │       ├── bootstrap.bundle.min.js.map
│           │       ├── bootstrap.js
│           │       ├── bootstrap.js.map
│           │       ├── bootstrap.min.js
│           │       └── bootstrap.min.js.map
│           └── jquery
│               ├── jquery.js
│               ├── jquery.min.js
│               ├── jquery.min.map
│               ├── jquery.slim.js
│               ├── jquery.slim.min.js
│               └── jquery.slim.min.map
├── Chapter11
│   ├── Files
│   │   ├── cdk
│   │   │   ├── acme_pipeline_stack.py
│   │   │   ├── production_application_stack.py
│   │   │   └── test_application_stack.py
│   │   ├── lambda
│   │   │   ├── createBaseline
│   │   │   │   └── index.py
│   │   │   └── formHandler
│   │   │       ├── Dockerfile
│   │   │       ├── index.py
│   │   │       └── requirements.txt
│   │   ├── scripts
│   │   │   └── invoke.py
│   │   └── tests
│   │       ├── __init__.py
│   │       ├── requirements.txt
│   │       └── system_tests.py
│   └── Notebook
│       └── Simulating New Abalone Survey Data.ipynb
├── LICENSE
└── README.md

/.gitignore:
-------------------------------------------------------------------------------- 1 | .ipynb_checkpoints 2 | **/.DS_Store 3 | .venv -------------------------------------------------------------------------------- /Chapter01/ML Process Example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Business Case: Predicting Abalone Age\n", 8 | "\n", 9 | ">__NOTE:__ This Jupyter Notebook uses a Python3.6 kernel." 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import sys\n", 19 | "print(f\"Python Version: {sys.version}.\")" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "%%capture\n", 29 | "!{sys.executable} -m pip install -U pip matplotlib numpy pandas scikit-learn tensorflow" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "import warnings\n", 39 | "import matplotlib.pyplot as plt\n", 40 | "import numpy as np\n", 41 | "import pandas as pd\n", 42 | "\n", 43 | "from sklearn import preprocessing\n", 44 | "from sklearn.model_selection import train_test_split\n", 45 | "from sklearn.metrics import mean_squared_error\n", 46 | "\n", 47 | "import tensorflow as tf\n", 48 | "from tensorflow import keras\n", 49 | "from tensorflow.keras.models import Sequential\n", 50 | "from tensorflow.keras.layers import Dense\n", 51 | "\n", 52 | "class cleanPrint(keras.callbacks.Callback):\n", 53 | " def on_epoch_end(self, epoch, logs):\n", 54 | " if epoch+1 % 100 == 0:\n", 55 | " print(\"!\")\n", 56 | " else:\n", 57 | " print(\"-\", end=\"\")\n", 58 | "\n", 59 | "%matplotlib inline\n", 60 | "warnings.filterwarnings(\"ignore\")" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "---\n", 68 | "## Exploratory Data Analysis: Abalone Dataset" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "column_names = [\"sex\", \"length\", \"diameter\", \"height\", \"whole_weight\", \"shucked_weight\", \"viscera_weight\", \"shell_weight\", \"rings\"]\n", 78 | "abalone_data = pd.read_csv(\"http://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data\", names=column_names)\n", 79 | "abalone_data.head()" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "abalone_data.describe()" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "---\n", 96 | "\n", 97 | "## Data Preparation" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "data = abalone_data[[\"rings\", \"sex\", \"length\", \"diameter\", \"height\", \"whole_weight\", \"shucked_weight\", \"viscera_weight\", \"shell_weight\"]]\n", 107 | "data = pd.get_dummies(data)\n", 108 | "y = data.rings.values\n", 109 | "del data[\"rings\"]\n", 110 | "X = data.values.astype(np.float)\n", 111 | "X = preprocessing.normalize(X)\n", 112 | "training_features, testing_features, training_labels, testing_labels = train_test_split(X, y, test_size=0.2, random_state=42)" 113 | ] 114 | }, 115 | { 116 | "cell_type": 
"markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "---\n", 120 | "## Model Training" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "network_layers = [\n", 130 | " Dense(256, activation='relu', kernel_initializer=\"normal\", input_dim=10),\n", 131 | " Dense(128, activation='relu'),\n", 132 | " Dense(64, activation='relu'),\n", 133 | " Dense(32, activation='relu'),\n", 134 | " Dense(1, activation='linear')\n", 135 | "]\n", 136 | "\n", 137 | "model = Sequential(network_layers)\n", 138 | "model.compile(optimizer=\"adam\", loss=\"mse\", metrics=[\"mae\", \"accuracy\"])\n", 139 | "model.summary() " 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "training_results = model.fit(training_features, training_labels, validation_data=(testing_features, testing_labels), batch_size=32, epochs=2000, shuffle=True, verbose=0, callbacks=[cleanPrint()])" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "---\n", 156 | "## Model Evaluation (Before Optimization)\n", 157 | "\n", 158 | "### Plot Model Evaluaiton Metrics (RMSE)" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": null, 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [ 167 | "fig, ax = plt.subplots(figsize=(15, 10))\n", 168 | "ax.plot(testing_labels, model.predict(testing_features), \"ob\")\n", 169 | "ax.plot([0, 25], [0, 25], \"-r\")\n", 170 | "ax.text(8, 1, f\"RMSE = {mean_squared_error(testing_labels, model.predict(testing_features), squared=False)}\", color=\"r\", fontsize=14, weight=\"bold\")\n", 171 | "plt.title(\"Abalone Model Evaluation\", fontweight=\"bold\", fontsize=12)\n", 172 | "plt.xlabel(\"Actual 'Rings'\", fontweight=\"bold\", fontsize=12)\n", 173 | "plt.ylabel(\"Predicted 'Rings'\", fontweight=\"bold\", fontsize=12)\n", 174 | "plt.legend([\"Predictions\", \"Regression Line\"], loc=\"upper left\", prop={\"weight\": \"bold\"})\n", 175 | "plt.show()" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": {}, 181 | "source": [ 182 | "### Plot additional performance summaries\n", 183 | "\n", 184 | "#### Training vs. Testing Loss" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [ 193 | "plt.rcParams[\"figure.figsize\"] = (15, 10)\n", 194 | "plt.plot(training_results.history[\"loss\"])\n", 195 | "plt.plot(training_results.history[\"val_loss\"])\n", 196 | "plt.title(\"Training vs. Testing Loss\", fontweight=\"bold\", fontsize=14)\n", 197 | "plt.ylabel(\"Loss\", fontweight=\"bold\", fontsize=14)\n", 198 | "plt.xlabel(\"Epochs\", fontweight=\"bold\", fontsize=14)\n", 199 | "plt.legend([\"Training Loss\", \"Testing Loss\"], loc=\"upper right\", prop={\"weight\": \"bold\"})\n", 200 | "plt.grid()\n", 201 | "plt.show()" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "metadata": {}, 208 | "outputs": [], 209 | "source": [ 210 | "plt.rcParams[\"figure.figsize\"] = (15, 10)\n", 211 | "plt.plot(training_results.history[\"mae\"])\n", 212 | "plt.plot(training_results.history[\"val_mae\"])\n", 213 | "plt.title(\"Training vs. 
Testing Mean Absolute Error\", fontweight=\"bold\", fontsize=14)\n", 214 | "plt.ylabel(\"mae\", fontweight=\"bold\", fontsize=14)\n", 215 | "plt.xlabel(\"Epochs\", fontweight=\"bold\", fontsize=14)\n", 216 | "plt.legend([\"Training MAE\", \"Testing MAE\"], loc=\"upper right\", prop={\"weight\": \"bold\"})\n", 217 | "plt.grid()\n", 218 | "plt.show()" 219 | ] 220 | }, 221 | { 222 | "cell_type": "markdown", 223 | "metadata": {}, 224 | "source": [ 225 | "---\n", 226 | "\n", 227 | "## Model Evaluation (After Optimization)" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": null, 233 | "metadata": {}, 234 | "outputs": [], 235 | "source": [ 236 | "network_layers = [\n", 237 | " Dense(64, activation='relu', kernel_initializer=\"normal\", input_dim=10),\n", 238 | " Dense(64, activation='relu'),\n", 239 | " Dense(1, activation='linear')\n", 240 | "]\n", 241 | "\n", 242 | "model = Sequential(network_layers)\n", 243 | "model.compile(optimizer=\"adam\", loss=\"mse\", metrics=[\"mae\", \"accuracy\"])\n", 244 | "model.summary()\n", 245 | "training_results = model.fit(training_features, training_labels, validation_data=(testing_features, testing_labels), batch_size=8, epochs=200, shuffle=True, verbose=1)\n" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": null, 251 | "metadata": {}, 252 | "outputs": [], 253 | "source": [ 254 | "fig, ax = plt.subplots(figsize=(15, 10))\n", 255 | "ax.plot(testing_labels, model.predict(testing_features), \"ob\")\n", 256 | "ax.plot([0, 25], [0, 25], \"-r\")\n", 257 | "ax.text(8, 1, f\"RMSE = {mean_squared_error(testing_labels, model.predict(testing_features), squared=False)}\", color=\"r\", fontsize=14, weight=\"bold\")\n", 258 | "plt.grid()\n", 259 | "plt.title(\"Abalone Model Evaluation\", fontweight=\"bold\", fontsize=12)\n", 260 | "plt.xlabel(\"Actual 'Rings'\", fontweight=\"bold\", fontsize=12)\n", 261 | "plt.ylabel(\"Predicted 'Rings'\", fontweight=\"bold\", fontsize=12)\n", 262 | "plt.legend([\"Predictions\", \"Regression Line\"], loc=\"upper left\", prop={\"weight\": \"bold\"})\n", 263 | "plt.show()" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": null, 269 | "metadata": {}, 270 | "outputs": [], 271 | "source": [ 272 | "plt.rcParams[\"figure.figsize\"] = (15, 10)\n", 273 | "plt.plot(training_results.history[\"loss\"])\n", 274 | "plt.plot(training_results.history[\"val_loss\"])\n", 275 | "plt.title(\"Training vs. Testing Loss\", fontweight=\"bold\", fontsize=14)\n", 276 | "plt.ylabel(\"Loss\", fontweight=\"bold\", fontsize=14)\n", 277 | "plt.xlabel(\"Epochs\", fontweight=\"bold\", fontsize=14)\n", 278 | "plt.legend([\"Training Loss\", \"Testing Loss\"], loc=\"upper right\", prop={\"weight\": \"bold\"})\n", 279 | "plt.grid()\n", 280 | "plt.show()" 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": null, 286 | "metadata": {}, 287 | "outputs": [], 288 | "source": [ 289 | "plt.rcParams[\"figure.figsize\"] = (15, 10)\n", 290 | "plt.plot(training_results.history[\"mae\"])\n", 291 | "plt.plot(training_results.history[\"val_mae\"])\n", 292 | "plt.title(\"Training vs. 
Testing Mean Absolute Error\", fontweight=\"bold\", fontsize=14)\n", 293 | "plt.ylabel(\"mae\", fontweight=\"bold\", fontsize=14)\n", 294 | "plt.xlabel(\"Epochs\", fontweight=\"bold\", fontsize=14)\n", 295 | "plt.legend([\"Training MAE\", \"Testing MAE\"], loc=\"upper right\", prop={\"weight\": \"bold\"})\n", 296 | "plt.grid()\n", 297 | "plt.show()" 298 | ] 299 | } 300 | ], 301 | "metadata": { 302 | "instance_type": "ml.t3.medium", 303 | "interpreter": { 304 | "hash": "91cd747e2918a8daa704f3bc8e42b880a4d8049712a274f2deb0fa8e8f710896" 305 | }, 306 | "kernelspec": { 307 | "display_name": "Python 3.8.10 64-bit ('3.8.10': pyenv)", 308 | "name": "python3" 309 | }, 310 | "language_info": { 311 | "codemirror_mode": { 312 | "name": "ipython", 313 | "version": 3 314 | }, 315 | "file_extension": ".py", 316 | "mimetype": "text/x-python", 317 | "name": "python", 318 | "nbconvert_exporter": "python", 319 | "pygments_lexer": "ipython3", 320 | "version": "3.8.10" 321 | } 322 | }, 323 | "nbformat": 4, 324 | "nbformat_minor": 4 325 | } 326 | -------------------------------------------------------------------------------- /Chapter02/Autopilot Example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Autopilot Example\n", 8 | ">__NOTE:__ Make sure to use the Pyton 3 (Data Science) Jupyter Kernel." 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "import sagemaker\n", 18 | "import pandas as pd\n", 19 | "\n", 20 | "role = sagemaker.get_execution_role()\n", 21 | "session = sagemaker.session.Session()" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "## Download Data" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "column_names = [\"sex\", \"length\", \"diameter\", \"height\", \"whole_weight\", \"shucked_weight\", \"viscera_weight\", \"shell_weight\", \"rings\"]\n", 38 | "abalone_data = pd.read_csv(\"http://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data\", names=column_names)\n", 39 | "abalone_data.to_csv(\"abalone_with_headers.csv\", index=False)" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "## Create the Autopilot Experiment" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "from sagemaker.automl.automl import AutoML\n", 56 | "automl_job = AutoML(\n", 57 | " role=role,\n", 58 | " target_attribute_name=\"rings\",\n", 59 | " output_path=f\"s3://{session.default_bucket()}/abalone-v1/output\",\n", 60 | " base_job_name=\"abalone\",\n", 61 | " sagemaker_session=session,\n", 62 | " max_candidates=250\n", 63 | ")\n" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "## Start the Autopilot Experiment" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "automl_job.fit(inputs=session.upload_data(\"abalone_with_headers.csv\", bucket=session.default_bucket(), key_prefix=\"abalone-v1/input\"), wait=False)" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "## Analyze the Autopilot Experiment\n", 87 | "\n", 88 | 
">__NOTE:__ Wait until the Autopilot Experiment has completed before proceeding." 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "from sagemaker.analytics import ExperimentAnalytics\n", 98 | "automl_experiment = ExperimentAnalytics(\n", 99 | " sagemaker_session=session,\n", 100 | " experiment_name=\"{}-aws-auto-ml-job\".format(automl_job.describe_auto_ml_job()[\"AutoMLJobName\"])\n", 101 | ")" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "df = automl_experiment.dataframe()\n", 111 | "df = df.filter([\"TrialComponentName\",\"validation:accuracy - Last\", \"train:accuracy - Last\"])\n", 112 | "df = df.sort_values(by=\"validation:accuracy - Last\", ascending=False)[:5]\n", 113 | "df" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "## Plot Trial Comparison" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "import matplotlib.pyplot as plt\n", 130 | "%matplotlib inline\n", 131 | "\n", 132 | "legend_colors = [\"r\", \"b\", \"g\", \"c\", \"m\"]\n", 133 | "ig, ax = plt.subplots(figsize=(15, 10))\n", 134 | "legend = []\n", 135 | "i = 0\n", 136 | "for column, value in df.iterrows():\n", 137 | " ax.plot(value[\"train:accuracy - Last\"], value[\"validation:accuracy - Last\"], \"o\", c=legend_colors[i], label=value.TrialComponentName)\n", 138 | " i +=1\n", 139 | "plt.title(\"Training vs.Testing Accuracy\", fontweight=\"bold\", fontsize=14)\n", 140 | "plt.ylabel(\"validation:accuracy - Last\", fontweight=\"bold\", fontsize=14)\n", 141 | "plt.xlabel(\"train:accuracy - Last\", fontweight=\"bold\", fontsize=14)\n", 142 | "plt.grid()\n", 143 | "plt.legend()\n", 144 | "plt.show()" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "## Best Candidate Overview\n", 152 | "\n", 153 | "### Best Candidate Job" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": {}, 160 | "outputs": [], 161 | "source": [ 162 | "automl_job.best_candidate()[\"CandidateName\"]" 163 | ] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "metadata": {}, 168 | "source": [ 169 | "### Best Candidate Evaluation Metrics" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": null, 175 | "metadata": {}, 176 | "outputs": [], 177 | "source": [ 178 | "automl_job.best_candidate()[\"FinalAutoMLJobObjectiveMetric\"]" 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": {}, 184 | "source": [ 185 | "## Candidate Artifacts\n", 186 | "\n", 187 | "### Data Exploration Notebook " 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": {}, 194 | "outputs": [], 195 | "source": [ 196 | "automl_job.describe_auto_ml_job()[\"AutoMLJobArtifacts\"][\"DataExplorationNotebookLocation\"]" 197 | ] 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "metadata": {}, 202 | "source": [ 203 | "### Candidate Definition Notebook" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": null, 209 | "metadata": {}, 210 | "outputs": [], 211 | "source": [ 212 | "automl_job.describe_auto_ml_job()[\"AutoMLJobArtifacts\"][\"CandidateDefinitionNotebookLocation\"]" 213 | ] 214 | }, 215 | { 216 | "cell_type": "markdown", 217 | 
"metadata": {}, 218 | "source": [ 219 | "### Explainability Report" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "metadata": {}, 226 | "outputs": [], 227 | "source": [ 228 | "automl_job.describe_auto_ml_job()[\"BestCandidate\"][\"CandidateProperties\"][\"CandidateArtifactLocations\"][\"Explainability\"]" 229 | ] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "metadata": {}, 234 | "source": [ 235 | "## Deploy the Best Candidate\n", 236 | "\n", 237 | ">__NOTE:__ Deploying the Best Model will incur AWS usage costs." 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": null, 243 | "metadata": {}, 244 | "outputs": [], 245 | "source": [ 246 | "automl_job.deploy(\n", 247 | " initial_instance_count=1,\n", 248 | " instance_type=\"ml.m5.xlarge\",\n", 249 | " candidate=automl_job.best_candidate(),\n", 250 | " sagemaker_session=session,\n", 251 | " endpoint_name=\"-\".join(automl_job.best_candidate()[\"CandidateName\"].split(\"-\")[0:7])\n", 252 | ")" 253 | ] 254 | }, 255 | { 256 | "cell_type": "markdown", 257 | "metadata": {}, 258 | "source": [ 259 | "## Cleanup\n", 260 | "\n", 261 | "### Delete Hoasted Endpoint" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": null, 267 | "metadata": {}, 268 | "outputs": [], 269 | "source": [ 270 | "!aws sagemaker delete-endpoint --endpoint-name {\"-\".join(automl_job.best_candidate()[\"CandidateName\"].split(\"-\")[0:7])}" 271 | ] 272 | }, 273 | { 274 | "cell_type": "markdown", 275 | "metadata": {}, 276 | "source": [ 277 | "### Delete the Endpoint Configuration" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": null, 283 | "metadata": {}, 284 | "outputs": [], 285 | "source": [ 286 | "!aws sagemaker delete-endpoint-config --endpoint-config-name {\"-\".join(automl_job.best_candidate()[\"CandidateName\"].split(\"-\")[0:7])}" 287 | ] 288 | } 289 | ], 290 | "metadata": { 291 | "instance_type": "ml.t3.medium", 292 | "kernelspec": { 293 | "display_name": "Python 3 (Data Science)", 294 | "language": "python", 295 | "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-west-2:236514542706:image/datascience-1.0" 296 | }, 297 | "language_info": { 298 | "codemirror_mode": { 299 | "name": "ipython", 300 | "version": 3 301 | }, 302 | "file_extension": ".py", 303 | "mimetype": "text/x-python", 304 | "name": "python", 305 | "nbconvert_exporter": "python", 306 | "pygments_lexer": "ipython3", 307 | "version": "3.7.10" 308 | } 309 | }, 310 | "nbformat": 4, 311 | "nbformat_minor": 4 312 | } 313 | -------------------------------------------------------------------------------- /Chapter03/Image/AutoGluon Image Example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# AutoGluon Image Example\n", 8 | ">__NOTE:__ Make sure to use the Pyton 3 (Data Science) Jupyter Kernel.\n", 9 | "\n", 10 | "## Prerequisites\n", 11 | "\n", 12 | "### Intalling the Image Build CLI" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "%%capture\n", 22 | "import sys\n", 23 | "import warnings\n", 24 | "warnings.filterwarnings(\"ignore\")\n", 25 | "%matplotlib inline\n", 26 | "\n", 27 | "!{sys.executable} -m pip install -U pip sagemaker-studio-image-build" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "### 
Configuring the AutoGluon Training/Testing Script" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "%%writefile train.py\n", 44 | "import os\n", 45 | "import json\n", 46 | "import boto3\n", 47 | "import json\n", 48 | "import warnings\n", 49 | "import numpy as np\n", 50 | "import pandas as pd\n", 51 | "from autogluon.vision import ImagePredictor\n", 52 | "\n", 53 | "warnings.filterwarnings(\"ignore\", category=DeprecationWarning)\n", 54 | "prefix = \"/opt/ml\"\n", 55 | "input_path = os.path.join(prefix, \"input/data\")\n", 56 | "output_path = os.path.join(prefix, \"output\")\n", 57 | "model_path = os.path.join(prefix, \"model\")\n", 58 | "param_path = os.path.join(prefix, \"input/config/hyperparameters.json\")\n", 59 | "\n", 60 | "\n", 61 | "def train(params):\n", 62 | " time_limit = int(params[\"time_limit\"])\n", 63 | " presets = \"\".join([str(i) for i in list(params[\"presets\"])])\n", 64 | " channel_name = \"training\"\n", 65 | " training_path = os.path.join(input_path, channel_name)\n", 66 | " training_dataset = ImagePredictor.Dataset.from_folder(training_path)\n", 67 | " predictor = ImagePredictor().fit(training_dataset, time_limit=time_limit, presets=presets)\n", 68 | " with open(os.path.join(model_path, \"FitSummary.json\"), \"w\") as f:\n", 69 | " json.dump(predictor.fit_summary(), f)\n", 70 | " predictor.save(os.path.join(model_path, \"ImagePredictor.Autogluon\"))\n", 71 | " return \"AutoGluon Job Complete\"\n", 72 | "\n", 73 | "\n", 74 | "if __name__ == \"__main__\":\n", 75 | " print(\"Loading Parameters\\n\")\n", 76 | " with open(param_path) as f:\n", 77 | " params = json.load(f)\n", 78 | " print(\"Training Models\\n\")\n", 79 | " result = train(params)\n", 80 | " print(result)" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "### Container Image Build Instructions (Dockerfile)" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "%%writefile Dockerfile\n", 97 | "ARG REGION\n", 98 | "FROM 763104351884.dkr.ecr.${REGION}.amazonaws.com/autogluon-training:0.3.1-gpu-py37-cu102-ubuntu18.04\n", 99 | "RUN pip install -U pip wheel setuptools\n", 100 | "RUN pip install autogluon\n", 101 | "RUN mkdir -p /opt/program\n", 102 | "RUN mkdir -p /opt/ml\n", 103 | "COPY train.py /opt/program\n", 104 | "WORKDIR /opt/program\n", 105 | "ENTRYPOINT [\"python\", \"train.py\"]" 106 | ] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "metadata": {}, 111 | "source": [ 112 | "### Container Build Process" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": [ 121 | "import boto3\n", 122 | "import sagemaker\n", 123 | "\n", 124 | "aws_region = sagemaker.Session().boto_session.region_name\n", 125 | "!sm-docker build --build-arg REGION={aws_region} ." 
126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": {}, 131 | "source": [ 132 | "---\n", 133 | "\n", 134 | "## AutoGluon Experiment\n", 135 | "\n", 136 | "### Download the Image Data" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [ 145 | "import io\n", 146 | "import urllib\n", 147 | "import zipfile\n", 148 | "\n", 149 | "dataset_url = \"https://storage.googleapis.com/laurencemoroney-blog.appspot.com/rps.zip\"\n", 150 | "with urllib.request.urlopen(dataset_url) as rps_zipfile:\n", 151 | " with zipfile.ZipFile(io.BytesIO(rps_zipfile.read())) as z:\n", 152 | " z.extractall(\"data\")" 153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": {}, 158 | "source": [ 159 | "### Experiment Parameters\n", 160 | "\n", 161 | ">__NOTE:__ Make sure to update the `image_uri` parameter with the _Image URI_ output the __Container Build Process__." 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [ 170 | "import sagemaker\n", 171 | "import datetime\n", 172 | "\n", 173 | "image_uri = \"\"\n", 174 | "role = sagemaker.get_execution_role()\n", 175 | "session = sagemaker.session.Session()\n", 176 | "bucket = session.default_bucket()\n", 177 | "job_version = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')[:-3]\n", 178 | "job_name = f\"autogluon-image-{job_version}\"" 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": {}, 184 | "source": [ 185 | "### Create the AutoGluon Estimator\n", 186 | "\n", 187 | ">__TIP:__ To leverage [Managed Spot Training](https://docs.aws.amazon.com/sagemaker/latest/dg/model-managed-spot-training.html) to further resuce training costs, uncomment the lines in the following code cell." 
188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": {}, 194 | "outputs": [], 195 | "source": [ 196 | "from sagemaker.estimator import Estimator\n", 197 | "\n", 198 | "autogluon = Estimator(\n", 199 | " image_uri=image_uri,\n", 200 | " role=role,\n", 201 | " output_path=f\"s3://{bucket}/{job_name}\",\n", 202 | " base_job_name=job_name,\n", 203 | " instance_count=1,\n", 204 | " instance_type=\"ml.p2.xlarge\",\n", 205 | " hyperparameters={\n", 206 | " \"presets\": \"medium_quality_faster_train\",\n", 207 | " \"time_limit\": \"600\",\n", 208 | " \"bucket\": bucket,\n", 209 | " \"training_job\": job_name\n", 210 | " },\n", 211 | " volume_size=50,\n", 212 | "# use_spot_instances=True,\n", 213 | "# max_wait=3600,\n", 214 | "# max_run=8*3600\n", 215 | ")" 216 | ] 217 | }, 218 | { 219 | "cell_type": "markdown", 220 | "metadata": {}, 221 | "source": [ 222 | "### Execute the Experiment" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "metadata": {}, 229 | "outputs": [], 230 | "source": [ 231 | "autogluon.fit(\n", 232 | " inputs={\n", 233 | " \"training\": session.upload_data(\n", 234 | " \"data/rps\",\n", 235 | " bucket=bucket,\n", 236 | " key_prefix=f\"{job_name}/input\"\n", 237 | " )\n", 238 | " }\n", 239 | ")" 240 | ] 241 | }, 242 | { 243 | "cell_type": "markdown", 244 | "metadata": {}, 245 | "source": [ 246 | "### Experiment Results\n", 247 | "\n", 248 | "#### Download Model Artifacts" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": null, 254 | "metadata": {}, 255 | "outputs": [], 256 | "source": [ 257 | "!mkdir extract\n", 258 | "sagemaker.s3.S3Downloader.download(autogluon.model_data, \"./\")\n", 259 | "!tar xfz ./model.tar.gz -C extract" 260 | ] 261 | }, 262 | { 263 | "cell_type": "markdown", 264 | "metadata": {}, 265 | "source": [ 266 | "#### Review Model Summary" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": null, 272 | "metadata": {}, 273 | "outputs": [], 274 | "source": [ 275 | "import json\n", 276 | "with open(\"extract/FitSummary.json\", \"r\") as f:\n", 277 | " fit_summary = json.load(f)\n", 278 | "print(json.dumps(fit_summary, indent=4))\n", 279 | "print(f\"\"\"Best Model Training Accuracy: {fit_summary[\"train_acc\"]} \\nBest Model Validation Accuracy: {fit_summary[\"valid_acc\"]}\"\"\")" 280 | ] 281 | } 282 | ], 283 | "metadata": { 284 | "instance_type": "ml.t3.medium", 285 | "kernelspec": { 286 | "display_name": "Python 3 (Data Science)", 287 | "language": "python", 288 | "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-west-2:236514542706:image/datascience-1.0" 289 | }, 290 | "language_info": { 291 | "codemirror_mode": { 292 | "name": "ipython", 293 | "version": 3 294 | }, 295 | "file_extension": ".py", 296 | "mimetype": "text/x-python", 297 | "name": "python", 298 | "nbconvert_exporter": "python", 299 | "pygments_lexer": "ipython3", 300 | "version": "3.7.10" 301 | } 302 | }, 303 | "nbformat": 4, 304 | "nbformat_minor": 4 305 | } 306 | -------------------------------------------------------------------------------- /Chapter03/Tabular/AutoGluon Tabular Example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# AutoGluon Tabular Example\n", 8 | ">__NOTE:__ Make sure to use the Pyton 3 (Data Science) Jupyter Kernel.\n", 9 | "\n", 10 | "## Prerequisites\n", 11 | "\n", 12 | "### Intalling the 
Image Build CLI" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "%%capture\n", 22 | "import sys\n", 23 | "import warnings\n", 24 | "warnings.filterwarnings('ignore')\n", 25 | "%matplotlib inline\n", 26 | "\n", 27 | "!{sys.executable} -m pip install -U pip sagemaker-studio-image-build" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "### Configuring the AutoGluon Training/Testing Script" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "%%writefile train.py\n", 44 | "import os\n", 45 | "import json\n", 46 | "import boto3\n", 47 | "import json\n", 48 | "import warnings\n", 49 | "import numpy as np\n", 50 | "import pandas as pd\n", 51 | "from autogluon.tabular import TabularDataset, TabularPredictor\n", 52 | "\n", 53 | "warnings.filterwarnings(\"ignore\", category=DeprecationWarning)\n", 54 | "prefix = \"/opt/ml\"\n", 55 | "input_path = os.path.join(prefix, \"input/data\")\n", 56 | "output_path = os.path.join(prefix, \"output\")\n", 57 | "model_path = os.path.join(prefix, \"model\")\n", 58 | "param_path = os.path.join(prefix, 'input/config/hyperparameters.json')\n", 59 | "\n", 60 | "\n", 61 | "def train(params):\n", 62 | " label = params[\"label\"]\n", 63 | " channel_name = \"training\"\n", 64 | " training_path = os.path.join(input_path, channel_name)\n", 65 | " training_dataset = TabularDataset(os.path.join(training_path, \"training.csv\"))\n", 66 | " predictor = TabularPredictor(label=label, path=model_path).fit(training_dataset)\n", 67 | " with open(os.path.join(model_path, \"Fit_Summary.txt\"), \"w\") as f:\n", 68 | " print(predictor.fit_summary(), file=f)\n", 69 | " return predictor\n", 70 | " \n", 71 | "\n", 72 | "def test(params, predictor):\n", 73 | " label = params[\"label\"]\n", 74 | " channel_name = \"testing\"\n", 75 | " testing_path = os.path.join(input_path, channel_name)\n", 76 | " testing_dataset = TabularDataset(os.path.join(testing_path, \"testing.csv\"))\n", 77 | " ground_truth = testing_dataset[label]\n", 78 | " testing_data = testing_dataset.drop(columns=label)\n", 79 | " predictions = predictor.predict(testing_data)\n", 80 | " with open(os.path.join(model_path, \"Model_Evaluation.txt\"), \"w\") as f:\n", 81 | " print(\n", 82 | " json.dumps(\n", 83 | " predictor.evaluate_predictions(\n", 84 | " y_true=ground_truth,\n", 85 | " y_pred=predictions,\n", 86 | " auxiliary_metrics=True\n", 87 | " ),\n", 88 | " indent=4\n", 89 | " ),\n", 90 | " file=f\n", 91 | " )\n", 92 | " leaderboard = predictor.leaderboard(testing_dataset, silent=True)\n", 93 | " leaderboard.to_csv(os.path.join(model_path, \"Leaderboard.csv\"))\n", 94 | "\n", 95 | "\n", 96 | "if __name__ == \"__main__\":\n", 97 | " print(\"Loading Parameters\\n\")\n", 98 | " with open(param_path) as f:\n", 99 | " params = json.load(f)\n", 100 | " print(\"Training Models\\n\")\n", 101 | " predictor = train(params)\n", 102 | " print(\"Testig Models\\n\")\n", 103 | " test(params, predictor)\n", 104 | " print(\"AutoGluon Job Complete\")" 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "### Container Image Build Instructions (Dockerfile)" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "%%writefile Dockerfile\n", 121 | "ARG REGION\n", 122 | "FROM 
763104351884.dkr.ecr.${REGION}.amazonaws.com/autogluon-training:0.3.1-cpu-py37-ubuntu18.04\n", 123 | "RUN pip install -U pip\n", 124 | "RUN pip install bokeh==2.0.1\n", 125 | "RUN mkdir -p /opt/program\n", 126 | "RUN mkdir -p /opt/ml\n", 127 | "COPY train.py /opt/program\n", 128 | "WORKDIR /opt/program\n", 129 | "ENTRYPOINT [\"python\", \"train.py\"]" 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": {}, 135 | "source": [ 136 | "### Container Build Process" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [ 145 | "import boto3\n", 146 | "import sagemaker\n", 147 | "\n", 148 | "aws_region = sagemaker.Session().boto_session.region_name\n", 149 | "!sm-docker build --build-arg REGION={aws_region} ." 150 | ] 151 | }, 152 | { 153 | "cell_type": "markdown", 154 | "metadata": {}, 155 | "source": [ 156 | "---\n", 157 | "\n", 158 | "## AutoGluon Experiment\n", 159 | "\n", 160 | "### Download the Abalone Data" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": null, 166 | "metadata": {}, 167 | "outputs": [], 168 | "source": [ 169 | "import numpy as np\n", 170 | "import pandas as pd \n", 171 | "from sklearn.model_selection import train_test_split\n", 172 | "\n", 173 | "column_names = [\"sex\", \"length\", \"diameter\", \"height\", \"whole_weight\", \"shucked_weight\", \"viscera_weight\", \"shell_weight\", \"rings\"]\n", 174 | "abalone_data = pd.read_csv(\"http://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data\", names=column_names)\n", 175 | "training_data, testing_data = train_test_split(abalone_data, test_size=0.1)\n", 176 | "training_data.to_csv(\"training.csv\")\n", 177 | "testing_data.to_csv(\"testing.csv\")" 178 | ] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "metadata": {}, 183 | "source": [ 184 | "### Experiment Parameters\n", 185 | "\n", 186 | ">__NOTE:__ Update the `image_uri` parameter with the _Image URI_ output the __Container Build Process__." 
187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": {}, 193 | "outputs": [], 194 | "source": [ 195 | "import sagemaker\n", 196 | "import datetime\n", 197 | "\n", 198 | "image_uri = \"\"\n", 199 | "role = sagemaker.get_execution_role()\n", 200 | "session = sagemaker.session.Session()\n", 201 | "bucket = session.default_bucket()\n", 202 | "job_version = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')[:-3]\n", 203 | "job_name = f\"abalone-autogluon-{job_version}\"" 204 | ] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "metadata": {}, 209 | "source": [ 210 | "### Create the AutoGluon Estimator " 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": {}, 217 | "outputs": [], 218 | "source": [ 219 | "from sagemaker.estimator import Estimator\n", 220 | "\n", 221 | "autogluon = Estimator(\n", 222 | " image_uri=image_uri,\n", 223 | " role=role,\n", 224 | " output_path=f\"s3://{bucket}/{job_name}\",\n", 225 | " base_job_name=job_name,\n", 226 | " instance_count=1,\n", 227 | " instance_type=\"ml.m5.xlarge\",\n", 228 | " hyperparameters={\n", 229 | " \"label\": \"rings\",\n", 230 | " \"bucket\": bucket,\n", 231 | " \"training_job\": job_name\n", 232 | " },\n", 233 | " volume_size=20\n", 234 | ")" 235 | ] 236 | }, 237 | { 238 | "cell_type": "markdown", 239 | "metadata": {}, 240 | "source": [ 241 | "### Execute the Experiment" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": null, 247 | "metadata": {}, 248 | "outputs": [], 249 | "source": [ 250 | "autogluon.fit(\n", 251 | " inputs={\n", 252 | " \"training\": session.upload_data(\n", 253 | " \"training.csv\",\n", 254 | " bucket=bucket,\n", 255 | " key_prefix=f\"{job_name}/input\"\n", 256 | " ),\n", 257 | " \"testing\": session.upload_data(\n", 258 | " \"testing.csv\",\n", 259 | " bucket=bucket,\n", 260 | " key_prefix=f\"{job_name}/input\"\n", 261 | " )\n", 262 | " }\n", 263 | ")" 264 | ] 265 | }, 266 | { 267 | "cell_type": "markdown", 268 | "metadata": {}, 269 | "source": [ 270 | "### Experiment Results\n", 271 | "\n", 272 | "#### Download Model Artifacts" 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": null, 278 | "metadata": {}, 279 | "outputs": [], 280 | "source": [ 281 | "!mkdir extract\n", 282 | "sagemaker.s3.S3Downloader.download(autogluon.model_data, \"./\")\n", 283 | "!tar xfz ./model.tar.gz -C extract" 284 | ] 285 | }, 286 | { 287 | "cell_type": "markdown", 288 | "metadata": {}, 289 | "source": [ 290 | "#### Review Model Leaderboard" 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": null, 296 | "metadata": {}, 297 | "outputs": [], 298 | "source": [ 299 | "df = pd.read_csv(\"./extract/Leaderboard.csv\")\n", 300 | "df = df.filter([\"model\",\"score_test\", \"score_val\"]).sort_values(by=\"score_val\", ascending=False).reset_index().drop(columns=\"index\")\n", 301 | "df" 302 | ] 303 | }, 304 | { 305 | "cell_type": "markdown", 306 | "metadata": {}, 307 | "source": [ 308 | "#### Plot Model Comparison" 309 | ] 310 | }, 311 | { 312 | "cell_type": "code", 313 | "execution_count": null, 314 | "metadata": {}, 315 | "outputs": [], 316 | "source": [ 317 | "import IPython\n", 318 | "IPython.display.HTML(filename=\"./extract/SummaryOfModels.html\")" 319 | ] 320 | } 321 | ], 322 | "metadata": { 323 | "instance_type": "ml.t3.medium", 324 | "kernelspec": { 325 | "display_name": "Python 3 (Data Science)", 326 | "language": "python", 327 | "name": 
"python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-west-2:236514542706:image/datascience-1.0" 328 | }, 329 | "language_info": { 330 | "codemirror_mode": { 331 | "name": "ipython", 332 | "version": 3 333 | }, 334 | "file_extension": ".py", 335 | "mimetype": "text/x-python", 336 | "name": "python", 337 | "nbconvert_exporter": "python", 338 | "pygments_lexer": "ipython3", 339 | "version": "3.7.10" 340 | } 341 | }, 342 | "nbformat": 4, 343 | "nbformat_minor": 4 344 | } 345 | -------------------------------------------------------------------------------- /Chapter03/policy.json: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Effect": "Allow", 6 | "Action": [ 7 | "codebuild:DeleteProject", 8 | "codebuild:CreateProject", 9 | "codebuild:BatchGetBuilds", 10 | "codebuild:StartBuild" 11 | ], 12 | "Resource": "arn:aws:codebuild:*:*:project/sagemaker-studio*" 13 | }, 14 | { 15 | "Effect": "Allow", 16 | "Action": "logs:CreateLogStream", 17 | "Resource": "arn:aws:logs:*:*:log-group:/aws/codebuild/sagemaker-studio*" 18 | }, 19 | { 20 | "Effect": "Allow", 21 | "Action": [ 22 | "logs:GetLogEvents", 23 | "logs:PutLogEvents" 24 | ], 25 | "Resource": "arn:aws:logs:*:*:log-group:/aws/codebuild/sagemaker-studio*:log-stream:*" 26 | }, 27 | { 28 | "Effect": "Allow", 29 | "Action": "logs:CreateLogGroup", 30 | "Resource": "*" 31 | }, 32 | { 33 | "Effect": "Allow", 34 | "Action": [ 35 | "ecr:CreateRepository", 36 | "ecr:BatchGetImage", 37 | "ecr:CompleteLayerUpload", 38 | "ecr:DescribeImages", 39 | "ecr:DescribeRepositories", 40 | "ecr:UploadLayerPart", 41 | "ecr:ListImages", 42 | "ecr:InitiateLayerUpload", 43 | "ecr:BatchCheckLayerAvailability", 44 | "ecr:PutImage" 45 | ], 46 | "Resource": "arn:aws:ecr:*:*:repository/sagemaker-studio*" 47 | }, 48 | { 49 | "Effect": "Allow", 50 | "Action": "ecr:GetAuthorizationToken", 51 | "Resource": "*" 52 | }, 53 | { 54 | "Effect": "Allow", 55 | "Action": [ 56 | "s3:GetObject", 57 | "s3:DeleteObject", 58 | "s3:PutObject" 59 | ], 60 | "Resource": "arn:aws:s3:::sagemaker-*/*" 61 | }, 62 | { 63 | "Effect": "Allow", 64 | "Action": [ 65 | "s3:CreateBucket" 66 | ], 67 | "Resource": "arn:aws:s3:::sagemaker*" 68 | }, 69 | { 70 | "Effect": "Allow", 71 | "Action": [ 72 | "iam:GetRole", 73 | "iam:ListRoles" 74 | ], 75 | "Resource": "*" 76 | }, 77 | { 78 | "Effect": "Allow", 79 | "Action": "iam:PassRole", 80 | "Resource": "arn:aws:iam::*:role/*", 81 | "Condition": { 82 | "StringLikeIfExists": { 83 | "iam:PassedToService": "codebuild.amazonaws.com" 84 | } 85 | } 86 | } 87 | ] 88 | } -------------------------------------------------------------------------------- /Chapter04/cdk/abalone_endpoint_stack.py: -------------------------------------------------------------------------------- 1 | import os 2 | import aws_cdk as cdk 3 | import aws_cdk.aws_sagemaker as sagemaker 4 | 5 | class EndpointStack(cdk.Stack): 6 | def __init__(self, app: cdk.App, id: str, *, model_name: str=None, **kwargs) -> None: 7 | super().__init__(app, id, **kwargs) 8 | 9 | bucket_name = cdk.CfnParameter( 10 | self, 11 | "BucketName", 12 | type="String" 13 | ) 14 | 15 | execution_id = cdk.CfnParameter( 16 | self, 17 | "ExecutionId", 18 | type="String" 19 | ) 20 | 21 | model_uri = cdk.CfnParameter( 22 | self, 23 | "ModelUri", 24 | type="String" 25 | ) 26 | 27 | execution_role = cdk.CfnParameter( 28 | self, 29 | "ExecutionRole", 30 | type="String" 31 | ) 32 | 33 | model_image = cdk.CfnParameter( 34 | self, 35 | "ImageUri", 36 | 
type="String" 37 | ) 38 | 39 | model = sagemaker.CfnModel( 40 | self, 41 | "Model", 42 | model_name="{}-model-{}".format(model_name.capitalize(), execution_id.value_as_string), 43 | execution_role_arn=execution_role.value_as_string, 44 | primary_container=sagemaker.CfnModel.ContainerDefinitionProperty( 45 | image=model_image.value_as_string, 46 | model_data_url=model_uri.value_as_string, 47 | image_config=sagemaker.CfnModel.ImageConfigProperty( 48 | repository_access_mode="Platform" 49 | ) 50 | ) 51 | ) 52 | 53 | endpoint_config = sagemaker.CfnEndpointConfig( 54 | self, 55 | "EndpointConfig", 56 | endpoint_config_name="{}-config-{}".format(model_name.capitalize(), execution_id.value_as_string), 57 | production_variants=[ 58 | sagemaker.CfnEndpointConfig.ProductionVariantProperty( 59 | initial_instance_count=2, 60 | initial_variant_weight=1.0, 61 | instance_type="ml.m5.large", 62 | model_name=model.attr_model_name, 63 | variant_name="AllTraffic" 64 | ) 65 | ], 66 | data_capture_config=sagemaker.CfnEndpointConfig.DataCaptureConfigProperty( 67 | capture_content_type_header=sagemaker.CfnEndpointConfig.CaptureContentTypeHeaderProperty( 68 | csv_content_types=[ 69 | "text/csv" 70 | ] 71 | ), 72 | capture_options=[ 73 | sagemaker.CfnEndpointConfig.CaptureOptionProperty(capture_mode="Input"), 74 | sagemaker.CfnEndpointConfig.CaptureOptionProperty(capture_mode="Output") 75 | ], 76 | destination_s3_uri="s3://{}/endpoint-data-capture".format(bucket_name.value_as_string), 77 | enable_capture=True, 78 | initial_sampling_percentage=100.0 79 | ) 80 | ) 81 | endpoint_config.add_depends_on(model) 82 | 83 | endpoint = sagemaker.CfnEndpoint( 84 | self, 85 | "AbaloneEndpoint", 86 | endpoint_config_name=endpoint_config.attr_endpoint_config_name, 87 | endpoint_name="{}-Endpoint".format(model_name.capitalize()) 88 | ) 89 | endpoint.add_depends_on(endpoint_config) -------------------------------------------------------------------------------- /Chapter04/scripts/build.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import logging 3 | import os 4 | import sys 5 | import time 6 | from botocore.exceptions import ClientError 7 | 8 | logger = logging.getLogger() 9 | logging_format = "%(levelname)s: [%(filename)s:%(lineno)s] %(message)s" 10 | logging.basicConfig(format=logging_format, level=os.environ.get("LOGLEVEL", "INFO").upper()) 11 | codepipeline_client = boto3.client("codepipeline") 12 | sagemaker_client = boto3.client("sagemaker") 13 | image_uri = os.environ["IMAGE_URI"] 14 | bucket_name = os.environ["BUCKET_NAME"] 15 | role_arn = os.environ["ROLE_ARN"] 16 | pipeline_name = os.environ["PIPELINE_NAME"] 17 | model_name = os.environ["MODEL_NAME"] 18 | 19 | 20 | def get_execution_id(name=None, task=None): 21 | try: 22 | response = codepipeline_client.get_pipeline_state(name=name) 23 | for stage in response["stageStates"]: 24 | if stage["stageName"] == "Build": 25 | for action in stage["actionStates"]: 26 | if action["actionName"] == task.capitalize(): 27 | return stage["latestExecution"]["pipelineExecutionId"] 28 | except ClientError as e: 29 | error = e.response["Error"]["Message"] 30 | logger.error(error) 31 | raise Exception(error) 32 | 33 | 34 | def get_model_artifact(name=None): 35 | try: 36 | response = sagemaker_client.describe_training_job(TrainingJobName=name) 37 | return response["ModelArtifacts"]["S3ModelArtifacts"] 38 | except ClientError as e: 39 | error = e.response["Error"]["Message"] 40 | logger.error(error) 41 | raise Exception(error) 42 | 43 
| 44 | def handle_data(model_name=None, execution_id=None): 45 | try: 46 | response = sagemaker_client.create_processing_job( 47 | ProcessingJobName=f"{model_name}-ProcessingJob-{execution_id}", 48 | ProcessingResources={ 49 | 'ClusterConfig': { 50 | 'InstanceCount': 1, 51 | 'InstanceType': 'ml.m5.xlarge', 52 | 'VolumeSizeInGB': 30 53 | } 54 | }, 55 | StoppingCondition={ 56 | 'MaxRuntimeInSeconds': 3600 57 | }, 58 | AppSpecification={ 59 | 'ImageUri': f"{image_uri}:latest", 60 | 'ContainerEntrypoint': ["python", "app.py", "preprocess"] 61 | }, 62 | ProcessingInputs=[ 63 | { 64 | 'InputName': 'data', 65 | 'S3Input': { 66 | 'S3Uri': f"s3://{bucket_name}/data/{model_name}.data", 67 | 'LocalPath': '/opt/ml/processing/input/data', 68 | 'S3DataType': 'S3Prefix', 69 | 'S3InputMode': 'File', 70 | 'S3DataDistributionType': 'FullyReplicated', 71 | 'S3CompressionType': 'None' 72 | } 73 | } 74 | ], 75 | ProcessingOutputConfig={ 76 | 'Outputs': [ 77 | { 78 | 'OutputName': 'training', 79 | 'S3Output': { 80 | 'S3Uri': f"s3://{bucket_name}/{execution_id}/input/training", 81 | 'LocalPath': '/opt/ml/processing/output/training', 82 | 'S3UploadMode': 'EndOfJob' 83 | } 84 | }, 85 | { 86 | 'OutputName': 'testing', 87 | 'S3Output': { 88 | 'S3Uri': f"s3://{bucket_name}/{execution_id}/input/testing", 89 | 'LocalPath': '/opt/ml/processing/output/testing', 90 | 'S3UploadMode': 'EndOfJob' 91 | } 92 | } 93 | ] 94 | }, 95 | RoleArn=role_arn 96 | ) 97 | return f"{model_name}-ProcessingJob-{execution_id}" 98 | except ClientError as e: 99 | error = e.response["Error"]["Message"] 100 | logger.error(error) 101 | raise Exception(error) 102 | 103 | 104 | def handle_training(model_name=None, execution_id=None): 105 | try: 106 | response = sagemaker_client.create_training_job( 107 | TrainingJobName=f"{model_name}-TrainingJob-{execution_id}", 108 | AlgorithmSpecification={ 109 | 'TrainingImage': f"{image_uri}:latest", 110 | 'TrainingInputMode': 'File', 111 | 'EnableSageMakerMetricsTimeSeries': True, 112 | 'MetricDefinitions': [ 113 | { 114 | 'Name': 'loss', 115 | 'Regex': 'loss: ([0-9\\.]+)' 116 | }, 117 | { 118 | 'Name': 'mae', 119 | 'Regex': 'mae: ([0-9\\.]+)' 120 | }, 121 | { 122 | 'Name': 'validation_loss', 123 | 'Regex': 'val_loss: ([0-9\\.]+)' 124 | }, 125 | { 126 | 'Name': 'validation_mae', 127 | 'Regex': 'val_mae: ([0-9\\.]+)' 128 | } 129 | ] 130 | }, 131 | HyperParameters={ 132 | 'epochs': '200', 133 | 'batch_size': '8' 134 | }, 135 | InputDataConfig=[ 136 | { 137 | 'ChannelName': 'training', 138 | 'ContentType': 'text/csv', 139 | 'DataSource': { 140 | 'S3DataSource': { 141 | 'S3Uri': f"s3://{bucket_name}/{execution_id}/input/training", 142 | 'S3DataType': 'S3Prefix', 143 | 'S3DataDistributionType': 'FullyReplicated' 144 | } 145 | } 146 | } 147 | ], 148 | OutputDataConfig={ 149 | 'S3OutputPath': f"s3://{bucket_name}/{execution_id}" 150 | }, 151 | ResourceConfig={ 152 | 'InstanceType': 'ml.m5.xlarge', 153 | 'InstanceCount': 1, 154 | 'VolumeSizeInGB': 30 155 | }, 156 | RoleArn=role_arn, 157 | StoppingCondition={ 158 | 'MaxRuntimeInSeconds': 3600 159 | } 160 | ) 161 | return f"{model_name}-TrainingJob-{execution_id}" 162 | except ClientError as e: 163 | error = e.response["Error"]["Message"] 164 | logger.error(error) 165 | raise Exception(error) 166 | 167 | 168 | def handle_evaluation(model_name=None, execution_id=None): 169 | try: 170 | response = sagemaker_client.create_processing_job( 171 | ProcessingJobName=f"{model_name}-EvaluationJob-{execution_id}", 172 | ProcessingResources={ 173 | 'ClusterConfig': { 174 | 
'InstanceCount': 1, 175 | 'InstanceType': 'ml.m5.xlarge', 176 | 'VolumeSizeInGB': 30 177 | } 178 | }, 179 | StoppingCondition={ 180 | 'MaxRuntimeInSeconds': 3600 181 | }, 182 | AppSpecification={ 183 | 'ImageUri': f"{image_uri}:latest", 184 | 'ContainerEntrypoint': ["python", "app.py", "evaluate"] 185 | }, 186 | ProcessingInputs=[ 187 | { 188 | 'InputName': 'data', 189 | 'S3Input': { 190 | 'S3Uri': f"s3://{bucket_name}/{execution_id}/input/testing", 191 | 'LocalPath': '/opt/ml/processing/input/data', 192 | 'S3DataType': 'S3Prefix', 193 | 'S3InputMode': 'File', 194 | 'S3DataDistributionType': 'FullyReplicated', 195 | 'S3CompressionType': 'None' 196 | } 197 | }, 198 | { 199 | 'InputName': 'model', 200 | 'S3Input': { 201 | 'S3Uri': get_model_artifact(name=f"{model_name}-TrainingJob-{execution_id}"), 202 | 'LocalPath': '/opt/ml/processing/input/model', 203 | 'S3DataType': 'S3Prefix', 204 | 'S3InputMode': 'File', 205 | 'S3DataDistributionType': 'FullyReplicated', 206 | 'S3CompressionType': 'None' 207 | } 208 | } 209 | ], 210 | ProcessingOutputConfig={ 211 | 'Outputs': [ 212 | { 213 | 'OutputName': 'evaluation', 214 | 'S3Output': { 215 | 'S3Uri': f"s3://{bucket_name}/{execution_id}/evaluation", 216 | 'LocalPath': '/opt/ml/processing/output/evaluation', 217 | 'S3UploadMode': 'EndOfJob' 218 | } 219 | } 220 | ] 221 | }, 222 | RoleArn=role_arn 223 | ) 224 | return f"{model_name}-EvaluationJob-{execution_id}" 225 | except ClientError as e: 226 | error = e.response["Error"]["Message"] 227 | logger.error(error) 228 | raise Exception(error) 229 | 230 | 231 | def handle_status(task=None, job_name=None): 232 | if task == "preprocess" or task == "evaluate": 233 | status = sagemaker_client.describe_processing_job(ProcessingJobName=job_name)["ProcessingJobStatus"] 234 | while status == "InProgress": 235 | time.sleep(60) 236 | logger.info(f"Task: {task}, Status: {status}") 237 | status = sagemaker_client.describe_processing_job(ProcessingJobName=job_name)["ProcessingJobStatus"] 238 | return status 239 | elif task == "train": 240 | status = sagemaker_client.describe_training_job(TrainingJobName=job_name)["TrainingJobStatus"] 241 | while status == "InProgress": 242 | time.sleep(60) 243 | logger.info(f"Task: {task}, Status: {status}") 244 | status = sagemaker_client.describe_training_job(TrainingJobName=job_name)["TrainingJobStatus"] 245 | return status 246 | 247 | 248 | if __name__ == "__main__": 249 | task = sys.argv[1] 250 | execution_id = get_execution_id(name=pipeline_name, task=task) 251 | logger.info(f"Executing {task.upper()} task") 252 | if task == "preprocess": 253 | job_name = handle_data(model_name=model_name, execution_id=execution_id) 254 | status = handle_status(task=task, job_name=job_name) 255 | elif task == "train": 256 | job_name = handle_training(model_name=model_name, execution_id=execution_id) 257 | status = handle_status(task=task, job_name=job_name) 258 | elif task == "evaluate": 259 | job_name = handle_evaluation(model_name=model_name, execution_id=execution_id) 260 | status = handle_status(task=task, job_name=job_name) 261 | else: 262 | error = "Invalid argument: Specify 'preprocess', 'train' or 'evaluate'" 263 | logger.error(error) 264 | sys.exit(255) 265 | if status == "Completed": 266 | logger.info(f"Task: {task}, Final Status: {status}") 267 | sys.exit(0) 268 | else: 269 | error = f"Task: {task}, Failed! 
See CloudWatch Logs for further information" 270 | logger.error(error) 271 | sys.exit(255) 272 | -------------------------------------------------------------------------------- /Chapter04/scripts/deploy.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import logging 3 | import os 4 | import json 5 | import sys 6 | from botocore.exceptions import ClientError 7 | 8 | logger = logging.getLogger() 9 | logging_format = "%(levelname)s: [%(filename)s:%(lineno)s] %(message)s" 10 | logging.basicConfig(format=logging_format, level=os.environ.get("LOGLEVEL", "INFO").upper()) 11 | codepipeline_client = boto3.client("codepipeline") 12 | sagemaker_client = boto3.client("sagemaker") 13 | pipeline_name = os.environ["PIPELINE_NAME"] 14 | model_name = os.environ["MODEL_NAME"] 15 | role_arn = os.environ["ROLE_ARN"] 16 | 17 | 18 | def get_execution_id(name=None, task=None): 19 | try: 20 | response = codepipeline_client.get_pipeline_state(name=name) 21 | for stage in response["stageStates"]: 22 | if stage["stageName"] == "Deploy": 23 | for action in stage["actionStates"]: 24 | if action["actionName"] == task: 25 | return stage["latestExecution"]["pipelineExecutionId"] 26 | except ClientError as e: 27 | error = e.response["Error"]["Message"] 28 | logger.error(error) 29 | raise Exception(error) 30 | 31 | 32 | def get_model_artifact(model_name=None, execution_id=None): 33 | try: 34 | response = sagemaker_client.describe_training_job(TrainingJobName=f"{model_name}-TrainingJob-{execution_id}") 35 | return response["ModelArtifacts"]["S3ModelArtifacts"] 36 | except ClientError as e: 37 | error = e.response["Error"]["Message"] 38 | logger.error(error) 39 | raise Exception(error) 40 | 41 | 42 | if __name__ == "__main__": 43 | task = "DeploymentBuild" 44 | execution_id = get_execution_id(name=pipeline_name, task=task) 45 | logger.info("Creating Stack Parameters") 46 | params = { 47 | "ImageUri": "{}:latest".format(os.environ["IMAGE_URI"]), 48 | "ExecutionId": execution_id, 49 | "BucketName": os.environ["BUCKET_NAME"], 50 | "ModelUri": get_model_artifact(model_name=model_name, execution_id=execution_id), 51 | "ExecutionRole": os.environ["ROLE_ARN"] 52 | } 53 | try: 54 | with open(os.path.join(os.environ["CODEBUILD_SRC_DIR"], "output/params.json"), "w") as f: 55 | json.dump(params, f) 56 | logger.info(json.dumps(params, indent=4)), 57 | sys.exit(0) 58 | except Exception as error: 59 | logger.error(error) 60 | sys.exit(255) 61 | -------------------------------------------------------------------------------- /Chapter05/cdk/app.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import aws_cdk as cdk 5 | from abalone_cicd_pipeline.abalone_endpoint_stack import EndpointStack 6 | from abalone_cicd_pipeline.abalone_cicd_pipeline_stack import PipelineStack 7 | 8 | 9 | MODEL = "abalone" 10 | CODECOMMIT_REPOSITORY = "abalone-cicd-pipeline" 11 | CDK_VERSION = "2.3.0" 12 | 13 | app = cdk.App() 14 | 15 | EndpointStack( 16 | app, 17 | "EndpointStack", 18 | env=cdk.Environment(account=os.getenv("CDK_DEFAULT_ACCOUNT"), region=os.getenv("CDK_DEFAULT_REGION")), 19 | model_name=MODEL 20 | ) 21 | 22 | PipelineStack( 23 | app, 24 | CODECOMMIT_REPOSITORY, 25 | env=cdk.Environment(account=os.getenv("CDK_DEFAULT_ACCOUNT"), region=os.getenv("CDK_DEFAULT_REGION")), 26 | model_name=MODEL, 27 | repo_name=CODECOMMIT_REPOSITORY, 28 | cdk_version=CDK_VERSION 29 | ) 30 | 31 | app.synth() 
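
The `EndpointStack` defined above (and its Chapter06 counterpart) exposes the trained model behind a SageMaker endpoint named `<Model>-Endpoint` — for example `Abalone-Endpoint` when `model_name="abalone"` — with CSV data capture enabled. As a minimal sketch of how a client might call that endpoint once the pipeline has deployed it, the snippet below uses `boto3`'s SageMaker runtime client; the endpoint name is derived from the stack definition, while the sample feature values and their ordering are assumptions that depend on the pipeline's preprocessing step.

```python
# Minimal sketch: invoking the deployed SageMaker endpoint with a CSV payload.
# Assumptions: the stack was deployed with model_name="abalone" (endpoint name
# "Abalone-Endpoint"), and the model expects the encoded abalone features in
# the order produced by the pipeline's preprocessing step.
import boto3

runtime = boto3.client("sagemaker-runtime")

# One hypothetical, already-preprocessed abalone record as a CSV row.
sample_row = "0.455,0.365,0.095,0.514,0.2245,0.101,0.15,0.0,0.0,1.0"

response = runtime.invoke_endpoint(
    EndpointName="Abalone-Endpoint",
    ContentType="text/csv",  # matches the data-capture content type in the stack
    Body=sample_row,
)

# The response body is a stream; decode it to read the predicted number of rings.
print(response["Body"].read().decode("utf-8"))
```
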
-------------------------------------------------------------------------------- /Chapter06/cdk/abalone_endpoint_stack.py: -------------------------------------------------------------------------------- 1 | import aws_cdk as cdk 2 | import aws_cdk.aws_sagemaker as sagemaker 3 | 4 | class EndpointStack(cdk.Stack): 5 | def __init__(self, app: cdk.App, id: str, *, model_name: str=None, **kwargs) -> None: 6 | super().__init__(app, id, **kwargs) 7 | 8 | bucket_name = cdk.CfnParameter( 9 | self, 10 | "BucketName", 11 | type="String" 12 | ) 13 | 14 | execution_id = cdk.CfnParameter( 15 | self, 16 | "ExecutionId", 17 | type="String" 18 | ) 19 | 20 | endpoint_config = sagemaker.CfnEndpointConfig( 21 | self, 22 | "EndpointConfig", 23 | endpoint_config_name="{}-config-{}".format(model_name.capitalize(), execution_id.value_as_string), 24 | production_variants=[ 25 | sagemaker.CfnEndpointConfig.ProductionVariantProperty( 26 | initial_instance_count=2, 27 | initial_variant_weight=1.0, 28 | instance_type="ml.m5.large", 29 | model_name="{}-{}".format(model_name, execution_id.value_as_string), 30 | variant_name="AllTraffic" 31 | ) 32 | ], 33 | data_capture_config=sagemaker.CfnEndpointConfig.DataCaptureConfigProperty( 34 | capture_content_type_header=sagemaker.CfnEndpointConfig.CaptureContentTypeHeaderProperty( 35 | csv_content_types=[ 36 | "text/csv" 37 | ] 38 | ), 39 | capture_options=[ 40 | sagemaker.CfnEndpointConfig.CaptureOptionProperty(capture_mode="Input"), 41 | sagemaker.CfnEndpointConfig.CaptureOptionProperty(capture_mode="Output") 42 | ], 43 | destination_s3_uri="s3://{}/endpoint-data-capture".format(bucket_name.value_as_string), 44 | enable_capture=True, 45 | initial_sampling_percentage=100.0 46 | ) 47 | ) 48 | 49 | endpoint = sagemaker.CfnEndpoint( 50 | self, 51 | "AbaloneEndpoint", 52 | endpoint_config_name=endpoint_config.attr_endpoint_config_name, 53 | endpoint_name="{}-Endpoint".format(model_name.capitalize()) 54 | ) 55 | endpoint.add_depends_on(endpoint_config) -------------------------------------------------------------------------------- /Chapter06/cdk/app.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import aws_cdk as cdk 5 | from abalone_cicd_pipeline.abalone_endpoint_stack import EndpointStack 6 | from abalone_cicd_pipeline.abalone_cicd_pipeline_stack import PipelineStack 7 | 8 | MODEL = "abalone" 9 | CODECOMMIT_REPOSITORY = "abalone-cicd-pipeline" 10 | CDK_VERSION = "2.3.0" 11 | 12 | app = cdk.App() 13 | 14 | EndpointStack( 15 | app, 16 | "EndpointStack", 17 | env=cdk.Environment(account=os.getenv("CDK_DEFAULT_ACCOUNT"), region=os.getenv("CDK_DEFAULT_REGION")), 18 | model_name=MODEL 19 | ) 20 | 21 | PipelineStack( 22 | app, 23 | CODECOMMIT_REPOSITORY, 24 | env=cdk.Environment(account=os.getenv("CDK_DEFAULT_ACCOUNT"), region=os.getenv("CDK_DEFAULT_REGION")), 25 | model_name=MODEL, 26 | repo_name=CODECOMMIT_REPOSITORY, 27 | cdk_version=CDK_VERSION 28 | ) 29 | 30 | app.synth() -------------------------------------------------------------------------------- /Chapter06/scripts/deploy.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import logging 3 | import os 4 | import json 5 | import sys 6 | from botocore.exceptions import ClientError 7 | 8 | logger = logging.getLogger() 9 | logging_format = "%(levelname)s: [%(filename)s:%(lineno)s] %(message)s" 10 | logging.basicConfig(format=logging_format, level=os.environ.get("LOGLEVEL", "INFO").upper()) 
11 | codepipeline_client = boto3.client("codepipeline") 12 | sagemaker_client = boto3.client("sagemaker") 13 | pipeline_name = os.environ["PIPELINE_NAME"] 14 | model_name = os.environ["MODEL_NAME"] 15 | 16 | 17 | def get_execution_id(name=None, task=None): 18 | try: 19 | response = codepipeline_client.get_pipeline_state(name=name) 20 | for stage in response["stageStates"]: 21 | if stage["stageName"] == "Deploy": 22 | for action in stage["actionStates"]: 23 | if action["actionName"] == task: 24 | return stage["latestExecution"]["pipelineExecutionId"] 25 | except ClientError as e: 26 | error = e.response["Error"]["Message"] 27 | logger.error(error) 28 | raise Exception(error) 29 | 30 | 31 | if __name__ == "__main__": 32 | task = "DeploymentBuild" 33 | execution_id = get_execution_id(name=pipeline_name, task=task) 34 | logger.info("Creating Stack Parameters") 35 | params = { 36 | "ExecutionId": execution_id, 37 | "BucketName": os.environ["BUCKET_NAME"] 38 | } 39 | try: 40 | with open(os.path.join(os.environ["CODEBUILD_SRC_DIR"], "output/params.json"), "w") as f: 41 | json.dump(params, f) 42 | logger.info(json.dumps(params, indent=4)), 43 | sys.exit(0) 44 | except Exception as error: 45 | logger.error(error) 46 | sys.exit(255) -------------------------------------------------------------------------------- /Chapter07/Files/buildspec.yml: -------------------------------------------------------------------------------- 1 | version: 0.2 2 | env: 3 | variables: 4 | DATA_PREFIX: abalone_data 5 | EPOCHS: 200 6 | BATCH_SIZE: 8 7 | THRESHOLD: 2.1 8 | 9 | phases: 10 | install: 11 | runtime-versions: 12 | python: 3.8 13 | commands: 14 | - printenv 15 | - echo "Updating Build Environment" 16 | - apt-get update 17 | - python -m pip install --upgrade pip 18 | - python -m pip install --upgrade boto3 awscli sagemaker==2.49.1 stepfunctions==2.2.0 19 | build: 20 | commands: 21 | - echo Build started on `date` 22 | - echo "Creating ML Workflow " 23 | - | 24 | sh -c """ 25 | cd workflow/ 26 | python main.py 27 | """ 28 | post_build: 29 | commands: 30 | - echo "Build Completed" -------------------------------------------------------------------------------- /Chapter08/airflow/dags/.airflowignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Automated-Machine-Learning-on-AWS/fa106b966823211a4588286656b6cf01cc6c14b2/Chapter08/airflow/dags/.airflowignore -------------------------------------------------------------------------------- /Chapter08/airflow/rerquirements.txt: -------------------------------------------------------------------------------- 1 | sagemaker==2.49.1 2 | protobuf==3.19.0 3 | s3fs==0.5.1 4 | boto3>=1.17.4 5 | -------------------------------------------------------------------------------- /Chapter08/airflow/scripts/evaluate.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import tarfile 4 | import pandas as pd 5 | import tensorflow as tf 6 | from sklearn import preprocessing 7 | def load_model(model_path): 8 | model = tf.keras.models.load_model(os.path.join(model_path, "model.h5")) 9 | model.compile(optimizer="adam", loss="mse") 10 | return model 11 | 12 | def evaluate_model(prefix, model): 13 | column_names = ["rings", "sex", "length", "diameter", "height", "whole_weight", "shucked_weight", "viscera_weight", "shell_weight"] 14 | input_path = os.path.join(prefix, "processing/testing") 15 | output_path = os.path.join(prefix, 
"processing/evaluation") 16 | predictions = [] 17 | truths = [] 18 | test_df = pd.read_csv(os.path.join(input_path, "testing.csv"), names=column_names) 19 | y = test_df["rings"].to_numpy() 20 | X = test_df.drop(["rings"], axis=1).to_numpy() 21 | X = preprocessing.normalize(X) 22 | for row in range(len(X)): 23 | payload = [X[row].tolist()] 24 | result = model.predict(payload) 25 | print(result[0][0]) 26 | predictions.append(float(result[0][0])) 27 | truths.append(float(y[row])) 28 | report = { 29 | "GroundTruth": truths, 30 | "Predictions": predictions 31 | } 32 | with open(os.path.join(output_path, "evaluation.json"), "w") as f: 33 | f.write(json.dumps(report)) 34 | 35 | 36 | if __name__ == "__main__": 37 | print("Extracting model archive") 38 | prefix = "/opt/ml" 39 | model_path = os.path.join(prefix, "model") 40 | tarfile_path = os.path.join(prefix, "processing/model/model.tar.gz") 41 | with tarfile.open(tarfile_path) as tar: 42 | tar.extractall(path=model_path) 43 | print("Loading Trained Model") 44 | model = load_model(model_path) 45 | print("Evaluating Trained Model") 46 | evaluate_model(prefix, model) 47 | print("Done!") -------------------------------------------------------------------------------- /Chapter08/cdk/abalone_data_pipeline_stack.py: -------------------------------------------------------------------------------- 1 | import os 2 | import aws_cdk.aws_codecommit as codecommit 3 | import aws_cdk.aws_codebuild as codebuild 4 | import aws_cdk as cdk 5 | import aws_cdk.aws_s3 as s3 6 | import aws_cdk.aws_ssm as ssm 7 | import aws_cdk.aws_s3_deployment as s3_deployment 8 | import aws_cdk.aws_iam as iam 9 | import aws_cdk.aws_glue as glue 10 | import aws_cdk.aws_lambda as lambda_ 11 | import aws_cdk.aws_events_targets as targets 12 | from constructs import Construct 13 | 14 | class DataPipelineStack(cdk.Stack): 15 | def __init__(self, scope: Construct, id: str, *, airflow_environment_name: str=None, model_name: str=None, repo_name: str=None, **kwargs) -> None: 16 | super().__init__(scope, id, **kwargs) 17 | 18 | code_repo = codecommit.Repository.from_repository_name( 19 | self, 20 | "SourceRepository", 21 | repository_name=repo_name 22 | ) 23 | 24 | data_bucket = s3.Bucket( 25 | self, 26 | "AirflowDataBucket", 27 | bucket_name=f"{model_name}-data-{cdk.Aws.REGION}-{cdk.Aws.ACCOUNT_ID}", 28 | block_public_access=s3.BlockPublicAccess.BLOCK_ALL, 29 | auto_delete_objects=True, 30 | removal_policy=cdk.RemovalPolicy.DESTROY, 31 | versioned=True 32 | ) 33 | ssm.StringParameter( 34 | self, 35 | "DataBucketParameter", 36 | description="Airflow Data Bucket Name", 37 | parameter_name="AirflowDataBucket", 38 | string_value=data_bucket.bucket_name 39 | ) 40 | 41 | sagemaker_role = iam.Role( 42 | self, 43 | "SageMakerBuildRole", 44 | assumed_by=iam.CompositePrincipal( 45 | iam.ServicePrincipal("sagemaker.amazonaws.com") 46 | ), 47 | managed_policies=[ 48 | iam.ManagedPolicy.from_aws_managed_policy_name("AmazonSageMakerFullAccess") 49 | ] 50 | ) 51 | data_bucket.grant_read_write(sagemaker_role) 52 | ssm.StringParameter( 53 | self, 54 | "SageMakerRoleParameter", 55 | description="SageMaker Role ARN", 56 | parameter_name="SageMakerRoleARN", 57 | string_value=sagemaker_role.role_arn 58 | ) 59 | 60 | analyze_results_lambda = lambda_.Function( 61 | self, 62 | "AnalyzeResults", 63 | handler="index.lambda_handler", 64 | runtime=lambda_.Runtime.PYTHON_3_8, 65 | code=lambda_.Code.from_asset(os.path.join(os.path.dirname(__file__), "../artifacts/lambda/analyze_results")), 66 | memory_size=128, 67 | 
timeout=cdk.Duration.seconds(60) 68 | ) 69 | data_bucket.grant_read(analyze_results_lambda) 70 | ssm.StringParameter( 71 | self, 72 | "AnalyzeResultsParameter", 73 | description="Analyze Results Lambda Function Name", 74 | parameter_name="AnalyzeResultsLambda", 75 | string_value=analyze_results_lambda.function_name 76 | ) 77 | 78 | glue_role = iam.Role( 79 | self, 80 | "GlueRole", 81 | assumed_by=iam.CompositePrincipal( 82 | iam.ServicePrincipal("glue.amazonaws.com") 83 | ), 84 | managed_policies=[ 85 | iam.ManagedPolicy.from_aws_managed_policy_name("service-role/AWSGlueServiceRole") 86 | ] 87 | ) 88 | data_bucket.grant_read_write(glue_role) 89 | 90 | glue_catalog = glue.CfnDatabase( 91 | self, 92 | "GlueDatabase", 93 | catalog_id=cdk.Aws.ACCOUNT_ID, 94 | database_input=glue.CfnDatabase.DatabaseInputProperty( 95 | name=f"{model_name}_new" 96 | ) 97 | ) 98 | 99 | glue_crawler = glue.CfnCrawler( 100 | self, 101 | "GlueCrawler", 102 | name=f"{model_name}-crawler", 103 | role=glue_role.role_arn, 104 | database_name=glue_catalog.ref, 105 | targets={ 106 | "s3Targets": [ 107 | { 108 | "path": f"s3://{data_bucket.bucket_name}/{model_name}_data/new/" 109 | } 110 | ] 111 | } 112 | ) 113 | ssm.StringParameter( 114 | self, 115 | "GlueCrawlerParameter", 116 | description="Glue Crawler Name", 117 | parameter_name="GlueCrawler", 118 | string_value=glue_crawler.name 119 | ) 120 | 121 | glue_job = glue.CfnJob( 122 | self, 123 | "GlueETLJob", 124 | name=f"{model_name}-etl-job", 125 | description="AWS Glue ETL Job to merge new + raw data, and process training data", 126 | role=glue_role.role_arn, 127 | glue_version="2.0", 128 | execution_property=glue.CfnJob.ExecutionPropertyProperty( 129 | max_concurrent_runs=1 130 | ), 131 | command=glue.CfnJob.JobCommandProperty( 132 | name="glueetl", 133 | python_version="3", 134 | script_location=f"s3://{data_bucket.bucket_name}/airflow/scripts/preprocess.py" 135 | ), 136 | default_arguments={ 137 | "--job-language": "python", 138 | "--GLUE_CATALOG": glue_catalog.ref, 139 | "--S3_BUCKET": data_bucket.bucket_name, 140 | "--S3_INPUT_KEY_PREFIX": f"{model_name}_data/raw/abalone.data", 141 | "--S3_OUTPUT_KEY_PREFIX": f"{model_name}_data", 142 | "--TempDir": f"s3://{data_bucket.bucket_name}/glue-temp" 143 | }, 144 | allocated_capacity=5, 145 | timeout=10 146 | ) 147 | ssm.StringParameter( 148 | self, 149 | "GlueJobParameter", 150 | description="Glue Job Name", 151 | parameter_name="GlueJob", 152 | string_value=glue_job.name 153 | ) 154 | 155 | s3_deployment.BucketDeployment( 156 | self, 157 | "DeployData", 158 | sources=[ 159 | s3_deployment.Source.asset(os.path.join(os.path.dirname(__file__), "../artifacts/data")) 160 | ], 161 | destination_bucket=data_bucket, 162 | destination_key_prefix=f"{model_name}_data/raw", 163 | retain_on_delete=False 164 | ) 165 | 166 | code_deployment = codebuild.Project( 167 | self, 168 | "CodeDeploymentProject", 169 | project_name="CodeDeploymentProject", 170 | description="CodeBuild Project to Copy Airflow Artifacts to S3", 171 | source=codebuild.Source.code_commit( 172 | repository=code_repo 173 | ), 174 | environment=codebuild.BuildEnvironment( 175 | build_image=codebuild.LinuxBuildImage.STANDARD_5_0 176 | ), 177 | environment_variables={ 178 | "DATA_BUCKET": codebuild.BuildEnvironmentVariable( 179 | value=data_bucket.bucket_name 180 | ) 181 | }, 182 | build_spec=codebuild.BuildSpec.from_object( 183 | { 184 | "version": "0.2", 185 | "phases": { 186 | "install": { 187 | "runtime-versions": { 188 | "python": 3.8 189 | }, 190 | "commands": [ 
191 | "printenv", 192 | "echo 'Updating Build Environment'", 193 | "python -m pip install --upgrade pip", 194 | "python -m pip install --upgrade boto3 awscli" 195 | ] 196 | }, 197 | "build": { 198 | "commands": [ 199 | "echo 'Deploying Airflow Artifacts to S3'", 200 | "cd artifacts", 201 | "aws s3 sync airflow s3://${DATA_BUCKET}/airflow" 202 | ] 203 | }, 204 | "post_build": { 205 | "commands": [ 206 | "echo 'Airflow Artifacts Deployment Complete'" 207 | ] 208 | } 209 | } 210 | } 211 | ) 212 | ) 213 | data_bucket.grant_read_write(code_deployment.role) 214 | 215 | code_repo.on_commit( 216 | "StartDeploymentProject", 217 | target=targets.CodeBuildProject(code_deployment) 218 | ) -------------------------------------------------------------------------------- /Chapter08/cdk/app.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import aws_cdk as cdk 4 | from abalone_data_pipeline.abalone_data_pipeline_stack import DataPipelineStack 5 | 6 | MODEL = "abalone" 7 | CODECOMMIT_REPOSITORY = "abalone-data-pipeline" 8 | 9 | app = cdk.App() 10 | 11 | DataPipelineStack( 12 | app, 13 | CODECOMMIT_REPOSITORY, 14 | env=cdk.Environment(account=os.getenv("CDK_DEFAULT_ACCOUNT"), region=os.getenv("CDK_DEFAULT_REGION")), 15 | model_name=MODEL, 16 | repo_name=CODECOMMIT_REPOSITORY, 17 | airflow_environment_name=f"{MODEL}-airflow-environment" 18 | ) 19 | 20 | app.synth() -------------------------------------------------------------------------------- /Chapter08/lambda/analyze_results/index.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import logging 4 | import boto3 5 | import math 6 | 7 | logger = logging.getLogger() 8 | logger.setLevel(logging.INFO) 9 | 10 | def lambda_handler(event, context): 11 | logger.debug("## Environment Variables ##") 12 | logger.debug(os.environ) 13 | logger.debug("## Event ##") 14 | logger.debug(event) 15 | s3 = boto3.client("s3") 16 | if ("Bucket" in event): 17 | bucket = event["Bucket"] 18 | else: 19 | raise KeyError("S3 'Bucket' not found in Lambda event!") 20 | if ("Key" in event): 21 | key = event["Key"] 22 | else: 23 | raise KeyError("S3 'Key' not found in Lambda event!") 24 | logger.info("Downloading evlauation results file ...") 25 | json_file = json.loads(s3.get_object(Bucket = bucket, Key = key)['Body'].read()) 26 | logger.info("Analyzing Model Evaluation Results ...") 27 | y = json_file["GroundTruth"] 28 | y_hat = json_file["Predictions"] 29 | summation = 0 30 | for i in range (0, len(y)): 31 | squared_diff = (y[i] - y_hat[i])**2 32 | summation += squared_diff 33 | rmse = math.sqrt(summation/len(y)) 34 | logger.info("Root Mean Square Error: {}".format(rmse)) 35 | logger.info("Done!") 36 | return { 37 | "statusCode": 200, 38 | "Result": rmse, 39 | } -------------------------------------------------------------------------------- /Chapter09/Files/airflow/dags/.airflowignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Automated-Machine-Learning-on-AWS/fa106b966823211a4588286656b6cf01cc6c14b2/Chapter09/Files/airflow/dags/.airflowignore -------------------------------------------------------------------------------- /Chapter09/Files/airflow/dags/abalone_data_pipeline.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import json 3 | from datetime import timedelta 4 | 5 | import sagemaker 6 
| from sagemaker.tensorflow import TensorFlow 7 | from sagemaker.tensorflow.serving import Model 8 | from sagemaker.processing import ProcessingInput, ProcessingOutput, Processor 9 | from sagemaker.model_monitor import DataCaptureConfig 10 | 11 | import airflow 12 | from airflow import DAG 13 | from airflow.operators.python_operator import PythonOperator 14 | from airflow.providers.amazon.aws.operators.glue import AwsGlueJobOperator 15 | from airflow.providers.amazon.aws.operators.glue_crawler import AwsGlueCrawlerOperator 16 | from airflow.providers.amazon.aws.hooks.lambda_function import AwsLambdaHook 17 | from airflow.operators.python_operator import BranchPythonOperator 18 | from airflow.operators.dummy import DummyOperator 19 | 20 | sagemaker_session = sagemaker.Session() 21 | region_name = sagemaker_session.boto_region_name 22 | model_name = "abalone" 23 | data_prefix = "abalone_data" 24 | data_bucket = f"""{boto3.client("ssm", region_name=region_name).get_parameter(Name="AirflowDataBucket")["Parameter"]["Value"]}""" 25 | glue_job_name = f"""{boto3.client("ssm", region_name=region_name).get_parameter(Name="GlueJob")["Parameter"]["Value"]}""" 26 | crawler_name = f"""{boto3.client("ssm", region_name=region_name).get_parameter(Name="GlueCrawler")["Parameter"]["Value"]}""" 27 | sagemaker_role = f"""{boto3.client("ssm", region_name=region_name).get_parameter(Name="SageMakerRoleARN")["Parameter"]["Value"]}""" 28 | lambda_function = f"""{boto3.client("ssm", region_name=region_name).get_parameter(Name="AnalyzeResultsLambda")["Parameter"]["Value"]}""" 29 | container_image = f"763104351884.dkr.ecr.{region_name}.amazonaws.com/tensorflow-training:2.5.0-cpu-py37-ubuntu18.04-v1.0" 30 | training_input = f"s3://{data_bucket}/{data_prefix}/training" 31 | testing_input = f"s3://{data_bucket}/{data_prefix}/testing" 32 | data_capture = f"s3://{data_bucket}/endpoint-data-capture" 33 | default_args = { 34 | "owner": "airflow", 35 | "depends_on_past": False, 36 | "start_date": airflow.utils.dates.days_ago(1), 37 | "retries": 0, 38 | "retry_delay": timedelta(minutes=2) 39 | } 40 | 41 | 42 | def training(data, **kwargs): 43 | estimator = TensorFlow( 44 | base_job_name=model_name, 45 | entry_point="/usr/local/airflow/dags/model/model_training.py", 46 | role=sagemaker_role, 47 | framework_version="2.4", 48 | py_version="py37", 49 | hyperparameters={"epochs": 200, "batch-size": 8}, 50 | script_mode=True, 51 | instance_count=1, 52 | instance_type="ml.m5.xlarge", 53 | ) 54 | estimator.fit(data) 55 | kwargs["ti"].xcom_push( 56 | key="TrainingJobName", 57 | value=str(estimator.latest_training_job.name) 58 | ) 59 | 60 | 61 | def evaluation(ds, **kwargs): 62 | training_job_name = kwargs["ti"].xcom_pull(key="TrainingJobName") 63 | estimator = TensorFlow.attach(training_job_name) 64 | model_data = estimator.model_data 65 | processor = Processor( 66 | base_job_name=f"{model_name}-evaluation", 67 | image_uri=container_image, 68 | entrypoint=[ 69 | "python3", 70 | "/opt/ml/processing/input/code/evaluate.py" 71 | ], 72 | instance_count=1, 73 | instance_type="ml.m5.xlarge", 74 | role=sagemaker_role, 75 | max_runtime_in_seconds=1200 76 | ) 77 | processor.run( 78 | inputs=[ 79 | ProcessingInput( 80 | source=testing_input, 81 | destination="/opt/ml/processing/testing", 82 | input_name="input" 83 | ), 84 | ProcessingInput( 85 | source=model_data, 86 | destination="/opt/ml/processing/model", 87 | input_name="model" 88 | ), 89 | ProcessingInput( 90 | source="s3://{}/airflow/scripts/evaluate.py".format(data_bucket), 91 | 
destination="/opt/ml/processing/input/code", 92 | input_name="code" 93 | ) 94 | ], 95 | outputs=[ 96 | ProcessingOutput( 97 | source="/opt/ml/processing/evaluation", 98 | destination="s3://{}/{}/evaluation".format(data_bucket, data_prefix), 99 | output_name="evaluation" 100 | ) 101 | ] 102 | ) 103 | 104 | 105 | def deploy_model(ds, **kwargs): 106 | training_job_name = kwargs["ti"].xcom_pull(key="TrainingJobName") 107 | estimator = TensorFlow.attach(training_job_name) 108 | model = Model( 109 | model_data=estimator.model_data, 110 | role=sagemaker_role, 111 | framework_version="2.4", 112 | sagemaker_session=sagemaker.Session() 113 | ) 114 | model.deploy( 115 | initial_instance_count=2, 116 | instance_type="ml.m5.large", 117 | data_capture_config=DataCaptureConfig( 118 | enable_capture=True, 119 | sampling_percentage=100, 120 | destination_s3_uri=data_capture 121 | ) 122 | ) 123 | 124 | 125 | def get_results(ds, **kwargs): 126 | hook = AwsLambdaHook( 127 | function_name=lambda_function, 128 | aws_conn_id="aws_default", 129 | invocation_type="RequestResponse", 130 | log_type="None", 131 | qualifier="$LATEST", 132 | config=None 133 | ) 134 | request = hook.invoke_lambda( 135 | payload=json.dumps( 136 | { 137 | "Bucket": data_bucket, 138 | "Key": f"{data_prefix}/evaluation/evaluation.json" 139 | } 140 | ) 141 | ) 142 | response = json.loads(request["Payload"].read().decode()) 143 | kwargs["ti"].xcom_push( 144 | key="Results", 145 | value=response["Result"] 146 | ) 147 | 148 | 149 | def branch(ds, **kwargs): 150 | result = kwargs["ti"].xcom_pull(key="Results") 151 | if result > 3.1: 152 | return "rejected" 153 | else: 154 | return "approved" 155 | 156 | 157 | with DAG( 158 | dag_id=f"{model_name}-data-workflow", 159 | default_args=default_args, 160 | schedule_interval="@daily", 161 | concurrency=1, 162 | max_active_runs=1, 163 | ) as dag: 164 | 165 | crawler_task = AwsGlueCrawlerOperator( 166 | task_id="crawl_data", 167 | config={"Name": crawler_name} 168 | ) 169 | 170 | etl_task = AwsGlueJobOperator( 171 | task_id="preprocess_data", 172 | job_name=glue_job_name 173 | ) 174 | 175 | training_task = PythonOperator( 176 | task_id="training", 177 | python_callable=training, 178 | op_args=[training_input], 179 | provide_context=True, 180 | dag=dag 181 | ) 182 | 183 | evaluation_task = PythonOperator( 184 | task_id="evaluate_model", 185 | python_callable=evaluation, 186 | provide_context=True, 187 | dag=dag 188 | ) 189 | 190 | analyze_results_task = PythonOperator( 191 | task_id="analyze_results", 192 | python_callable=get_results, 193 | provide_context=True, 194 | dag=dag 195 | ) 196 | 197 | check_threshold_task = BranchPythonOperator( 198 | task_id="check_threshold", 199 | python_callable=branch, 200 | provide_context=True, 201 | dag=dag 202 | ) 203 | 204 | deployment_task = PythonOperator( 205 | task_id="deploy_model", 206 | python_callable=deploy_model, 207 | provide_context=True, 208 | dag=dag 209 | ) 210 | 211 | start_task = DummyOperator( 212 | task_id="start", 213 | dag=dag 214 | ) 215 | 216 | end_task = DummyOperator( 217 | task_id="end", 218 | dag=dag 219 | ) 220 | 221 | rejected_task = DummyOperator( 222 | task_id="rejected", 223 | dag=dag 224 | ) 225 | 226 | approved_task = DummyOperator( 227 | task_id="approved", 228 | dag=dag 229 | ) 230 | 231 | start_task >> crawler_task >> etl_task >> training_task >> evaluation_task >> analyze_results_task >> check_threshold_task >> [rejected_task, approved_task] 232 | approved_task >> deployment_task >> end_task 233 | rejected_task >> end_task 
-------------------------------------------------------------------------------- /Chapter09/Files/airflow/dags/model/model_training.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import numpy as np 4 | import pandas as pd 5 | 6 | import tensorflow as tf 7 | from tensorflow import keras 8 | from tensorflow.keras.models import Sequential 9 | from tensorflow.keras.layers import Dense 10 | from tensorflow.keras.optimizers import Adam 11 | from sklearn import preprocessing 12 | 13 | tf.get_logger().setLevel("ERROR") 14 | 15 | if __name__ == "__main__": 16 | print(f"Tensorflow Version: {tf.__version__}") 17 | column_names = ["rings", "sex", "length", "diameter", "height", "whole_weight", "shucked_weight", "viscera_weight", "shell_weight"] 18 | parser = argparse.ArgumentParser() 19 | parser.add_argument("--epochs", type=int, default=2) 20 | parser.add_argument("--batch-size", type=int, default=8) 21 | parser.add_argument("--model-dir", type=str, default=os.environ["SM_MODEL_DIR"]) 22 | parser.add_argument("--training", type=str, default=os.environ["SM_CHANNEL_TRAINING"]) 23 | args, _ = parser.parse_known_args() 24 | epochs = args.epochs 25 | batch_size = args.batch_size 26 | training_path = args.training 27 | model_path = args.model_dir 28 | train_data = pd.read_csv(os.path.join(training_path, "training.csv"), sep=",", names=column_names) 29 | val_data = pd.read_csv(os.path.join(training_path, "validation.csv"), sep=",", names=column_names) 30 | train_y = train_data["rings"].to_numpy() 31 | train_X = train_data.drop(["rings"], axis=1).to_numpy() 32 | val_y = val_data["rings"].to_numpy() 33 | val_X = val_data.drop(["rings"], axis=1).to_numpy() 34 | train_X = preprocessing.normalize(train_X) 35 | val_X = preprocessing.normalize(val_X) 36 | network_layers = [ 37 | Dense(64, activation="relu", kernel_initializer="normal", input_dim=8), 38 | Dense(64, activation="relu"), 39 | Dense(1, activation="linear") 40 | ] 41 | model = Sequential(network_layers) 42 | model.compile(optimizer="adam", loss="mse", metrics=["mae", "accuracy"]) 43 | model.summary() 44 | model.fit( 45 | train_X, 46 | train_y, 47 | validation_data=(val_X, val_y), 48 | batch_size=batch_size, 49 | epochs=epochs, 50 | shuffle=True, 51 | verbose=1 52 | ) 53 | 54 | model.save(os.path.join(model_path, "model.h5")) 55 | model_version = 1 56 | export_path = os.path.join(model_path, str(model_version)) 57 | tf.keras.models.save_model( 58 | model, 59 | export_path, 60 | overwrite=True, 61 | include_optimizer=True, 62 | save_format=None, 63 | signatures=None, 64 | options=None 65 | ) -------------------------------------------------------------------------------- /Chapter09/Files/airflow/scripts/preprocess.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import boto3 4 | import pyspark 5 | import pandas as pd 6 | from functools import reduce 7 | from pyspark.sql import SparkSession, DataFrame 8 | from pyspark.ml import Pipeline 9 | from pyspark.sql.types import StructField, StructType, StringType, DoubleType 10 | from pyspark.ml.feature import StringIndexer, VectorIndexer, OneHotEncoder, VectorAssembler 11 | from pyspark.sql.functions import * 12 | from awsglue.job import Job 13 | from awsglue.transforms import * 14 | from awsglue.context import GlueContext 15 | from pyspark.context import SparkContext 16 | from awsglue.utils import getResolvedOptions 17 | from awsglue.dynamicframe import DynamicFrame 18 
| 19 | 20 | def csv_line(data): 21 | r = ','.join(str(d) for d in data[1]) 22 | return str(data[0]) + "," + r 23 | 24 | def toS3(df, path): 25 | rdd = df.rdd.map(lambda x: (x.rings, x.features)) 26 | rdd_lines = rdd.map(csv_line) 27 | spark_df = rdd_lines.map(lambda x: str(x)).map(lambda s: s.split(",")).toDF() 28 | pd_df = spark_df.toPandas() 29 | pd_df = pd_df.drop(columns=["_3"]) 30 | pd_df.to_csv(f"s3://{path}", header=False, index=False) 31 | 32 | def main(): 33 | glueContext = GlueContext(SparkContext.getOrCreate()) 34 | spark = SparkSession.builder.appName("PySparkAbalone").getOrCreate() 35 | spark.sparkContext._jsc.hadoopConfiguration().set("mapred.output.committer.class", "org.apache.hadoop.mapred.FileOutputCommitter") 36 | args = getResolvedOptions(sys.argv, ["GLUE_CATALOG", "S3_BUCKET", "S3_INPUT_KEY_PREFIX", "S3_OUTPUT_KEY_PREFIX"]) 37 | schema = StructType( 38 | [ 39 | StructField("sex", StringType(), True), 40 | StructField("length", DoubleType(), True), 41 | StructField("diameter", DoubleType(), True), 42 | StructField("height", DoubleType(), True), 43 | StructField("whole_weight", DoubleType(), True), 44 | StructField("shucked_weight", DoubleType(), True), 45 | StructField("viscera_weight", DoubleType(), True), 46 | StructField("shell_weight", DoubleType(), True), 47 | StructField("rings", DoubleType(), True) 48 | ] 49 | ) 50 | columns = ["sex", "length", "diameter", "height", "whole_weight", "shucked_weight", "viscera_weight", "shell_weight", "rings"] 51 | new = glueContext.create_dynamic_frame_from_catalog(database=args["GLUE_CATALOG"], table_name="new", transformation_ctx="new") 52 | new_df = new.toDF() 53 | new_df = new_df.toDF(*columns) 54 | raw_df = spark.read.csv(("s3://{}".format(os.path.join(args["S3_BUCKET"], args["S3_INPUT_KEY_PREFIX"]))), header=False, schema=schema) 55 | merged_df = reduce(DataFrame.unionAll, [raw_df, new_df]) 56 | distinct_df = merged_df.distinct() 57 | sex_indexer = StringIndexer(inputCol="sex", outputCol="indexed_sex") 58 | sex_encoder = OneHotEncoder(inputCol="indexed_sex", outputCol="sex_vec") 59 | assembler = VectorAssembler( 60 | inputCols=[ 61 | "sex_vec", 62 | "length", 63 | "diameter", 64 | "height", 65 | "whole_weight", 66 | "shucked_weight", 67 | "viscera_weight", 68 | "shell_weight" 69 | ], 70 | outputCol="features" 71 | ) 72 | pipeline = Pipeline(stages=[sex_indexer, sex_encoder, assembler]) 73 | model = pipeline.fit(distinct_df) 74 | transformed_df = model.transform(merged_df) 75 | (train_df, validation_df, test_df) = transformed_df.randomSplit([0.8, 0.15, 0.05]) 76 | toS3(train_df, os.path.join(args["S3_BUCKET"], args["S3_OUTPUT_KEY_PREFIX"], "training/training.csv")) 77 | toS3(validation_df, os.path.join(args["S3_BUCKET"], args["S3_OUTPUT_KEY_PREFIX"], "training/validation.csv")) 78 | toS3(test_df, os.path.join(args["S3_BUCKET"], args["S3_OUTPUT_KEY_PREFIX"], "testing/testing.csv")) 79 | 80 | if __name__ == "__main__": 81 | main() -------------------------------------------------------------------------------- /Chapter09/Notebook/Simulating New Abalone Survey Data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Creating new `abalone` data using `CTGAN`\n", 8 | ">__NOTE:__ Recommend using the _Python 3 (Data Science)_ kernel, with an _ml.m5.4xlarge (16vCPU + 64GB)_ Instance Type. However, this will incur additional AWS usage costs."
9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "## Install `ctgan`" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "%%capture\n", 25 | "!pip install ctgan" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "## Load the Required Libraries" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "import io\n", 42 | "import boto3\n", 43 | "import warnings\n", 44 | "import pandas as pd\n", 45 | "from time import gmtime, strftime\n", 46 | "\n", 47 | "warnings.filterwarnings(\"ignore\")\n", 48 | "s3 = boto3.client(\"s3\")\n", 49 | "model_name = \"abalone\"\n", 50 | "column_names = [\n", 51 | " \"sex\",\n", 52 | " \"length\",\n", 53 | " \"diameter\",\n", 54 | " \"height\",\n", 55 | " \"whole_weight\",\n", 56 | " \"shucked_weight\",\n", 57 | " \"viscera_weight\",\n", 58 | " \"shell_weight\",\n", 59 | " \"rings\"\n", 60 | "]" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "## Load the \"raw\" data" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "data_bucket = f\"\"\"{boto3.client(\"ssm\").get_parameter(Name=\"AirflowDataBucket\")[\"Parameter\"][\"Value\"]}\"\"\"\n", 77 | "raw_data_key = f\"{model_name}_data/raw/abalone.data\"\n", 78 | "new_data_key = f\"{model_name}_data/new/abalone.{strftime('%Y%m%d%H%M%S', gmtime())}\"\n", 79 | "s3_object = s3.get_object(Bucket=data_bucket, Key=raw_data_key)\n", 80 | "raw_df = pd.read_csv(io.BytesIO(s3_object[\"Body\"].read()), encoding=\"utf8\", names=column_names)" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "## Fit the CTGAN Model on the `sex` target label\n", 88 | "\n", 89 | ">__NOTE:__ Fitting the `ctgan` model can up to 5 minutes, depending on the Kernel compute resources." 
90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "from ctgan import CTGAN\n", 99 | "\n", 100 | "ctgan = CTGAN()\n", 101 | "ctgan.fit(raw_df, [\"sex\"])" 102 | ] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "metadata": {}, 107 | "source": [ 108 | "## Generate `100` samples of \"new\" data\n", 109 | ">__NOTE:__ `100` new samples are used to realistially simulate the potential amount of new daily survey data" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "samples = ctgan.sample(100)" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "## Compare Datasets\n", 126 | "### `raw` dataset" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": null, 132 | "metadata": {}, 133 | "outputs": [], 134 | "source": [ 135 | "raw_df.describe()" 136 | ] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "metadata": {}, 141 | "source": [ 142 | "### `new` dataset" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [ 151 | "samples.describe()" 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "## Upload the new data to test the Airflow DAG" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": null, 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [ 167 | "samples.to_csv(f\"s3://{data_bucket}/{new_data_key}\", header=False, index=False)" 168 | ] 169 | } 170 | ], 171 | "metadata": { 172 | "instance_type": "ml.m5.4xlarge", 173 | "kernelspec": { 174 | "display_name": "Python 3 (Data Science)", 175 | "language": "python", 176 | "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-west-2:236514542706:image/datascience-1.0" 177 | }, 178 | "language_info": { 179 | "codemirror_mode": { 180 | "name": "ipython", 181 | "version": 3 182 | }, 183 | "file_extension": ".py", 184 | "mimetype": "text/x-python", 185 | "name": "python", 186 | "nbconvert_exporter": "python", 187 | "pygments_lexer": "ipython3", 188 | "version": "3.7.10" 189 | } 190 | }, 191 | "nbformat": 4, 192 | "nbformat_minor": 4 193 | } 194 | -------------------------------------------------------------------------------- /Chapter10/Files/airflow/dags/.airflowignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Automated-Machine-Learning-on-AWS/fa106b966823211a4588286656b6cf01cc6c14b2/Chapter10/Files/airflow/dags/.airflowignore -------------------------------------------------------------------------------- /Chapter10/Files/airflow/dags/continuous_training_pipeline.py: -------------------------------------------------------------------------------- 1 | import time 2 | import json 3 | import sagemaker 4 | import boto3 5 | import numpy as np 6 | import pandas as pd 7 | from time import sleep 8 | from datetime import timedelta 9 | from sagemaker.feature_store.feature_group import FeatureGroup 10 | 11 | import airflow 12 | from airflow import DAG 13 | from airflow.operators.python_operator import PythonOperator 14 | from airflow.providers.amazon.aws.hooks.lambda_function import AwsLambdaHook 15 | from airflow.providers.amazon.aws.sensors.s3_prefix import S3PrefixSensor 16 | 17 | sagemaker_session = sagemaker.Session() 18 | 
region_name = sagemaker_session.boto_region_name 19 | data_prefix = "abalone_data" 20 | data_bucket = f"""{boto3.client("ssm", region_name=region_name).get_parameter(Name="DataBucket")["Parameter"]["Value"]}""" 21 | lambda_function = f"""{boto3.client("ssm", region_name=region_name).get_parameter(Name="ReleaseChangeLambda")["Parameter"]["Value"]}""" 22 | fg_name = f"""{boto3.client("ssm", region_name=region_name).get_parameter(Name="FeatureGroup")["Parameter"]["Value"]}""" 23 | default_args = { 24 | "owner": "airflow", 25 | "depends_on_past": False, 26 | "start_date": airflow.utils.dates.days_ago(1), 27 | "retries": 0, 28 | "retry_delay": timedelta(minutes=2) 29 | } 30 | 31 | 32 | def start_pipeline(): 33 | hook = AwsLambdaHook( 34 | function_name=lambda_function, 35 | aws_conn_id="aws_default", 36 | invocation_type="RequestResponse", 37 | log_type="Tail", 38 | qualifier="$LATEST", 39 | config=None 40 | ) 41 | request = hook.invoke_lambda(payload="null") 42 | response = json.loads(request["Payload"].read().decode()) 43 | print(f"Response: {response}") 44 | 45 | 46 | def update_feature_group(): 47 | fg = FeatureGroup(name=fg_name, sagemaker_session=sagemaker_session) 48 | column_names = ["sex", "length", "diameter", "height", "whole_weight", "shucked_weight", "viscera_weight", "shell_weight", "rings"] 49 | abalone_data = pd.read_csv(f"s3://{data_bucket}/{data_prefix}/abalone.new", names=column_names) 50 | data = abalone_data[["rings", "sex", "length", "diameter", "height", "whole_weight", "shucked_weight", "viscera_weight", "shell_weight"]] 51 | processed_data = pd.get_dummies(data) 52 | time_stamp = int(round(time.time())) 53 | processed_data["TimeStamp"] = pd.Series([time_stamp] * len(processed_data), dtype="float64") 54 | fg.ingest(data_frame=processed_data, max_workers=5, wait=True) 55 | sleep(300) 56 | 57 | 58 | with DAG( 59 | dag_id=f"acme-data-workflow", 60 | default_args=default_args, 61 | schedule_interval="@daily", 62 | concurrency=1, 63 | max_active_runs=1, 64 | ) as dag: 65 | 66 | s3_trigger = S3PrefixSensor( 67 | task_id="s3_trigger", 68 | bucket_name=data_bucket, 69 | prefix=data_prefix, 70 | dag=dag 71 | ) 72 | 73 | update_fg_task = PythonOperator( 74 | task_id="update_fg", 75 | python_callable=update_feature_group, 76 | dag=dag 77 | ) 78 | 79 | trigger_release_task = PythonOperator( 80 | task_id="trigger_release_change", 81 | python_callable=start_pipeline, 82 | dag=dag 83 | ) 84 | 85 | s3_trigger >> update_fg_task >> trigger_release_task -------------------------------------------------------------------------------- /Chapter10/Files/airflow/requirements.txt: -------------------------------------------------------------------------------- 1 | sagemaker==2.49.1 2 | protobuf==3.19.0 3 | s3fs<=0.4 4 | boto3>=1.17.4 5 | numpy 6 | pandas 7 | -------------------------------------------------------------------------------- /Chapter10/Files/cdk/acme_pipeline_stack.py: -------------------------------------------------------------------------------- 1 | import aws_cdk as cdk 2 | import aws_cdk.aws_codecommit as codecommit 3 | import aws_cdk.aws_s3 as s3 4 | import aws_cdk.pipelines as pipelines 5 | import aws_cdk.aws_ssm as ssm 6 | from constructs import Construct 7 | 8 | class PipelineStack(cdk.Stack): 9 | 10 | def __init__(self, scope: Construct, id: str, *, model_name: str=None, group_name: str=None, repo_name: str=None, feature_group: str=None, threshold: float=None, cdk_version: str=None, **kwargs) -> None: 11 | super().__init__(scope, id, **kwargs) 12 | 13 | self.code_repo = 
codecommit.Repository( 14 | self, 15 | "Source-Repository", 16 | repository_name=repo_name, 17 | description="ACME Web Application Source Code Repository" 18 | ) 19 | cdk.CfnOutput( 20 | self, 21 | "Clone-URL", 22 | description="CodeCommit Clone URL", 23 | value=self.code_repo.repository_clone_url_http 24 | ) 25 | 26 | self.data_bucket = s3.Bucket( 27 | self, 28 | "Data-Bucket", 29 | bucket_name=f"data-{cdk.Aws.REGION}-{cdk.Aws.ACCOUNT_ID}", 30 | block_public_access=s3.BlockPublicAccess.BLOCK_ALL, 31 | auto_delete_objects=True, 32 | removal_policy=cdk.RemovalPolicy.DESTROY, 33 | versioned=True 34 | ) 35 | 36 | ssm.StringParameter( 37 | self, 38 | "Data-Bucket-Parameter", 39 | parameter_name="DataBucket", 40 | description="SSM Parameter for the S3 Data Bucket Name", 41 | string_value=self.data_bucket.bucket_name 42 | ) 43 | 44 | ssm.StringParameter( 45 | self, 46 | "Feature-Group-Parameter", 47 | parameter_name="FeatureGroup", 48 | description="SSM Paramater for the SageMaker Feature Store group", 49 | string_value=feature_group 50 | ) 51 | 52 | source_artifact = pipelines.CodePipelineSource.code_commit( 53 | repository=self.code_repo, 54 | branch="main" 55 | ) 56 | 57 | pipeline = pipelines.CodePipeline( 58 | self, 59 | "Application-Pipeline", 60 | pipeline_name="ACME-WebApp-Pipeline", 61 | self_mutation=False, 62 | cli_version=cdk_version, 63 | synth=pipelines.ShellStep( 64 | "Synth", 65 | input=source_artifact, 66 | commands=[ 67 | "printenv", 68 | f"npm install -g aws-cdk@{cdk_version}", 69 | "python -m pip install --upgrade pip", 70 | "pip install -r requirements.txt", 71 | "cdk synth" 72 | ] 73 | ) 74 | ) -------------------------------------------------------------------------------- /Chapter10/Files/cdk/app.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import aws_cdk as cdk 4 | from acme_web_application.acme_pipeline_stack import PipelineStack 5 | 6 | MODEL = "abalone" 7 | MODEL_GROUP = f"{MODEL.capitalize()}PackageGroup" 8 | FEATURE_GROUP = "PLACEHOLDER" 9 | CODECOMMIT_REPOSITORY = "acme-web-application" 10 | CDK_VERSION = "2.3.0" 11 | QUALITY_THRESHOLD = 3.1 12 | 13 | app = cdk.App() 14 | 15 | PipelineStack( 16 | app, 17 | CODECOMMIT_REPOSITORY, 18 | env=cdk.Environment(account=os.getenv("CDK_DEFAULT_ACCOUNT"), region=os.getenv("CDK_DEFAULT_REGION")), 19 | model_name=MODEL, 20 | repo_name=CODECOMMIT_REPOSITORY, 21 | group_name=MODEL_GROUP, 22 | feature_group=FEATURE_GROUP, 23 | cdk_version=CDK_VERSION, 24 | threshold=QUALITY_THRESHOLD, 25 | ) 26 | 27 | app.synth() 28 | -------------------------------------------------------------------------------- /Chapter10/Files/cdk/cdk.json: -------------------------------------------------------------------------------- 1 | { 2 | "app": "python3 app.py", 3 | "watch": { 4 | "include": [ 5 | "**" 6 | ], 7 | "exclude": [ 8 | "README.md", 9 | "cdk*.json", 10 | "requirements*.txt", 11 | "source.bat", 12 | "**/__init__.py", 13 | "python/__pycache__", 14 | "tests" 15 | ] 16 | }, 17 | "context": { 18 | "@aws-cdk/aws-apigateway:usagePlanKeyOrderInsensitiveId": true, 19 | "@aws-cdk/core:stackRelativeExports": true, 20 | "@aws-cdk/aws-rds:lowercaseDbIdentifier": true, 21 | "@aws-cdk/aws-lambda:recognizeVersionProps": true, 22 | "@aws-cdk/aws-cloudfront:defaultSecurityPolicyTLSv1.2_2021": true 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /Chapter10/Files/cdk/data_workflow_stack.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import aws_cdk as cdk 3 | import aws_cdk.aws_s3 as s3 4 | import aws_cdk.aws_ssm as ssm 5 | import aws_cdk.aws_s3_deployment as s3_deployment 6 | import aws_cdk.aws_ec2 as ec2 7 | import aws_cdk.aws_iam as iam 8 | import aws_cdk.aws_mwaa as mwaa 9 | import aws_cdk.aws_lambda as lambda_ 10 | from constructs import Construct 11 | 12 | 13 | class DataWorkflowStack(cdk.Stack): 14 | def __init__(self, scope: Construct, id: str, *, airflow_environment_name: str=None, data_bucket_name: str=None, pipeline_name: str=None, **kwargs) -> None: 15 | super().__init__(scope, id, **kwargs) 16 | 17 | data_bucket = s3.Bucket.from_bucket_name( 18 | self, 19 | "Data-Bucket", 20 | bucket_name=data_bucket_name 21 | ) 22 | 23 | data_bucket_param = ssm.StringParameter.from_string_parameter_name( 24 | self, 25 | "Data-Bucket-Parameter", 26 | string_parameter_name="DataBucket" 27 | ) 28 | 29 | group_name_param = ssm.StringParameter.from_string_parameter_name( 30 | self, 31 | "Feature-Group-Parameter", 32 | string_parameter_name="FeatureGroup" 33 | ) 34 | 35 | start_pipeline = lambda_.Function( 36 | self, 37 | "Release-Change", 38 | handler="index.lambda_handler", 39 | runtime=lambda_.Runtime.PYTHON_3_8, 40 | code=lambda_.Code.from_asset(os.path.join(os.path.dirname(__file__), "../../lambda/releaseChange")), 41 | environment={ 42 | "PIPELINE_NAME": pipeline_name 43 | }, 44 | memory_size=128, 45 | timeout=cdk.Duration.seconds(60) 46 | ) 47 | start_pipeline.add_to_role_policy( 48 | iam.PolicyStatement( 49 | actions=[ 50 | "codepipeline:StartPipelineExecution" 51 | ], 52 | effect=iam.Effect.ALLOW, 53 | resources=[ 54 | f"arn:aws:codepipeline:{cdk.Aws.REGION}:{cdk.Aws.ACCOUNT_ID}:{pipeline_name}" 55 | ] 56 | ) 57 | ) 58 | 59 | airflow_policy_document = { 60 | "Version": "2012-10-17", 61 | "Statement": [ 62 | { 63 | "Effect": "Allow", 64 | "Action": "airflow:PublishMetrics", 65 | "Resource": f"arn:aws:airflow:{cdk.Aws.REGION}:{cdk.Aws.ACCOUNT_ID}:environment/{airflow_environment_name}" 66 | }, 67 | { 68 | "Effect": "Deny", 69 | "Action": "s3:ListAllMyBuckets", 70 | "Resource": [ 71 | f"arn:aws:s3:::{data_bucket.bucket_name}", 72 | f"arn:aws:s3:::{data_bucket.bucket_name}/*" 73 | ] 74 | }, 75 | { 76 | "Effect": "Allow", 77 | "Action": [ 78 | "s3:GetObject*", 79 | "s3:GetBucket*", 80 | "s3:List*" 81 | ], 82 | "Resource": [ 83 | f"arn:aws:s3:::{data_bucket.bucket_name}", 84 | f"arn:aws:s3:::{data_bucket.bucket_name}/*" 85 | ] 86 | }, 87 | { 88 | "Effect": "Allow", 89 | "Action": [ 90 | "logs:CreateLogStream", 91 | "logs:CreateLogGroup", 92 | "logs:PutLogEvents", 93 | "logs:GetLogEvents", 94 | "logs:GetLogRecord", 95 | "logs:GetLogGroupFields", 96 | "logs:GetQueryResults" 97 | ], 98 | "Resource": [ 99 | f"arn:aws:logs:{cdk.Aws.REGION}:{cdk.Aws.ACCOUNT_ID}:log-group:airflow-{airflow_environment_name}-*" 100 | ] 101 | }, 102 | { 103 | "Effect": "Allow", 104 | "Action": [ 105 | "logs:DescribeLogGroups" 106 | ], 107 | "Resource": [ 108 | "*" 109 | ] 110 | }, 111 | { 112 | "Effect": "Allow", 113 | "Action": "cloudwatch:PutMetricData", 114 | "Resource": "*" 115 | }, 116 | { 117 | "Effect": "Allow", 118 | "Action": [ 119 | "sqs:ChangeMessageVisibility", 120 | "sqs:DeleteMessage", 121 | "sqs:GetQueueAttributes", 122 | "sqs:GetQueueUrl", 123 | "sqs:ReceiveMessage", 124 | "sqs:SendMessage" 125 | ], 126 | "Resource": f"arn:aws:sqs:{cdk.Aws.REGION}:*:airflow-celery-*" 127 | }, 128 | { 129 | "Effect": "Allow", 130 | "Action": [ 131 | 
"kms:Decrypt", 132 | "kms:DescribeKey", 133 | "kms:GenerateDataKey*", 134 | "kms:Encrypt" 135 | ], 136 | "NotResource": f"arn:aws:kms:*:{cdk.Aws.ACCOUNT_ID}:key/*", 137 | "Condition": { 138 | "StringLike": { 139 | "kms:ViaService": [ 140 | f"sqs.{cdk.Aws.REGION}.amazonaws.com" 141 | ] 142 | } 143 | } 144 | }, 145 | { 146 | "Effect": "Allow", 147 | "Action": [ 148 | "lambda:InvokeFunction" 149 | ], 150 | "Resource": f"{start_pipeline.function_arn}*" 151 | } 152 | ] 153 | } 154 | 155 | airflow_role = iam.Role( 156 | self, 157 | "AirflowRole", 158 | assumed_by=iam.CompositePrincipal( 159 | iam.ServicePrincipal("airflow.amazonaws.com"), 160 | iam.ServicePrincipal("airflow-env.amazonaws.com") 161 | ), 162 | inline_policies={ 163 | "AirflowRole-InlinePolicy": iam.PolicyDocument.from_json(airflow_policy_document) 164 | }, 165 | path="/service-role/" 166 | ) 167 | airflow_role.add_managed_policy(policy=iam.ManagedPolicy.from_aws_managed_policy_name("AmazonSageMakerFullAccess")) 168 | airflow_role.add_managed_policy(policy=iam.ManagedPolicy.from_aws_managed_policy_name("AmazonSageMakerFeatureStoreAccess")) 169 | data_bucket.grant_read_write(airflow_role) 170 | data_bucket_param.grant_read(airflow_role) 171 | group_name_param.grant_read(airflow_role) 172 | 173 | vpc = ec2.Vpc( 174 | self, 175 | "Airflow-VPC", 176 | cidr="10.0.0.0/16", 177 | max_azs=2, 178 | subnet_configuration=[ 179 | ec2.SubnetConfiguration( 180 | name="AirflowPublicSubnet", 181 | subnet_type=ec2.SubnetType.PUBLIC, 182 | cidr_mask=24 183 | ), 184 | ec2.SubnetConfiguration( 185 | name="AirflowPrivateSubnet", 186 | subnet_type=ec2.SubnetType.PRIVATE_WITH_NAT, 187 | cidr_mask=24 188 | ) 189 | ], 190 | nat_gateways=2, 191 | enable_dns_hostnames=True, 192 | enable_dns_support=True 193 | ) 194 | 195 | airflow_sg = ec2.SecurityGroup( 196 | self, 197 | "Airflow-SG", 198 | vpc=vpc, 199 | description="Airflow Internal Traffic", 200 | security_group_name=f"{airflow_environment_name}-sg" 201 | ) 202 | airflow_sg.connections.allow_internally(ec2.Port.all_traffic(), "MWAA") 203 | 204 | airflow_subnet_ids = list(map(lambda x: x.subnet_id, vpc.private_subnets)) 205 | 206 | airflow_network = mwaa.CfnEnvironment.NetworkConfigurationProperty( 207 | security_group_ids=[ 208 | airflow_sg.security_group_id 209 | ], 210 | subnet_ids=airflow_subnet_ids 211 | ) 212 | 213 | airflow_environment = mwaa.CfnEnvironment( 214 | self, 215 | "Airflow-Environment", 216 | name=airflow_environment_name, 217 | airflow_version="2.0.2", 218 | airflow_configuration_options={ 219 | "core.default_timezone": "utc", 220 | "logging.logging_level": "INFO" 221 | }, 222 | execution_role_arn=airflow_role.role_arn, 223 | environment_class="mw1.small", 224 | max_workers=5, 225 | source_bucket_arn=data_bucket.bucket_arn, 226 | dag_s3_path="airflow/dags", 227 | requirements_s3_path="airflow/requirements.txt", 228 | logging_configuration=mwaa.CfnEnvironment.LoggingConfigurationProperty( 229 | dag_processing_logs=mwaa.CfnEnvironment.ModuleLoggingConfigurationProperty( 230 | enabled=True, 231 | log_level="INFO" 232 | ), 233 | ), 234 | network_configuration=airflow_network, 235 | webserver_access_mode="PUBLIC_ONLY" 236 | ) 237 | 238 | artifacts_deployment = s3_deployment.BucketDeployment( 239 | self, 240 | "Deploy-Airflow-Artifacts", 241 | sources=[ 242 | s3_deployment.Source.asset(os.path.join(os.path.dirname(__file__), "../../airflow")) 243 | ], 244 | destination_bucket=data_bucket, 245 | destination_key_prefix="airflow", 246 | retain_on_delete=False 247 | ) 248 | 
airflow_environment.node.add_dependency(artifacts_deployment) 249 | 250 | lambda_param = ssm.StringParameter( 251 | self, 252 | "Release-Change-Parameter", 253 | parameter_name="ReleaseChangeLambda", 254 | description="SSM Parameter for the releaseChange Lambda Function", 255 | string_value=start_pipeline.function_name 256 | ) 257 | lambda_param.grant_read(airflow_role) 258 | -------------------------------------------------------------------------------- /Chapter10/Files/cdk/requirements.txt: -------------------------------------------------------------------------------- 1 | aws-cdk-lib==2.3.0 2 | constructs>=10.0.0,<11.0.0 3 | aws-cdk.aws-apigatewayv2-alpha==2.3.0a0 4 | aws-cdk.aws-apigatewayv2-integrations-alpha==2.3.0a0 5 | -------------------------------------------------------------------------------- /Chapter10/Files/lambda/createExperiment/index.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | import boto3 4 | import botocore 5 | from botocore.exceptions import ClientError 6 | 7 | logger = logging.getLogger() 8 | logger.setLevel(logging.INFO) 9 | cp = boto3.client("codepipeline") 10 | sm = boto3.client("sagemaker") 11 | 12 | 13 | def lambda_handler(event, context): 14 | logger.debug("## Environment Variables ##") 15 | logger.debug(os.environ) 16 | logger.debug("## Event ##") 17 | logger.debug(event) 18 | logger.info('Creating SageMaker Experiment') 19 | 20 | if ("modelName" in event): 21 | model_name = event["modelName"] 22 | else: 23 | raise KeyError("'Model Name' not found in Lambda event!") 24 | 25 | if ("pipelineName" in event): 26 | pipeline_name = event["pipelineName"] 27 | else: 28 | raise KeyError("'Pipeline Name' not found in Lambda event!") 29 | 30 | if ("stageName" in event): 31 | stage_name = event["stageName"] 32 | else: 33 | raise KeyError("'Pipeline Stage Name' not in Lambda event!") 34 | 35 | if ("actionName" in event): 36 | action_name = event["actionName"] 37 | else: 38 | raise KeyError("'Pipeline Action Name' not in Lambda event!") 39 | 40 | if ("dataBucket" in event): 41 | data_bucket = event["dataBucket"] 42 | else: 43 | raise KeyError("'Data Bucket Name' not found in Lambda event!") 44 | 45 | execution_id = get_executionId(pipeline_name, stage_name, action_name) 46 | experiment_name, trial_name = create_experiment(model_name, execution_id) 47 | 48 | payload = { 49 | "statusCode": 200, 50 | "executionId": execution_id, 51 | "experimentName": experiment_name, 52 | "trialName": trial_name, 53 | "processingJobName": f"{model_name}-processing-{execution_id}", 54 | "processingCodeInput": f"s3://{data_bucket}/scripts/preprocessing.py", 55 | "processingTrainingOutput": f"s3://{data_bucket}/{execution_id}/input/training", 56 | "processingTestingOutput": f"s3://{data_bucket}/{execution_id}/input/testing", 57 | "processingBaselineOutput": f"s3://{data_bucket}/{execution_id}/input/baseline", 58 | "trainingJobName": f"{model_name}-training-{execution_id}", 59 | "trainingDataInput": f"s3://{data_bucket}/{execution_id}/input/training", 60 | "trainingModelOutput": f"s3://{data_bucket}/{execution_id}/", 61 | "evaluationJobName": f"{model_name}-evaluation-{execution_id}", 62 | "evaluationCodeInput": f"s3://{data_bucket}/scripts/evaluation.py", 63 | "evaluationDataInput": f"s3://{data_bucket}/{execution_id}/input/testing/testing.csv", 64 | "evaluationOutput": f"s3://{data_bucket}/{execution_id}/input/evaluation", 65 | "evaluationOutputFile": f"{execution_id}/input/evaluation/evaluation.json", 66 | 
"baselineDataInput": f"s3://{data_bucket}/{execution_id}/input/baseline/baseline.csv", 67 | } 68 | 69 | return payload 70 | 71 | 72 | def get_executionId(pipeline_name, stage_name, action_name): 73 | logger.info(f"Getting the latest CodePipeline Execution ID for {pipeline_name}") 74 | try: 75 | response = cp.get_pipeline_state(name=pipeline_name) 76 | for stageState in response["stageStates"]: 77 | if stageState["stageName"] == stage_name: 78 | for actionState in stageState["actionStates"]: 79 | if actionState["actionName"] == action_name: 80 | executionId = stageState["latestExecution"]["pipelineExecutionId"] 81 | except ClientError as e: 82 | error_message = e.response["Error"]["Message"] 83 | logger.error(error_message) 84 | raise Exception(error_message) 85 | 86 | logger.info(f"Current Pipeline Execution ID: {executionId}") 87 | return executionId 88 | 89 | 90 | def create_experiment(model_name, execution_id): 91 | experiment_name = f"{model_name.capitalize()}Experiments" 92 | trial_name = f"{model_name.capitalize()}-{execution_id}" 93 | logger.info("Getting list of SageMaker Experiments") 94 | try: 95 | response = sm.list_experiments( 96 | SortBy="Name", 97 | MaxResults=100 98 | ) 99 | names = [experiments["ExperimentName"] for experiments in response["ExperimentSummaries"]] 100 | except ClientError as e: 101 | error_message = e.response["Error"]["Message"] 102 | logger.error(error_message) 103 | raise Exception(error_message) 104 | 105 | logger.info(f"Checking if Experiment already exists") 106 | if experiment_name not in names: 107 | try: 108 | response = sm.create_experiment( 109 | ExperimentName=experiment_name, 110 | Description=f"Training Experiments for {model_name}", 111 | ) 112 | logger.info(f"Created SageMaker Experiment: {experiment_name}") 113 | except ClientError as e: 114 | error_message = e.response["Error"]["Message"] 115 | logger.error(error_message) 116 | raise Exception(error_message) 117 | 118 | logger.info(f"Creating Associated SageMaker Trial") 119 | try: 120 | response = sm.create_trial( 121 | ExperimentName=experiment_name, 122 | TrialName=trial_name 123 | ) 124 | except ClientError as e: 125 | error_message = e.response["Error"]["Message"] 126 | logger.error(error_message) 127 | raise Exception(error_message) 128 | 129 | return experiment_name, trial_name -------------------------------------------------------------------------------- /Chapter10/Files/lambda/evaluateResults/index.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import logging 4 | import boto3 5 | import botocore 6 | from botocore.exceptions import ClientError 7 | from urllib.parse import urlparse 8 | 9 | logger = logging.getLogger() 10 | logger.setLevel(logging.INFO) 11 | s3 = boto3.client("s3") 12 | ssm = boto3.client("ssm") 13 | sm = boto3.client("sagemaker") 14 | 15 | 16 | def lambda_handler(event, context): 17 | logger.debug("## Environment Variables ##") 18 | logger.debug(os.environ) 19 | logger.debug("## Event ##") 20 | logger.debug(event) 21 | 22 | if ("evaluationFile" in event): 23 | evaluation_file = event["evaluationFile"] 24 | else: 25 | raise KeyError("'S3 Key for Evaluation File' not found in Lambda event!") 26 | 27 | logger.info("Reading Evaluation Report") 28 | try: 29 | obj = s3.get_object(Bucket=os.environ["BUCKET"], Key=evaluation_file)["Body"].read() 30 | except ClientError as e: 31 | error_message = e.response["Error"]["Message"] 32 | logger.error(error_message) 33 | raise Exception(error_message) 34 | 35 
| current_report = json.loads(obj) 36 | logger.info(f"Current Evaluation Report: {current_report}") 37 | current_rmse = current_report["regression_metrics"]["rmse"]["value"] 38 | 39 | logger.info("Reading Previous Model's Evaluation Report") 40 | model_package = get_package(os.environ["PACKAGE_PARAMETER"]) 41 | if model_package != "PLACEHOLDER": 42 | try: 43 | uri = sm.describe_model_package( 44 | ModelPackageName=model_package 45 | )["ModelMetrics"]["ModelQuality"]["Statistics"]["S3Uri"] 46 | bucket = urlparse(uri).netloc 47 | key = urlparse(uri).path.lstrip("/") 48 | previous_obj = s3.get_object(Bucket=bucket, Key=key)["Body"].read() 49 | except ClientError as e: 50 | error_message = e.response["Error"]["Message"] 51 | logger.error(error_message) 52 | raise Exception(error_message) 53 | 54 | previous_report = json.loads(previous_obj) 55 | logger.info(f"Previous Evaluation Report: {previous_report}") 56 | previous_rmse = previous_report["regression_metrics"]["rmse"]["value"] 57 | 58 | if current_rmse < previous_rmse: 59 | improved = "TRUE" 60 | else: 61 | improved = "FALSE" 62 | else: 63 | improved = "TRUE" 64 | logger.info(f"Model Improved: {improved}") 65 | 66 | return { 67 | 'statusCode': 200, 68 | 'rmse': current_rmse, 69 | 'improved': improved 70 | } 71 | 72 | 73 | def get_package(parameter_name): 74 | try: 75 | package = ssm.get_parameter( 76 | Name=parameter_name 77 | )['Parameter']['Value'] 78 | 79 | return package 80 | 81 | except ClientError as e: 82 | error_message = e.response['Error']['Message'] 83 | logger.error(error_message) 84 | raise Exception(error_message) 85 | -------------------------------------------------------------------------------- /Chapter10/Files/lambda/registerModel/index.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | import boto3 4 | from botocore.exceptions import ClientError 5 | 6 | sm = boto3.client("sagemaker") 7 | ssm = boto3.client("ssm") 8 | logger = logging.getLogger() 9 | logger.setLevel(logging.INFO) 10 | 11 | 12 | def lambda_handler(event, context): 13 | logger.debug("## Environment Variables ##") 14 | logger.debug(os.environ) 15 | logger.debug("## Event ##") 16 | logger.debug(event) 17 | 18 | if ("modelUri" in event): 19 | model_uri = event["modelUri"] 20 | else: 21 | raise KeyError("'Model Uri' not found in Lambda event!") 22 | 23 | if ("evaluationUri" in event): 24 | evaluation_uri = event["evaluationUri"] 25 | else: 26 | raise KeyError("'Evaluation File URI' not found in Lambda event!") 27 | 28 | if ("baselineUri" in event): 29 | baseline_uri = event["baselineUri"] 30 | else: 31 | raise KeyError("'Testing Data URI' not found in Lambda event!") 32 | 33 | if ("executionId" in event): 34 | execution_id = event["executionId"] 35 | else: 36 | raise KeyError("'Execution ID' not found in Lambda event!") 37 | 38 | request = { 39 | "InferenceSpecification": { 40 | "Containers": [ 41 | { 42 | "Image": os.environ["IMAGE_URI"], 43 | "ModelDataUrl": model_uri 44 | } 45 | ], 46 | "SupportedContentTypes": [ 47 | "text/csv" 48 | ], 49 | "SupportedRealtimeInferenceInstanceTypes": [ 50 | "ml.t2.large", 51 | "ml.c5.large", 52 | "ml.c5.xlarge" 53 | ], 54 | "SupportedResponseMIMETypes": [ 55 | "text/csv" 56 | ], 57 | "SupportedTransformInstanceTypes": [ 58 | "ml.c5.xlarge" 59 | ] 60 | }, 61 | "ModelApprovalStatus": "Approved", 62 | "MetadataProperties": { 63 | "ProjectId": execution_id, 64 | "GeneratedBy": "CDK Pipeline" 65 | }, 66 | "ModelMetrics": { 67 | "ModelQuality": { 68 |
"Statistics": { 69 | "ContentType": "application/json", 70 | "S3Uri": f"s3://{os.environ['BUCKET']}/{evaluation_uri}" 71 | } 72 | } 73 | }, 74 | "ModelPackageDescription": "MLOps Production Model", 75 | "ModelPackageGroupName": os.environ["GROUP_NAME"] 76 | } 77 | 78 | try: 79 | logger.info("Creating model package.") 80 | response = sm.create_model_package(**request) 81 | model_package_arn = response["ModelPackageArn"] 82 | except ClientError as e: 83 | error_message = e.response["Error"]["Message"] 84 | logger.error(error_message) 85 | raise Exception(error_message) 86 | 87 | try: 88 | logger.info("Updating SSM Parameter with the latest model package.") 89 | response = ssm.put_parameter( 90 | Name=os.environ["PACKAGE_PARAMETER"], 91 | Value=model_package_arn, 92 | Type="String", 93 | Overwrite=True 94 | ) 95 | except ClientError as e: 96 | error_message = e.response["Error"]["Message"] 97 | logger.error(error_message) 98 | raise Exception(error_message) 99 | 100 | try: 101 | logger.info("Creating SSM Parameter with the latest copy of the testing data.") 102 | response = ssm.put_parameter( 103 | Name=os.environ["BASELINE_PARAMETER"], 104 | Value=baseline_uri, 105 | Type="String", 106 | Overwrite=True 107 | ) 108 | except ClientError as e: 109 | error_message = e.response["Error"]["Message"] 110 | logger.error(error_message) 111 | raise Exception(error_message) 112 | 113 | logger.info("Done!") 114 | return { 115 | "statusCode": 200, 116 | "PackageArn": model_package_arn, 117 | "TestingParameter": "TestingDataUri" 118 | } 119 | -------------------------------------------------------------------------------- /Chapter10/Files/lambda/registryCreator/index.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | import boto3 4 | from botocore.exceptions import ClientError 5 | 6 | sm = boto3.client("sagemaker") 7 | logger = logging.getLogger() 8 | logger.setLevel(logging.INFO) 9 | 10 | 11 | def lambda_handler(event, context): 12 | logger.debug("## Environment Variables ##") 13 | logger.debug(os.environ) 14 | logger.debug("## Event ##") 15 | logger.debug(event) 16 | props = event["ResourceProperties"] 17 | group_name = props["GroupName"] 18 | 19 | if event["RequestType"] == "Create": 20 | try: 21 | response = sm.create_model_package_group( 22 | ModelPackageGroupName=group_name, 23 | ModelPackageGroupDescription="Models Package Group for Production Models", 24 | Tags=[ 25 | { 26 | "Key": "Name", 27 | "Value": group_name 28 | } 29 | ] 30 | ) 31 | package_arn = response["ModelPackageGroupArn"] 32 | logger.info(f"Created Model Model Package Group: {package_arn}") 33 | return { 34 | "PhysicalResourceId": group_name, 35 | "Data": { 36 | "ModelPackageArn": package_arn 37 | } 38 | } 39 | except ClientError as e: 40 | error_message = e.response["Error"]["Message"] 41 | logging.error(f"Failed to create Model Package Group: {error_message}") 42 | raise Exception(error_message) 43 | 44 | elif event["RequestType"] == "Delete": 45 | try: 46 | response = sm.list_model_packages( 47 | ModelPackageGroupName=group_name, 48 | ModelApprovalStatus="Approved", 49 | SortBy="CreationTime", 50 | MaxResults=100 51 | ) 52 | 53 | for model_package in response["ModelPackageSummaryList"]: 54 | sm.delete_model_package(ModelPackageName=model_package["ModelPackageArn"]) 55 | 56 | sm.delete_model_package_group(ModelPackageGroupName=group_name) 57 | logger.info(f"Deleted Model Package Group: {group_name}") 58 | return { 59 | "PhysicalResourceId": group_name, 60 | 
"Data":{} 61 | } 62 | 63 | except ClientError as e: 64 | error_message = e.response["Error"]["Messgae"] 65 | logger.error(f"Failed to delete Model Package Group: {error_message}") 66 | raise Exception(error_message) -------------------------------------------------------------------------------- /Chapter10/Files/lambda/releaseChange/index.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | import boto3 4 | import botocore 5 | from botocore.exceptions import ClientError 6 | 7 | logger = logging.getLogger() 8 | logger.setLevel(logging.INFO) 9 | cp = boto3.client("codepipeline") 10 | 11 | 12 | def lambda_handler(event, context): 13 | logger.debug("## Environment Variables ##") 14 | logger.debug(os.environ) 15 | logger.debug("## Event ##") 16 | logger.debug(event) 17 | pipeline_name = os.environ["PIPELINE_NAME"] 18 | logger.info(f"Starting Coninuous Training release change for {pipeline_name}") 19 | try: 20 | response = cp.start_pipeline_execution( 21 | name=pipeline_name 22 | ) 23 | logger.info(f'Release Change ExecutionId: {response["pipelineExecutionId"]}') 24 | except ClientError as e: 25 | error_message = e.response["Error"]["Message"] 26 | logger.error(error_message) 27 | raise Exception(error_message) 28 | return { 29 | "statusCode": 200, 30 | "ExecutionId": response["pipelineExecutionId"] 31 | } -------------------------------------------------------------------------------- /Chapter10/Files/scripts/evaluation.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import pathlib 4 | import tarfile 5 | import pandas as pd 6 | import numpy as np 7 | import tensorflow as tf 8 | from tensorflow import keras 9 | from tensorflow.keras.models import Sequential 10 | from tensorflow.keras.layers import Dense 11 | from tensorflow.keras.optimizers import Adam 12 | from sklearn import preprocessing 13 | from sklearn.metrics import mean_squared_error 14 | 15 | 16 | def load_model(base_dir): 17 | print("Loading Model") 18 | 19 | model_path = os.path.join(base_dir, "model/model.tar.gz") 20 | with tarfile.open(model_path) as tar: 21 | tar.extractall(".") 22 | 23 | model = tf.keras.models.load_model("model.h5") 24 | model.compile(optimizer="adam", loss="mse") 25 | return model 26 | 27 | 28 | def save_report(directory, report): 29 | print("Saving Evaluation Report") 30 | pathlib.Path(directory).mkdir(parents=True, exist_ok=True) 31 | evaluation_path = f"{directory}/evaluation.json" 32 | with open(evaluation_path, "w") as f: 33 | f.write(json.dumps(report)) 34 | 35 | def save_baseline(directory, predictions, labels): 36 | print("Saving Evaluation Quality Baseline") 37 | pathlib.Path(directory).mkdir(parents=True, exist_ok=True) 38 | baseline_path = f"{directory}/baseline.csv" 39 | baseline_dict = {"prediction": predictions, "label": labels} 40 | pd.DataFrame(baseline_dict).to_csv(baseline_path, header=True, index=False) 41 | 42 | 43 | def evaluate_model(base_dir, model): 44 | print("Evaluating Model") 45 | truths = [] 46 | predictions = [] 47 | column_names = [ 48 | "rings", 49 | "length", 50 | "diameter", 51 | "height", 52 | "whole_weight", 53 | "shucked_weight", 54 | "viscera_weight", 55 | "shell_weight", 56 | "sex_F", 57 | "sex_I", 58 | "sex_M" 59 | ] 60 | data_path = os.path.join(base_dir, "data/testing.csv") 61 | data = pd.read_csv(data_path, names=column_names) 62 | y = data["rings"].to_numpy() 63 | X = data.drop(["rings"], axis=1).to_numpy() 64 | X = 
preprocessing.normalize(X) 65 | for row in range(len(X)): 66 | payload = [X[row].tolist()] 67 | result = model.predict(payload) 68 | print(f"Result: {result[0][0]}") 69 | predictions.append(float(result[0][0])) 70 | truths.append(float(y[row])) 71 | return truths, predictions 72 | 73 | 74 | if __name__ == "__main__": 75 | input_dir = "/opt/ml/processing/input" 76 | output_dir = "/opt/ml/processing/output/evaluation" 77 | baseline_dir = "/opt/ml/processing/output/baseline" 78 | model = load_model(input_dir) 79 | y, y_pred = evaluate_model(input_dir, model) 80 | save_baseline(baseline_dir, y_pred, y) 81 | mse = mean_squared_error(y, y_pred) 82 | print(f"Mean Squared Error: {mse}") 83 | rmse = mean_squared_error(y, y_pred, squared=False) 84 | print(f"Root Mean Squared Error: {rmse}") 85 | std = np.std(np.array(y) - np.array(y_pred)) 86 | print(f"Standard Deviation: {std}") 87 | report_dict = { 88 | "regression_metrics": { 89 | "rmse": { 90 | "value": rmse, 91 | "standard_deviation": std 92 | }, 93 | "mse": { 94 | "value": mse, 95 | "standard_deviation": std 96 | }, 97 | }, 98 | } 99 | save_report(output_dir, report_dict) 100 | -------------------------------------------------------------------------------- /Chapter10/Files/scripts/preprocessing.py: -------------------------------------------------------------------------------- 1 | import os 2 | import boto3 3 | import numpy as np 4 | import pandas as pd 5 | import awswrangler as wr 6 | from sklearn.utils import shuffle 7 | from botocore.exceptions import ClientError 8 | 9 | boto3.setup_default_session(region_name=os.environ["AWS_REGION"]) 10 | sm = boto3.client("sagemaker") 11 | 12 | 13 | def get_featurestore_params(feature_group_name): 14 | try: 15 | response = sm.describe_feature_group( 16 | FeatureGroupName=feature_group_name 17 | ) 18 | return response["OfflineStoreConfig"]["DataCatalogConfig"]["Database"], response["OfflineStoreConfig"]["DataCatalogConfig"]["TableName"] 19 | except ClientError as e: 20 | error_message = e.response["Error"]["Message"] 21 | print(error_message) 22 | raise Exception(error_message) 23 | 24 | 25 | if __name__ == "__main__": 26 | base_dir = "/opt/ml/processing" 27 | print('Loading "raw" data') 28 | fg_name = os.environ["FEATURE_GROUP_NAME"] 29 | print(f"Using Feature Group: {fg_name}") 30 | columns = ["rings", "length", "diameter", "height", "whole_weight", "shucked_weight", "viscera_weight", "shell_weight", "sex_f", "sex_i", "sex_m"] 31 | database, table = get_featurestore_params(fg_name) 32 | print("Querying Feature Store Data") 33 | query_string = f'SELECT {",".join(columns)} FROM "{table}" WHERE is_deleted=false;' 34 | featurestore_df = wr.athena.read_sql_query(query_string, database=database, ctas_approach=False) 35 | print("Shuffling Data") 36 | X = shuffle(featurestore_df).to_numpy() 37 | print("Spliting the data into training, validation and testing datasets ...") 38 | training, validation, testing = np.split(X, [int(.8*len(X)), int(.95*len(X))]) 39 | print("Saving datasets to S3") 40 | pd.DataFrame(training).to_csv(f"{base_dir}/output/training/training.csv", header=False, index=False) 41 | pd.DataFrame(validation).to_csv(f"{base_dir}/output/training/validation.csv", header=False, index=False) 42 | pd.DataFrame(testing).to_csv(f"{base_dir}/output/testing/testing.csv", header=False, index=False) 43 | -------------------------------------------------------------------------------- /Chapter10/Notebooks/SageMaker Feature Store Example.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# SageMaker Feature Store Example\n", 8 | "\n", 9 | ">__NOTE:__ This Notebook uses the _Python 3 (Data Science)_ Kernel\n", 10 | "\n", 11 | "## Setup" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "import warnings\n", 21 | "import time\n", 22 | "import sagemaker\n", 23 | "import boto3\n", 24 | "import numpy as np\n", 25 | "import pandas as pd\n", 26 | "from time import gmtime, strftime\n", 27 | "from sklearn import preprocessing\n", 28 | "from sagemaker.feature_store.feature_group import FeatureGroup\n", 29 | "\n", 30 | "warnings.filterwarnings(\"ignore\")\n", 31 | "\n", 32 | "role = sagemaker.get_execution_role()\n", 33 | "sagemaker_session = sagemaker.Session()\n", 34 | "region_name = sagemaker_session.boto_region_name\n", 35 | "data_bucket = f\"\"\"{boto3.client(\"ssm\", region_name=region_name).get_parameter(Name=\"DataBucket\")[\"Parameter\"][\"Value\"]}\"\"\"\n", 36 | "prefix = \"sagemaker-featurestore\"\n", 37 | "\n", 38 | "def check_feature_group_status(feature_group):\n", 39 | " status = feature_group.describe().get(\"FeatureGroupStatus\")\n", 40 | " while status == \"Creating\":\n", 41 | " print(\"Waiting for Feature Group to be Created\")\n", 42 | " time.sleep(5)\n", 43 | " status = feature_group.describe().get(\"FeatureGroupStatus\")\n", 44 | " print(f\"FeatureGroup {feature_group.name} successfully created.\")\n", 45 | "\n", 46 | "def check_data_availability(feature_group, bucket):\n", 47 | " s3_client = sagemaker_session.boto_session.client('s3', region_name=region_name)\n", 48 | " offline_store_contents = None\n", 49 | " feature_group_s3_uri = feature_group.describe().get(\"OfflineStoreConfig\").get(\"S3StorageConfig\").get(\"ResolvedOutputS3Uri\")\n", 50 | " feature_group_s3_prefix = feature_group_s3_uri.replace(f\"s3://{bucket}/\", \"\")\n", 51 | " while offline_store_contents is None:\n", 52 | " objects_in_bucket = s3_client.list_objects(Bucket=bucket, Prefix=feature_group_s3_prefix)\n", 53 | " if ('Contents' in objects_in_bucket and len(objects_in_bucket['Contents']) > 1):\n", 54 | " offline_store_contents = objects_in_bucket['Contents']\n", 55 | " else:\n", 56 | " print('Waiting for data into the offline store...\\n')\n", 57 | " time.sleep(60)\n", 58 | " print('Data available.') " 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "---\n", 66 | "\n", 67 | "## Data Preparation\n", 68 | "\n", 69 | "### Import Python Libraries and Helper Funcitons" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "### Download the Data" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "column_names = [\"sex\", \"length\", \"diameter\", \"height\", \"whole_weight\", \"shucked_weight\", \"viscera_weight\", \"shell_weight\", \"rings\"]\n", 86 | "abalone_data = pd.read_csv(\"http://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data\", names=column_names)\n", 87 | "abalone_data.head()" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "### Data Processing and Feature Engineering" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": {}, 101 | 
"outputs": [], 102 | "source": [ 103 | "data = abalone_data[[\"rings\", \"sex\", \"length\", \"diameter\", \"height\", \"whole_weight\", \"shucked_weight\", \"viscera_weight\", \"shell_weight\"]]\n", 104 | "processed_data = pd.get_dummies(data)\n", 105 | "processed_data.head()" 106 | ] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "metadata": {}, 111 | "source": [ 112 | "---\n", 113 | "\n", 114 | "## SageMaker Feature Store\n", 115 | "\n", 116 | "### Define the Feature Group" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 125 | "fg_name = f\"abalone-fg-{strftime('%d-%H-%M-%S', gmtime())}\"\n", 126 | "fg = FeatureGroup(name=fg_name, sagemaker_session=sagemaker_session)" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": {}, 132 | "source": [ 133 | "### Create Ingestion Timestamp Identifier (Event Time Feature)" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [ 142 | "time_stamp = int(round(time.time()))\n", 143 | "processed_data[\"TimeStamp\"] = pd.Series([time_stamp] * len(processed_data), dtype=\"float64\")" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": [ 150 | "### Create Feature Definition Schema" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "fg.load_feature_definitions(data_frame=processed_data)" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "### Create the Feature Group" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "metadata": {}, 173 | "outputs": [], 174 | "source": [ 175 | "fg.create(\n", 176 | " s3_uri=f\"s3://{data_bucket}/{prefix}\",\n", 177 | " record_identifier_name=\"rings\",\n", 178 | " event_time_feature_name=\"TimeStamp\",\n", 179 | " role_arn=role,\n", 180 | " enable_online_store=False\n", 181 | ")\n", 182 | "\n", 183 | "check_feature_group_status(fg)" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "### Ingest Data into the Feature Group" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [ 199 | "fg.ingest(data_frame=processed_data, max_workers=5, wait=True)\n", 200 | "\n", 201 | "check_data_availability(fg, data_bucket)" 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "metadata": {}, 207 | "source": [ 208 | "### Describe the Feature Group\n", 209 | "\n", 210 | ">__NOTE:__ Make sure to capture the name of the Feature Group _(FeatureGroupName)_, as we will be using this later." 
211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": {}, 217 | "outputs": [], 218 | "source": [ 219 | "fg.describe()" 220 | ] 221 | } 222 | ], 223 | "metadata": { 224 | "instance_type": "ml.t3.medium", 225 | "kernelspec": { 226 | "display_name": "Python 3 (Data Science)", 227 | "language": "python", 228 | "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-west-2:236514542706:image/datascience-1.0" 229 | }, 230 | "language_info": { 231 | "codemirror_mode": { 232 | "name": "ipython", 233 | "version": 3 234 | }, 235 | "file_extension": ".py", 236 | "mimetype": "text/x-python", 237 | "name": "python", 238 | "nbconvert_exporter": "python", 239 | "pygments_lexer": "ipython3", 240 | "version": "3.7.10" 241 | } 242 | }, 243 | "nbformat": 4, 244 | "nbformat_minor": 4 245 | } 246 | -------------------------------------------------------------------------------- /Chapter10/www/404.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 404 pages 8 | 9 | 10 | 11 | 12 | 13 |
[The markup of 404.html did not survive extraction; only its text content remains: the page displays a "404" graphic and the message "Page not found" (see error-page.css below for its styling).]
54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /Chapter10/www/css/error-page.css: -------------------------------------------------------------------------------- 1 | *{ 2 | padding:0px; 3 | margin:0px; 4 | } 5 | body{ 6 | background:black; 7 | } 8 | .st0{font-family:'FootlightMTLight';} 9 | .st1{font-size:83.0285px;} 10 | .st2{fill:gray;} 11 | 12 | svg{ 13 | width: 500px; 14 | height: 400px; 15 | text-align: center; 16 | /* fill: #ff6a00; */ 17 | fill: #ff06ac; 18 | } 19 | path#XMLID_5_ { 20 | fill: #ff06ac; 21 | /* fill: #ff6a00; */ 22 | filter: url(#blurFilter4); 23 | } 24 | path#XMLID_11_ ,path#XMLID_2_ { 25 | fill: #ff06ac; 26 | /* fill: #ff6a00; */ 27 | } 28 | .circle{ 29 | animation: out 2s infinite ease-out; 30 | fill: #ff06ac; 31 | /* fill: #ff6a00; */ 32 | } 33 | 34 | #container{ 35 | text-align:center; 36 | } 37 | .message{ 38 | /* color: #ff6a00; */ 39 | color: #ff06ac; 40 | } 41 | .message:after{ 42 | content:"]"; 43 | } 44 | .message:before{ 45 | content:"["; 46 | } 47 | 48 | .message:after, .message:before { 49 | color: #ff06ac; 50 | /* color: #ff6a00; */ 51 | font-size: 20px; 52 | -webkit-animation-name: opacity; 53 | -webkit-animation-duration: 2s; 54 | -webkit-animation-iteration-count: infinite; 55 | -webkit-animation-name: opacity; 56 | animation-name: opacity; 57 | -webkit-animation-duration: 2s; 58 | animation-duration: 2s; 59 | -webkit-animation-iteration-count: infinite; 60 | animation-iteration-count: infinite; 61 | margin:0 50px; 62 | } 63 | 64 | @-webkit-keyframes opacity { 65 | 0%, 100% { 66 | opacity: 0; 67 | } 68 | 50% { 69 | opacity: 1; 70 | } 71 | } 72 | 73 | @keyframes opacity { 74 | 0%, 100% { 75 | opacity: 0; 76 | } 77 | 50% { 78 | opacity: 1; 79 | } 80 | } 81 | 82 | @keyframes out { 83 | 0% {r:1; opacity: 0.9 ;} 84 | 25%{r:5; opacity: 0.3 ;} 85 | 50%{r:10; opacity: 0.2 ;} 86 | 75%{r:15;opacity:0.1;} 87 | 100% {r:20;opacity:0;} 88 | } 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | -------------------------------------------------------------------------------- /Chapter10/www/css/main-page.css: -------------------------------------------------------------------------------- 1 | /*! 
2 | * Start Bootstrap - One Page Wonder v5.0.8 (https://startbootstrap.com/theme/one-page-wonder) 3 | * Copyright 2013-2020 Start Bootstrap 4 | * Licensed under MIT (https://github.com/StartBootstrap/startbootstrap-one-page-wonder/blob/master/LICENSE) 5 | */ 6 | 7 | body { 8 | font-family: 'Lato'; 9 | } 10 | 11 | h1, 12 | h2, 13 | h3, 14 | h4, 15 | h5, 16 | h6 { 17 | font-family: 'Catamaran'; 18 | font-weight: 800 !important; 19 | } 20 | 21 | .btn-xl { 22 | text-transform: uppercase; 23 | padding: 1.5rem 3rem; 24 | font-size: 0.9rem; 25 | font-weight: 700; 26 | letter-spacing: 0.1rem; 27 | } 28 | 29 | .bg-black { 30 | background-color: #000 !important; 31 | } 32 | 33 | .rounded-pill { 34 | border-radius: 5rem; 35 | } 36 | 37 | .navbar-custom { 38 | padding-top: 1rem; 39 | padding-bottom: 1rem; 40 | background-color: rgba(0, 0, 0, 0.7); 41 | } 42 | 43 | .navbar-custom .navbar-brand { 44 | text-transform: uppercase; 45 | font-size: 1rem; 46 | letter-spacing: 0.1rem; 47 | font-weight: 700; 48 | } 49 | 50 | .navbar-custom .navbar-nav .nav-item .nav-link { 51 | text-transform: uppercase; 52 | font-size: 0.8rem; 53 | font-weight: 700; 54 | letter-spacing: 0.1rem; 55 | } 56 | 57 | header.masthead { 58 | position: relative; 59 | overflow: hidden; 60 | padding-top: calc(7rem + 72px); 61 | padding-bottom: 7rem; 62 | background: linear-gradient(0deg, #ff6a00 0%, #ee0979 100%); 63 | background-repeat: no-repeat; 64 | background-position: center center; 65 | background-attachment: scroll; 66 | background-size: cover; 67 | } 68 | 69 | header.masthead .masthead-content { 70 | z-index: 1; 71 | position: relative; 72 | } 73 | 74 | header.masthead .masthead-content .masthead-heading { 75 | font-size: 4rem; 76 | } 77 | 78 | header.masthead .masthead-content .masthead-subheading { 79 | font-size: 2rem; 80 | } 81 | 82 | header.masthead .bg-circle { 83 | z-index: 0; 84 | position: absolute; 85 | border-radius: 100%; 86 | background: linear-gradient(0deg, #ee0979 0%, #ff6a00 100%); 87 | } 88 | 89 | header.masthead .bg-circle-1 { 90 | height: 90rem; 91 | width: 90rem; 92 | bottom: -55rem; 93 | left: -55rem; 94 | } 95 | 96 | header.masthead .bg-circle-2 { 97 | height: 50rem; 98 | width: 50rem; 99 | top: -25rem; 100 | right: -25rem; 101 | } 102 | 103 | header.masthead .bg-circle-3 { 104 | height: 20rem; 105 | width: 20rem; 106 | bottom: -10rem; 107 | right: 5%; 108 | } 109 | 110 | header.masthead .bg-circle-4 { 111 | height: 30rem; 112 | width: 30rem; 113 | top: -5rem; 114 | right: 35%; 115 | } 116 | 117 | @media (min-width: 992px) { 118 | header.masthead { 119 | padding-top: calc(10rem + 55px); 120 | padding-bottom: 10rem; 121 | } 122 | header.masthead .masthead-content .masthead-heading { 123 | font-size: 6rem; 124 | } 125 | header.masthead .masthead-content .masthead-subheading { 126 | font-size: 4rem; 127 | } 128 | } 129 | 130 | .bg-primary { 131 | background-color: #ee0979 !important; 132 | } 133 | 134 | .btn-primary { 135 | background-color: #ee0979; 136 | /* border-color: #ee0979; */ 137 | border-color: #6600ba; 138 | } 139 | 140 | .btn-primary:active, .btn-primary:focus, .btn-primary:hover { 141 | background-color: #bd0760 !important; 142 | border-color: #bd0760 !important; 143 | } 144 | 145 | .btn-primary:focus { 146 | box-shadow: 0 0 0 0.2rem rgba(238, 9, 121, 0.5); 147 | } 148 | 149 | .btn-secondary { 150 | background-color: #ff6a00; 151 | border-color: #ff6a00; 152 | } 153 | 154 | .btn-secondary:active, .btn-secondary:focus, .btn-secondary:hover { 155 | background-color: #cc5500 !important; 156 | 
border-color: #cc5500 !important; 157 | } 158 | 159 | .btn-secondary:focus { 160 | box-shadow: 0 0 0 0.2rem rgba(255, 106, 0, 0.5); 161 | } 162 | 163 | .modal-header, h4, .close { 164 | /*background-color: #008CBA;*/ 165 | /* background-color: #6600ba; */ 166 | background-color: #ff06ac; 167 | color:white !important; 168 | text-align: center; 169 | font-size: 30px; 170 | } 171 | .modal-footer { 172 | background-color: #f9f9f9; 173 | } 174 | -------------------------------------------------------------------------------- /Chapter10/www/img/team-work.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Automated-Machine-Learning-on-AWS/fa106b966823211a4588286656b6cf01cc6c14b2/Chapter10/www/img/team-work.jpeg -------------------------------------------------------------------------------- /Chapter10/www/img/undersea-abalone.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Automated-Machine-Learning-on-AWS/fa106b966823211a4588286656b6cf01cc6c14b2/Chapter10/www/img/undersea-abalone.jpg -------------------------------------------------------------------------------- /Chapter10/www/img/video-monitoring.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Automated-Machine-Learning-on-AWS/fa106b966823211a4588286656b6cf01cc6c14b2/Chapter10/www/img/video-monitoring.jpeg -------------------------------------------------------------------------------- /Chapter10/www/robots.txt: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: / -------------------------------------------------------------------------------- /Chapter10/www/scss/_bootstrap-overrides.scss: -------------------------------------------------------------------------------- 1 | .bg-primary { 2 | background-color: $primary !important; 3 | } 4 | 5 | .btn-primary { 6 | background-color: $primary; 7 | border-color: $primary; 8 | &:active, 9 | &:focus, 10 | &:hover { 11 | background-color: darken($primary, 10%) !important; 12 | border-color: darken($primary, 10%) !important; 13 | } 14 | &:focus { 15 | box-shadow: 0 0 0 0.2rem fade-out($primary, 0.5); 16 | } 17 | } 18 | 19 | .btn-secondary { 20 | background-color: $secondary; 21 | border-color: $secondary; 22 | &:active, 23 | &:focus, 24 | &:hover { 25 | background-color: darken($secondary, 10%) !important; 26 | border-color: darken($secondary, 10%) !important; 27 | } 28 | &:focus { 29 | box-shadow: 0 0 0 0.2rem fade-out($secondary, 0.5); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /Chapter10/www/scss/_global.scss: -------------------------------------------------------------------------------- 1 | body { 2 | @include body-font; 3 | } 4 | 5 | h1, 6 | h2, 7 | h3, 8 | h4, 9 | h5, 10 | h6 { 11 | @include heading-font; 12 | } 13 | 14 | .btn-xl { 15 | text-transform: uppercase; 16 | padding: 1.5rem 3rem; 17 | font-size: 0.9rem; 18 | font-weight: 700; 19 | letter-spacing: 0.1rem; 20 | } 21 | 22 | .bg-black { 23 | background-color: $black !important; 24 | } 25 | 26 | .rounded-pill { 27 | border-radius: 5rem; 28 | } 29 | -------------------------------------------------------------------------------- /Chapter10/www/scss/_masthead.scss: -------------------------------------------------------------------------------- 1 | header.masthead { 2 
| position: relative; 3 | overflow: hidden; 4 | padding-top: calc(7rem + 72px); 5 | padding-bottom: 7rem; 6 | background: linear-gradient(0deg, $secondary 0%, $primary 100%); 7 | background-repeat: no-repeat; 8 | background-position: center center; 9 | background-attachment: scroll; 10 | background-size: cover; 11 | .masthead-content { 12 | z-index: 1; 13 | position: relative; 14 | .masthead-heading { 15 | font-size: 4rem; 16 | } 17 | .masthead-subheading { 18 | font-size: 2rem; 19 | } 20 | } 21 | .bg-circle { 22 | z-index: 0; 23 | position: absolute; 24 | border-radius: 100%; 25 | background: linear-gradient(0deg, $primary 0%, $secondary 100%); 26 | } 27 | .bg-circle-1 { 28 | height: 90rem; 29 | width: 90rem; 30 | bottom: -55rem; 31 | left: -55rem; 32 | } 33 | .bg-circle-2 { 34 | height: 50rem; 35 | width: 50rem; 36 | top: -25rem; 37 | right: -25rem; 38 | } 39 | .bg-circle-3 { 40 | height: 20rem; 41 | width: 20rem; 42 | bottom: -10rem; 43 | right: 5%; 44 | } 45 | .bg-circle-4 { 46 | height: 30rem; 47 | width: 30rem; 48 | top: -5rem; 49 | right: 35%; 50 | } 51 | } 52 | 53 | @media (min-width: 992px) { 54 | header.masthead { 55 | padding-top: calc(10rem + 55px); 56 | padding-bottom: 10rem; 57 | .masthead-content { 58 | .masthead-heading { 59 | font-size: 6rem; 60 | } 61 | .masthead-subheading { 62 | font-size: 4rem; 63 | } 64 | } 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /Chapter10/www/scss/_mixins.scss: -------------------------------------------------------------------------------- 1 | @mixin heading-font { 2 | font-family: 'Catamaran'; 3 | font-weight: 800 !important; 4 | } 5 | 6 | @mixin body-font { 7 | font-family: 'Lato'; 8 | } 9 | -------------------------------------------------------------------------------- /Chapter10/www/scss/_navbar.scss: -------------------------------------------------------------------------------- 1 | .navbar-custom { 2 | padding-top: 1rem; 3 | padding-bottom: 1rem; 4 | background-color: fade-out($black, 0.3); 5 | .navbar-brand { 6 | text-transform: uppercase; 7 | font-size: 1rem; 8 | letter-spacing: 0.1rem; 9 | font-weight: 700; 10 | } 11 | .navbar-nav { 12 | .nav-item { 13 | .nav-link { 14 | text-transform: uppercase; 15 | font-size: 0.8rem; 16 | font-weight: 700; 17 | letter-spacing: 0.1rem; 18 | } 19 | } 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /Chapter10/www/scss/_variables.scss: -------------------------------------------------------------------------------- 1 | // Variables 2 | 3 | // Restated Bootstrap Variables 4 | 5 | $white: #fff !default; 6 | $gray-100: #f8f9fa !default; 7 | $gray-200: #e9ecef !default; 8 | $gray-300: #dee2e6 !default; 9 | $gray-400: #ced4da !default; 10 | $gray-500: #adb5bd !default; 11 | $gray-600: #868e96 !default; 12 | $gray-700: #495057 !default; 13 | $gray-800: #343a40 !default; 14 | $gray-900: #212529 !default; 15 | $black: #000 !default; 16 | 17 | $primary: #ee0979 !default; 18 | $secondary: #ff6a00 !default; 19 | -------------------------------------------------------------------------------- /Chapter10/www/scss/one-page-wonder.scss: -------------------------------------------------------------------------------- 1 | // Core variables and mixins 2 | @import "variables.scss"; 3 | @import "mixins.scss"; 4 | // Global CSS 5 | @import "global.scss"; 6 | // Components 7 | @import "navbar.scss"; 8 | @import "masthead.scss"; 9 | @import "bootstrap-overrides.scss"; 10 | 
-------------------------------------------------------------------------------- /Chapter10/www/vendor/bootstrap/css/bootstrap-reboot.css: -------------------------------------------------------------------------------- 1 | /*! 2 | * Bootstrap Reboot v4.5.3 (https://getbootstrap.com/) 3 | * Copyright 2011-2020 The Bootstrap Authors 4 | * Copyright 2011-2020 Twitter, Inc. 5 | * Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE) 6 | * Forked from Normalize.css, licensed MIT (https://github.com/necolas/normalize.css/blob/master/LICENSE.md) 7 | */ 8 | *, 9 | *::before, 10 | *::after { 11 | box-sizing: border-box; 12 | } 13 | 14 | html { 15 | font-family: sans-serif; 16 | line-height: 1.15; 17 | -webkit-text-size-adjust: 100%; 18 | -webkit-tap-highlight-color: rgba(0, 0, 0, 0); 19 | } 20 | 21 | article, aside, figcaption, figure, footer, header, hgroup, main, nav, section { 22 | display: block; 23 | } 24 | 25 | body { 26 | margin: 0; 27 | font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, "Noto Sans", sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji"; 28 | font-size: 1rem; 29 | font-weight: 400; 30 | line-height: 1.5; 31 | color: #212529; 32 | text-align: left; 33 | background-color: #fff; 34 | } 35 | 36 | [tabindex="-1"]:focus:not(:focus-visible) { 37 | outline: 0 !important; 38 | } 39 | 40 | hr { 41 | box-sizing: content-box; 42 | height: 0; 43 | overflow: visible; 44 | } 45 | 46 | h1, h2, h3, h4, h5, h6 { 47 | margin-top: 0; 48 | margin-bottom: 0.5rem; 49 | } 50 | 51 | p { 52 | margin-top: 0; 53 | margin-bottom: 1rem; 54 | } 55 | 56 | abbr[title], 57 | abbr[data-original-title] { 58 | text-decoration: underline; 59 | -webkit-text-decoration: underline dotted; 60 | text-decoration: underline dotted; 61 | cursor: help; 62 | border-bottom: 0; 63 | -webkit-text-decoration-skip-ink: none; 64 | text-decoration-skip-ink: none; 65 | } 66 | 67 | address { 68 | margin-bottom: 1rem; 69 | font-style: normal; 70 | line-height: inherit; 71 | } 72 | 73 | ol, 74 | ul, 75 | dl { 76 | margin-top: 0; 77 | margin-bottom: 1rem; 78 | } 79 | 80 | ol ol, 81 | ul ul, 82 | ol ul, 83 | ul ol { 84 | margin-bottom: 0; 85 | } 86 | 87 | dt { 88 | font-weight: 700; 89 | } 90 | 91 | dd { 92 | margin-bottom: .5rem; 93 | margin-left: 0; 94 | } 95 | 96 | blockquote { 97 | margin: 0 0 1rem; 98 | } 99 | 100 | b, 101 | strong { 102 | font-weight: bolder; 103 | } 104 | 105 | small { 106 | font-size: 80%; 107 | } 108 | 109 | sub, 110 | sup { 111 | position: relative; 112 | font-size: 75%; 113 | line-height: 0; 114 | vertical-align: baseline; 115 | } 116 | 117 | sub { 118 | bottom: -.25em; 119 | } 120 | 121 | sup { 122 | top: -.5em; 123 | } 124 | 125 | a { 126 | color: #007bff; 127 | text-decoration: none; 128 | background-color: transparent; 129 | } 130 | 131 | a:hover { 132 | color: #0056b3; 133 | text-decoration: underline; 134 | } 135 | 136 | a:not([href]):not([class]) { 137 | color: inherit; 138 | text-decoration: none; 139 | } 140 | 141 | a:not([href]):not([class]):hover { 142 | color: inherit; 143 | text-decoration: none; 144 | } 145 | 146 | pre, 147 | code, 148 | kbd, 149 | samp { 150 | font-family: SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace; 151 | font-size: 1em; 152 | } 153 | 154 | pre { 155 | margin-top: 0; 156 | margin-bottom: 1rem; 157 | overflow: auto; 158 | -ms-overflow-style: scrollbar; 159 | } 160 | 161 | figure { 162 | margin: 0 0 1rem; 163 | } 164 | 165 | img { 166 | 
vertical-align: middle; 167 | border-style: none; 168 | } 169 | 170 | svg { 171 | overflow: hidden; 172 | vertical-align: middle; 173 | } 174 | 175 | table { 176 | border-collapse: collapse; 177 | } 178 | 179 | caption { 180 | padding-top: 0.75rem; 181 | padding-bottom: 0.75rem; 182 | color: #6c757d; 183 | text-align: left; 184 | caption-side: bottom; 185 | } 186 | 187 | th { 188 | text-align: inherit; 189 | text-align: -webkit-match-parent; 190 | } 191 | 192 | label { 193 | display: inline-block; 194 | margin-bottom: 0.5rem; 195 | } 196 | 197 | button { 198 | border-radius: 0; 199 | } 200 | 201 | button:focus { 202 | outline: 1px dotted; 203 | outline: 5px auto -webkit-focus-ring-color; 204 | } 205 | 206 | input, 207 | button, 208 | select, 209 | optgroup, 210 | textarea { 211 | margin: 0; 212 | font-family: inherit; 213 | font-size: inherit; 214 | line-height: inherit; 215 | } 216 | 217 | button, 218 | input { 219 | overflow: visible; 220 | } 221 | 222 | button, 223 | select { 224 | text-transform: none; 225 | } 226 | 227 | [role="button"] { 228 | cursor: pointer; 229 | } 230 | 231 | select { 232 | word-wrap: normal; 233 | } 234 | 235 | button, 236 | [type="button"], 237 | [type="reset"], 238 | [type="submit"] { 239 | -webkit-appearance: button; 240 | } 241 | 242 | button:not(:disabled), 243 | [type="button"]:not(:disabled), 244 | [type="reset"]:not(:disabled), 245 | [type="submit"]:not(:disabled) { 246 | cursor: pointer; 247 | } 248 | 249 | button::-moz-focus-inner, 250 | [type="button"]::-moz-focus-inner, 251 | [type="reset"]::-moz-focus-inner, 252 | [type="submit"]::-moz-focus-inner { 253 | padding: 0; 254 | border-style: none; 255 | } 256 | 257 | input[type="radio"], 258 | input[type="checkbox"] { 259 | box-sizing: border-box; 260 | padding: 0; 261 | } 262 | 263 | textarea { 264 | overflow: auto; 265 | resize: vertical; 266 | } 267 | 268 | fieldset { 269 | min-width: 0; 270 | padding: 0; 271 | margin: 0; 272 | border: 0; 273 | } 274 | 275 | legend { 276 | display: block; 277 | width: 100%; 278 | max-width: 100%; 279 | padding: 0; 280 | margin-bottom: .5rem; 281 | font-size: 1.5rem; 282 | line-height: inherit; 283 | color: inherit; 284 | white-space: normal; 285 | } 286 | 287 | progress { 288 | vertical-align: baseline; 289 | } 290 | 291 | [type="number"]::-webkit-inner-spin-button, 292 | [type="number"]::-webkit-outer-spin-button { 293 | height: auto; 294 | } 295 | 296 | [type="search"] { 297 | outline-offset: -2px; 298 | -webkit-appearance: none; 299 | } 300 | 301 | [type="search"]::-webkit-search-decoration { 302 | -webkit-appearance: none; 303 | } 304 | 305 | ::-webkit-file-upload-button { 306 | font: inherit; 307 | -webkit-appearance: button; 308 | } 309 | 310 | output { 311 | display: inline-block; 312 | } 313 | 314 | summary { 315 | display: list-item; 316 | cursor: pointer; 317 | } 318 | 319 | template { 320 | display: none; 321 | } 322 | 323 | [hidden] { 324 | display: none !important; 325 | } 326 | /*# sourceMappingURL=bootstrap-reboot.css.map */ -------------------------------------------------------------------------------- /Chapter10/www/vendor/bootstrap/css/bootstrap-reboot.min.css: -------------------------------------------------------------------------------- 1 | /*! 2 | * Bootstrap Reboot v4.5.3 (https://getbootstrap.com/) 3 | * Copyright 2011-2020 The Bootstrap Authors 4 | * Copyright 2011-2020 Twitter, Inc. 
5 | * Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE) 6 | * Forked from Normalize.css, licensed MIT (https://github.com/necolas/normalize.css/blob/master/LICENSE.md) 7 | */*,::after,::before{box-sizing:border-box}html{font-family:sans-serif;line-height:1.15;-webkit-text-size-adjust:100%;-webkit-tap-highlight-color:transparent}article,aside,figcaption,figure,footer,header,hgroup,main,nav,section{display:block}body{margin:0;font-family:-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,"Helvetica Neue",Arial,"Noto Sans",sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol","Noto Color Emoji";font-size:1rem;font-weight:400;line-height:1.5;color:#212529;text-align:left;background-color:#fff}[tabindex="-1"]:focus:not(:focus-visible){outline:0!important}hr{box-sizing:content-box;height:0;overflow:visible}h1,h2,h3,h4,h5,h6{margin-top:0;margin-bottom:.5rem}p{margin-top:0;margin-bottom:1rem}abbr[data-original-title],abbr[title]{text-decoration:underline;-webkit-text-decoration:underline dotted;text-decoration:underline dotted;cursor:help;border-bottom:0;-webkit-text-decoration-skip-ink:none;text-decoration-skip-ink:none}address{margin-bottom:1rem;font-style:normal;line-height:inherit}dl,ol,ul{margin-top:0;margin-bottom:1rem}ol ol,ol ul,ul ol,ul ul{margin-bottom:0}dt{font-weight:700}dd{margin-bottom:.5rem;margin-left:0}blockquote{margin:0 0 1rem}b,strong{font-weight:bolder}small{font-size:80%}sub,sup{position:relative;font-size:75%;line-height:0;vertical-align:baseline}sub{bottom:-.25em}sup{top:-.5em}a{color:#007bff;text-decoration:none;background-color:transparent}a:hover{color:#0056b3;text-decoration:underline}a:not([href]):not([class]){color:inherit;text-decoration:none}a:not([href]):not([class]):hover{color:inherit;text-decoration:none}code,kbd,pre,samp{font-family:SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",monospace;font-size:1em}pre{margin-top:0;margin-bottom:1rem;overflow:auto;-ms-overflow-style:scrollbar}figure{margin:0 0 1rem}img{vertical-align:middle;border-style:none}svg{overflow:hidden;vertical-align:middle}table{border-collapse:collapse}caption{padding-top:.75rem;padding-bottom:.75rem;color:#6c757d;text-align:left;caption-side:bottom}th{text-align:inherit;text-align:-webkit-match-parent}label{display:inline-block;margin-bottom:.5rem}button{border-radius:0}button:focus{outline:1px dotted;outline:5px auto 
-webkit-focus-ring-color}button,input,optgroup,select,textarea{margin:0;font-family:inherit;font-size:inherit;line-height:inherit}button,input{overflow:visible}button,select{text-transform:none}[role=button]{cursor:pointer}select{word-wrap:normal}[type=button],[type=reset],[type=submit],button{-webkit-appearance:button}[type=button]:not(:disabled),[type=reset]:not(:disabled),[type=submit]:not(:disabled),button:not(:disabled){cursor:pointer}[type=button]::-moz-focus-inner,[type=reset]::-moz-focus-inner,[type=submit]::-moz-focus-inner,button::-moz-focus-inner{padding:0;border-style:none}input[type=checkbox],input[type=radio]{box-sizing:border-box;padding:0}textarea{overflow:auto;resize:vertical}fieldset{min-width:0;padding:0;margin:0;border:0}legend{display:block;width:100%;max-width:100%;padding:0;margin-bottom:.5rem;font-size:1.5rem;line-height:inherit;color:inherit;white-space:normal}progress{vertical-align:baseline}[type=number]::-webkit-inner-spin-button,[type=number]::-webkit-outer-spin-button{height:auto}[type=search]{outline-offset:-2px;-webkit-appearance:none}[type=search]::-webkit-search-decoration{-webkit-appearance:none}::-webkit-file-upload-button{font:inherit;-webkit-appearance:button}output{display:inline-block}summary{display:list-item;cursor:pointer}template{display:none}[hidden]{display:none!important} 8 | /*# sourceMappingURL=bootstrap-reboot.min.css.map */ -------------------------------------------------------------------------------- /Chapter11/Files/cdk/acme_pipeline_stack.py: -------------------------------------------------------------------------------- 1 | import aws_cdk as cdk 2 | import aws_cdk.aws_codecommit as codecommit 3 | import aws_cdk.aws_s3 as s3 4 | import aws_cdk.pipelines as pipelines 5 | import aws_cdk.aws_ssm as ssm 6 | import aws_cdk.aws_ecr as ecr 7 | import aws_cdk.aws_iam as iam 8 | from constructs import Construct 9 | 10 | 11 | from .stacks.ml_workflow_stack import MLWorkflowStack 12 | from .stacks.test_application_stack import TestApplicaitonStack 13 | from .stacks.production_application_stack import ProductionApplicaitonStack 14 | from .stacks.data_workflow_stack import DataWorkflowStack 15 | 16 | class MLWorkflowStage(cdk.Stage): 17 | 18 | def __init__(self, scope: Construct, id: str, *, group_name: str, threshold: float, data_bucket_name: str, feature_group_name: str, **kwargs): 19 | super().__init__(scope, id, **kwargs) 20 | ml_workflow_stack = MLWorkflowStack( 21 | self, 22 | "MLWorkflowStack", 23 | group_name=group_name, 24 | threshold=threshold, 25 | data_bucket_name=data_bucket_name, 26 | feature_group_name=feature_group_name 27 | ) 28 | self.sfn_arn = ml_workflow_stack.sfn_output 29 | 30 | class TestApplicationStage(cdk.Stage): 31 | 32 | def __init__(self, scope: Construct, id: str, *, model_name: str, **kwargs): 33 | super().__init__(scope, id, **kwargs) 34 | test_stack = TestApplicaitonStack(self, "TestApplicaitonStack", model_name=model_name) 35 | self.cdn_output = test_stack.cdn_output 36 | self.api_output = test_stack.api_output 37 | 38 | 39 | class ProductionApplicationStage(cdk.Stage): 40 | def __init__(self, scope: Construct, id: str, *, model_name: str, **kwargs): 41 | super().__init__(scope, id, **kwargs) 42 | production_stack = ProductionApplicaitonStack(self, "ProdApplicationStack", model_name=model_name) 43 | self.cdn_output = production_stack.cdn_output 44 | self.api_output = production_stack.api_output 45 | 46 | 47 | class DataWorkflowStage(cdk.Stage): 48 | def __init__(self, scope: Construct, id: str, *, 
airflow_environment_name: str, data_bucket_name: str, pipeline_name: str, **kwargs): 49 | super().__init__(scope, id, **kwargs) 50 | data_workflow_stack = DataWorkflowStack(self, "DataWorkflowStack", airflow_environment_name=airflow_environment_name, data_bucket_name=data_bucket_name, pipeline_name=pipeline_name) 51 | 52 | 53 | class PipelineStack(cdk.Stack): 54 | 55 | def __init__(self, scope: Construct, id: str, *, model_name: str=None, group_name: str=None, repo_name: str=None, feature_group: str=None, threshold: float=None, cdk_version: str=None, **kwargs) -> None: 56 | super().__init__(scope, id, **kwargs) 57 | 58 | self.code_repo = codecommit.Repository( 59 | self, 60 | "Source-Repository", 61 | repository_name=repo_name, 62 | description="ACME Web Application Source Code Repository" 63 | ) 64 | cdk.CfnOutput( 65 | self, 66 | "Clone-URL", 67 | description="CodeCommit Clone URL", 68 | value=self.code_repo.repository_clone_url_http 69 | ) 70 | 71 | self.data_bucket = s3.Bucket( 72 | self, 73 | "Data-Bucket", 74 | bucket_name=f"data-{self.region}-{self.account}", 75 | block_public_access=s3.BlockPublicAccess.BLOCK_ALL, 76 | auto_delete_objects=True, 77 | removal_policy=cdk.RemovalPolicy.DESTROY, 78 | versioned=True 79 | ) 80 | 81 | ssm.StringParameter( 82 | self, 83 | "Data-Bucket-Parameter", 84 | parameter_name="DataBucket", 85 | description="SSM Parameter for the S3 Data Bucket Name", 86 | string_value=self.data_bucket.bucket_name 87 | ) 88 | 89 | ssm.StringParameter( 90 | self, 91 | "Feature-Group-Parameter", 92 | parameter_name="FeatureGroup", 93 | description="SSM Paramater for the SageMaker Feature Store group", 94 | string_value=feature_group 95 | ) 96 | 97 | source_artifact = pipelines.CodePipelineSource.code_commit( 98 | repository=self.code_repo, 99 | branch="main" 100 | ) 101 | 102 | ml_workflow_stage = MLWorkflowStage( 103 | self, 104 | "Build-MLWorkflow", 105 | data_bucket_name=self.data_bucket.bucket_name, 106 | group_name=group_name, 107 | threshold=threshold, 108 | feature_group_name=feature_group 109 | ) 110 | 111 | test_stage = TestApplicationStage( 112 | self, 113 | "Test-Deployment", 114 | model_name=model_name 115 | ) 116 | 117 | prod_stage = ProductionApplicationStage( 118 | self, 119 | "Production-Deployment", 120 | model_name=model_name 121 | ) 122 | 123 | data_workflow_stage = DataWorkflowStage( 124 | self, 125 | "Build-DataWorkflow", 126 | airflow_environment_name="acme-airflow-environment", 127 | data_bucket_name=self.data_bucket.bucket_name, 128 | pipeline_name="ACME-WebApp-Pipeline" 129 | ) 130 | 131 | pipeline = pipelines.CodePipeline( 132 | self, 133 | "Application-Pipeline", 134 | pipeline_name="ACME-WebApp-Pipeline", 135 | self_mutation=True, 136 | cli_version=cdk_version, 137 | synth=pipelines.ShellStep( 138 | "Synth", 139 | input=source_artifact, 140 | commands=[ 141 | "printenv", 142 | f"npm install -g aws-cdk@{cdk_version}", 143 | "python -m pip install --upgrade pip", 144 | "pip install -r requirements.txt", 145 | "cdk synth" 146 | ] 147 | ) 148 | ) 149 | pipeline.add_stage( 150 | ml_workflow_stage, 151 | post=[ 152 | pipelines.CodeBuildStep( 153 | "Execute-MLWorkflow", 154 | input=source_artifact, 155 | commands=[ 156 | "python3 ./scripts/invoke.py" 157 | ], 158 | env_from_cfn_outputs={ 159 | "STATEMACHINE_ARN": ml_workflow_stage.sfn_arn 160 | }, 161 | env={ 162 | "MODEL_NAME": model_name, 163 | "PIPELINE_NAME": "ACME-WebApp-Pipeline", 164 | "STAGE_NAME": "Build-MLWorkflow", 165 | "ACTION_NAME": "Execute-MLWorkflow", 166 | "DATA_BUCKET": 
self.data_bucket.bucket_name 167 | }, 168 | role_policy_statements=[ 169 | iam.PolicyStatement( 170 | actions=[ 171 | "states:ListStateMachine", 172 | "states:DescribeStateMachine", 173 | "states:DescribeExecution", 174 | "states:ListExecutions", 175 | "states:GetExecutionHistory", 176 | "states:StartExecution", 177 | "states:StopExecution" 178 | ], 179 | effect=iam.Effect.ALLOW, 180 | resources=["*"] 181 | ) 182 | ] 183 | ) 184 | ] 185 | ) 186 | pipeline.add_stage( 187 | test_stage, 188 | post=[ 189 | pipelines.ShellStep( 190 | "System-Tests", 191 | input=source_artifact, 192 | commands=[ 193 | "pip install -r ./tests/requirements.txt", 194 | "pytest ./tests/system_tests.py" 195 | ], 196 | env_from_cfn_outputs={ 197 | "WEBSITE_URL": test_stage.cdn_output, 198 | "API_URL": test_stage.api_output 199 | } 200 | ) 201 | ] 202 | ) 203 | pipeline.add_stage(prod_stage) 204 | pipeline.add_stage(data_workflow_stage) 205 | -------------------------------------------------------------------------------- /Chapter11/Files/cdk/test_application_stack.py: -------------------------------------------------------------------------------- 1 | import os 2 | from time import strftime 3 | import aws_cdk as cdk 4 | import aws_cdk.aws_s3 as s3 5 | import aws_cdk.aws_cloudfront as cloudfront 6 | import aws_cdk.aws_iam as iam 7 | import aws_cdk.aws_s3_deployment as s3_deployment 8 | import aws_cdk.aws_lambda as lambda_ 9 | import aws_cdk.aws_apigatewayv2_alpha as httpgw 10 | import aws_cdk.aws_apigatewayv2_integrations_alpha as integrations 11 | import aws_cdk.aws_sagemaker as sagemaker 12 | import aws_cdk.custom_resources as cr 13 | from constructs import Construct 14 | 15 | class TestApplicaitonStack(cdk.Stack): 16 | 17 | def __init__(self, scope: Construct, id: str, *, model_name: str=None, **kwargs) -> None: 18 | super().__init__(scope, id, **kwargs) 19 | 20 | endpoint_name = f"{model_name}-test-endpoint" 21 | 22 | sagemaker_test_role = iam.Role( 23 | self, 24 | "SageMaker-TestRole", 25 | assumed_by=iam.CompositePrincipal( 26 | iam.ServicePrincipal("sagemaker.amazonaws.com") 27 | ), 28 | managed_policies=[ 29 | iam.ManagedPolicy.from_aws_managed_policy_name("AmazonSageMakerFullAccess"), 30 | iam.ManagedPolicy.from_aws_managed_policy_name("AmazonS3FullAccess") 31 | ] 32 | ) 33 | 34 | model = sagemaker.CfnModel( 35 | self, 36 | "Test-Model", 37 | execution_role_arn=sagemaker_test_role.role_arn, 38 | primary_container=sagemaker.CfnModel.ContainerDefinitionProperty( 39 | model_package_name=cr.AwsCustomResource( 40 | self, 41 | "Get-ModelPackage-Parameter", 42 | on_create=cr.AwsSdkCall( 43 | action="getParameter", 44 | service="SSM", 45 | parameters={ 46 | "Name": "ModelPackageName" 47 | }, 48 | physical_resource_id=cr.PhysicalResourceId.of(strftime("%Y%m%d%H%M%S")) 49 | ), 50 | on_update=cr.AwsSdkCall( 51 | action="getParameter", 52 | service="SSM", 53 | parameters={ 54 | "Name": "ModelPackageName" 55 | }, 56 | physical_resource_id=cr.PhysicalResourceId.of(strftime("%Y%m%d%H%M%S")) 57 | ), 58 | policy=cr.AwsCustomResourcePolicy.from_sdk_calls( 59 | resources=cr.AwsCustomResourcePolicy.ANY_RESOURCE 60 | ) 61 | ).get_response_field("Parameter.Value") 62 | ) 63 | ) 64 | 65 | endpoint_config = sagemaker.CfnEndpointConfig( 66 | self, 67 | "Test-EndpointConfig", 68 | production_variants=[ 69 | sagemaker.CfnEndpointConfig.ProductionVariantProperty( 70 | initial_instance_count=1, 71 | initial_variant_weight=1.0, 72 | instance_type="ml.t2.large", 73 | model_name=model.attr_model_name, 74 | variant_name="AllTraffic" 75 | ) 76 
| ] 77 | ) 78 | 79 | endpoint = sagemaker.CfnEndpoint( 80 | self, 81 | "Test-Endpoint", 82 | endpoint_config_name=endpoint_config.attr_endpoint_config_name, 83 | endpoint_name=endpoint_name 84 | ) 85 | endpoint.add_depends_on(endpoint_config) 86 | 87 | static_bucket = s3.Bucket( 88 | self, 89 | "Static-Bucket", 90 | removal_policy=cdk.RemovalPolicy.DESTROY 91 | ) 92 | 93 | origin = cloudfront.OriginAccessIdentity( 94 | self, 95 | "Bucket-Origin", 96 | comment="Origin associated with ACME website static content Bucket" 97 | ) 98 | 99 | static_bucket.grant_read( 100 | iam.CanonicalUserPrincipal( 101 | origin.cloud_front_origin_access_identity_s3_canonical_user_id 102 | ) 103 | ) 104 | 105 | form_lambda = lambda_.DockerImageFunction( 106 | self, 107 | "Form-Lambda", 108 | code=lambda_.DockerImageCode.from_image_asset( 109 | os.path.join(os.path.dirname(__file__), 110 | "../../lambda/formHandler" 111 | ) 112 | ), 113 | environment={ 114 | "sagemakerEndpoint": endpoint.attr_endpoint_name 115 | }, 116 | memory_size=512, 117 | timeout=cdk.Duration.seconds(120) 118 | ) 119 | form_lambda.add_to_role_policy( 120 | iam.PolicyStatement( 121 | actions=[ 122 | "sagemaker:InvokeEndpoint" 123 | ], 124 | effect=iam.Effect.ALLOW, 125 | resources=["*"] 126 | ) 127 | ) 128 | 129 | api = httpgw.HttpApi( 130 | self, 131 | "Form-API", 132 | cors_preflight={ 133 | "allow_origins": ["*"], 134 | "allow_methods": [httpgw.HttpMethod.POST], 135 | "allow_headers": ["*"] 136 | } 137 | ) 138 | api.add_routes( 139 | path="/api/contact", 140 | methods=[httpgw.HttpMethod.POST], 141 | integration=integrations.HttpLambdaIntegration( 142 | "ContactForm-Integration", 143 | handler=form_lambda 144 | ) 145 | ) 146 | api.add_routes( 147 | path="/api/predict", 148 | methods=[httpgw.HttpMethod.POST], 149 | integration=integrations.HttpLambdaIntegration( 150 | "PredictForm-Integration", 151 | handler=form_lambda 152 | ) 153 | ) 154 | 155 | cdn = cloudfront.CloudFrontWebDistribution( 156 | self, 157 | "CloudFront-CDN", 158 | comment="CDN for the ACME website", 159 | origin_configs=[ 160 | cloudfront.SourceConfiguration( 161 | custom_origin_source=cloudfront.CustomOriginConfig( 162 | domain_name=f"{api.http_api_id}.execute-api.{cdk.Aws.REGION}.amazonaws.com" 163 | ), 164 | behaviors=[ 165 | cloudfront.Behavior( 166 | allowed_methods=cloudfront.CloudFrontAllowedMethods.ALL, 167 | default_ttl=cdk.Duration.seconds(0), 168 | forwarded_values={ 169 | "query_string": True, 170 | "headers": ["Authorization"] 171 | }, 172 | path_pattern="/api/*" 173 | ) 174 | ] 175 | ), 176 | cloudfront.SourceConfiguration( 177 | s3_origin_source=cloudfront.S3OriginConfig( 178 | s3_bucket_source=static_bucket, 179 | origin_access_identity=origin 180 | ), 181 | behaviors=[ 182 | cloudfront.Behavior( 183 | is_default_behavior=True, 184 | default_ttl=cdk.Duration.seconds(0), 185 | compress=True 186 | ) 187 | ] 188 | ) 189 | ], 190 | default_root_object="index.html", 191 | enable_ip_v6=True, 192 | http_version=cloudfront.HttpVersion.HTTP2, 193 | price_class=cloudfront.PriceClass.PRICE_CLASS_100, 194 | viewer_protocol_policy=cloudfront.ViewerProtocolPolicy.REDIRECT_TO_HTTPS 195 | ) 196 | 197 | s3_deployment.BucketDeployment( 198 | self, 199 | "Deploy-Website", 200 | sources=[ 201 | s3_deployment.Source.asset(os.path.join(os.path.dirname(__file__), "../../www")) 202 | ], 203 | destination_bucket=static_bucket, 204 | distribution=cdn, 205 | retain_on_delete=False 206 | ) 207 | 208 | self.cdn_output = cdk.CfnOutput( 209 | self, 210 | "CloudFront-URL", 211 | 
value=f"http://{cdn.distribution_domain_name}" 212 | ) 213 | 214 | self.api_output = cdk.CfnOutput( 215 | self, 216 | "Form-API-URL", 217 | value=api.url 218 | ) 219 | -------------------------------------------------------------------------------- /Chapter11/Files/lambda/createBaseline/index.py: -------------------------------------------------------------------------------- 1 | import io 2 | import json 3 | import os 4 | import logging 5 | import boto3 6 | from botocore.exceptions import ClientError 7 | from urllib.parse import urlparse 8 | from datetime import datetime 9 | 10 | 11 | s3 = boto3.resource("s3") 12 | sm = boto3.client("sagemaker") 13 | logger = logging.getLogger() 14 | logger.setLevel(logging.INFO) 15 | image_map = { 16 | "us-east-1": "156813124566.dkr.ecr.us-east-1.amazonaws.com/sagemaker-model-monitor-analyzer", 17 | "us-east-2": "777275614652.dkr.ecr.us-east-2.amazonaws.com/sagemaker-model-monitor-analyzer", 18 | "us-west-1": "890145073186.dkr.ecr.us-west-1.amazonaws.com/sagemaker-model-monitor-analyzer", 19 | "us-west-2": "159807026194.dkr.ecr.us-west-2.amazonaws.com/sagemaker-model-monitor-analyzer", 20 | "af-south-1": "875698925577.dkr.ecr.af-south-1.amazonaws.com/sagemaker-model-monitor-analyzer", 21 | "ap-east-1": "001633400207.dkr.ecr.ap-east-1.amazonaws.com/sagemaker-model-monitor-analyzer", 22 | "ap-northeast-1": "574779866223.dkr.ecr.ap-northeast-1.amazonaws.com/sagemaker-model-monitor-analyzer", 23 | "ap-northeast-2": "709848358524.dkr.ecr.ap-northeast-2.amazonaws.com/sagemaker-model-monitor-analyzer", 24 | "ap-south-1": "126357580389.dkr.ecr.ap-south-1.amazonaws.com/sagemaker-model-monitor-analyzer", 25 | "ap-southeast-1": "245545462676.dkr.ecr.ap-southeast-1.amazonaws.com/sagemaker-model-monitor-analyzer", 26 | "ap-southeast-2": "563025443158.dkr.ecr.ap-southeast-2.amazonaws.com/sagemaker-model-monitor-analyzer", 27 | "ca-central-1": "536280801234.dkr.ecr.ca-central-1.amazonaws.com/sagemaker-model-monitor-analyzer", 28 | "cn-north-1": "453000072557.dkr.ecr.cn-north-1.amazonaws.com/sagemaker-model-monitor-analyzer", 29 | "cn-northwest-1": "453252182341.dkr.ecr.cn-northwest-1.amazonaws.com/sagemaker-model-monitor-analyzer", 30 | "eu-central-1": "048819808253.dkr.ecr.eu-central-1.amazonaws.com/sagemaker-model-monitor-analyzer", 31 | "eu-north-1": "895015795356.dkr.ecr.eu-north-1.amazonaws.com/sagemaker-model-monitor-analyzer", 32 | "eu-south-1": "933208885752.dkr.ecr.eu-south-1.amazonaws.com/sagemaker-model-monitor-analyzer", 33 | "eu-west-1": "468650794304.dkr.ecr.eu-west-1.amazonaws.com/sagemaker-model-monitor-analyzer", 34 | "eu-west-2": "749857270468.dkr.ecr.eu-west-2.amazonaws.com/sagemaker-model-monitor-analyzer", 35 | "eu-west-3": "680080141114.dkr.ecr.eu-west-3.amazonaws.com/sagemaker-model-monitor-analyzer", 36 | "me-south-1": "607024016150.dkr.ecr.me-south-1.amazonaws.com/sagemaker-model-monitor-analyzer", 37 | "sa-east-1": "539772159869.dkr.ecr.sa-east-1.amazonaws.com/sagemaker-model-monitor-analyzer" 38 | } 39 | 40 | 41 | def lambda_handler(event, context): 42 | logger.info("Received Event: {}".format(json.dumps(event, indent=2))) 43 | props = event["ResourceProperties"] 44 | source_bucket = urlparse(props["BaselineSourceUri"]).netloc 45 | source_key = urlparse(props["BaselineSourceUri"]).path.lstrip("/") 46 | logs_bucket = props["LogsBucketName"] 47 | 48 | if event["RequestType"] != "Delete": 49 | logger.info(f"Copying data from {source_bucket} to {logs_bucket}.") 50 | try: 51 | s3.meta.client.copy({"Bucket": source_bucket, "Key": source_key}, 
logs_bucket, "baselining/data/baseline.csv") 52 | except ClientError as e: 53 | error_message = e.response["Error"]["Message"] 54 | logger.error(error_message) 55 | raise Exception(error_message) 56 | 57 | request = { 58 | 'ProcessingJobName': f'abalone-baseline-{datetime.utcnow():%Y-%m-%d-%H%M}', 59 | 'Environment': { 60 | 'analysis_type': 'MODEL_QUALITY', 61 | 'dataset_format': '{"csv": {"header": true, "output_columns_position": "START"}}', 62 | 'dataset_source': '/opt/ml/processing/input/baseline_dataset_input', 63 | 'ground_truth_attribute': 'label', 64 | 'inference_attribute': 'prediction', 65 | 'output_path': '/opt/ml/processing/output', 66 | 'problem_type': 'Regression', 67 | 'publish_cloudwatch_metrics': 'Disabled' 68 | }, 69 | 'AppSpecification': { 70 | 'ImageUri': image_map[os.environ['AWS_DEFAULT_REGION']] 71 | }, 72 | 'ProcessingInputs': [ 73 | { 74 | 'InputName': 'baseline_dataset_input', 75 | 'AppManaged': False, 76 | 'S3Input': { 77 | 'LocalPath': '/opt/ml/processing/input/baseline_dataset_input', 78 | 'S3Uri': f's3://{logs_bucket}/baselining/data/baseline.csv', 79 | 'S3DataDistributionType': 'FullyReplicated', 80 | 'S3DataType': 'S3Prefix', 81 | 'S3InputMode': 'File', 82 | 'S3CompressionType': 'None' 83 | } 84 | } 85 | ], 86 | 'ProcessingOutputConfig': { 87 | 'Outputs': [ 88 | { 89 | 'OutputName': 'monitoring_output', 90 | 'AppManaged': False, 91 | 'S3Output': { 92 | 'LocalPath': '/opt/ml/processing/output', 93 | 'S3Uri': f's3://{logs_bucket}/baselining/results', 94 | 'S3UploadMode': 'EndOfJob' 95 | } 96 | } 97 | ] 98 | }, 99 | 'ProcessingResources': { 100 | 'ClusterConfig': { 101 | 'InstanceCount': 1, 102 | 'InstanceType': 'ml.m5.xlarge', 103 | 'VolumeSizeInGB': 20 104 | } 105 | }, 106 | 'RoleArn': props['RoleArn'], 107 | 'StoppingCondition': { 108 | 'MaxRuntimeInSeconds': 1800 109 | } 110 | } 111 | 112 | 113 | logger.info(f'Creating Basline Suggestion Job: {request["ProcessingJobName"]}') 114 | try: 115 | response = sm.create_processing_job(**request) 116 | return { 117 | "PhysicalResourceId": response["ProcessingJobArn"], 118 | "Data": { 119 | "ProcessingJobName": request["ProcessingJobName"], 120 | "BaselineResultsUri": f"s3://{logs_bucket}/baselining/results" 121 | } 122 | } 123 | except ClientError as e: 124 | error_message = e.response["Error"]["Message"] 125 | logger.error(error_message) 126 | raise Exception(error_message) 127 | -------------------------------------------------------------------------------- /Chapter11/Files/lambda/formHandler/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM public.ecr.aws/lambda/python:3.8 2 | COPY index.py requirements.txt ./ 3 | RUN pip3 install -r requirements.txt 4 | CMD ["index.lambda_handler"] -------------------------------------------------------------------------------- /Chapter11/Files/lambda/formHandler/index.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | import json 4 | import boto3 5 | from botocore.exceptions import ClientError 6 | from http import HTTPStatus 7 | import numpy as np 8 | import pandas as pd 9 | from sklearn.preprocessing import normalize 10 | 11 | sm = boto3.client("sagemaker-runtime") 12 | logger = logging.getLogger() 13 | logger.setLevel(logging.INFO) 14 | 15 | def lambda_handler(request, context): 16 | logger.info(f"Processing HTTP API Request: {json.dumps(request, indent=2)}") 17 | if request["requestContext"]["http"]["method"] == "POST": 18 | response_code, response_body 
= handle_request(request) 19 | return generate_response(request, response_body, response_code) 20 | else: 21 | logger.info("Request is not using POST method") 22 | return generate_response(request, json.dumps({"message": "Unsupported method."}), HTTPStatus.BAD_REQUEST) 23 | 24 | 25 | def generate_response(request, response_body, response_code): 26 | logger.info("Generating response:") 27 | response = { 28 | "body": response_body, 29 | "isBase64Encoded": request["isBase64Encoded"], 30 | "headers": request["headers"], 31 | "statusCode": response_code 32 | } 33 | logger.info(json.dumps(response, indent=2)) 34 | return response 35 | 36 | 37 | def handle_request(request): 38 | if request["rawPath"] == "/api/contact": 39 | logger.info("Processing Contact Form request.") 40 | return handle_contact(request) 41 | elif request["rawPath"] == "/api/predict": 42 | logger.info("Processing Prediction Form request.") 43 | return handle_predict(request) 44 | else: 45 | logger.info("Request outside of scope.") 46 | return HTTPStatus.BAD_REQUEST, json.dumps({"message": "Unsupported path."}) 47 | 48 | 49 | def handle_contact(request): 50 | email = json.loads(request["body"])["email"] 51 | return HTTPStatus.OK, json.dumps( 52 | { 53 | "message": f"Thank you! We\'ve received your message from {email} and we will respond shortly." 54 | } 55 | ) 56 | 57 | 58 | def handle_predict(request): 59 | df = pd.json_normalize(json.loads(request["body"])) 60 | logger.info(f"Received Request Body: {df}") 61 | s_pre = df["sex"][0] 62 | s_post = handle_encoding(s_pre) 63 | x = df.drop(columns=["sex"], axis=1) 64 | x_pre = x.to_numpy() 65 | x_post = normalize(x_pre).tolist()[0] 66 | payload = ",".join(map(str, x_post+s_post)) 67 | logger.info(f"SageMaker Request Payload: {payload}") 68 | try: 69 | if ("inference-id" in request["headers"]): 70 | inference_id = request["headers"]["inference-id"] 71 | logger.info(f"Invoking SageMaker Endpoint with Ground Truth Inference ID: {inference_id}") 72 | response = sm.invoke_endpoint( 73 | EndpointName=os.environ["sagemakerEndpoint"], 74 | ContentType="text/csv", 75 | Body=payload, 76 | InferenceId=inference_id 77 | ) 78 | else: 79 | logger.info("Invoking SageMaker Endpoint with no Ground Truth Inference ID") 80 | response = sm.invoke_endpoint( 81 | EndpointName=os.environ["sagemakerEndpoint"], 82 | ContentType="text/csv", 83 | Body=payload 84 | ) 85 | logger.debug(f"SageMaker Response: {response}") 86 | prediction = response["Body"].read().decode("utf-8").split(".")[0] 87 | logger.info(f"SageMaker Endpoint Prediction: {prediction}") 88 | logger.debug(type(prediction)) 89 | rings = round(int(prediction)) 90 | age = rings + 1.5 91 | return HTTPStatus.OK, json.dumps( 92 | { 93 | "message": f"We\'ve calculated that the Abalone has {rings} rings, and is therefore approximately {age} years old." 94 | } 95 | ) 96 | 97 | except ClientError as e: 98 | error_message = e.response["Error"]["Message"] 99 | logger.error(error_message) 100 | return HTTPStatus.OK, json.dumps( 101 | { 102 | "message": "Age Calculator Unavailable! Please try again later." 103 | } 104 | ) 105 | 106 | 107 | def handle_encoding(sex): 108 | if sex == "M" or sex == "m": 109 | return [0., 0., 1.0] 110 | elif sex == "F" or sex == "f": 111 | return [1.0, 0., 0.] 112 | elif sex == "I" or sex == "i": 113 | return [0., 1.0, 0.]
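# ---------------------------------------------------------------------------
# Editor's note: the block below is an illustrative local sketch, not part of
# the original handler. It mirrors the payload construction performed by
# handle_predict() so the "sex" one-hot encoding (implied order [F, I, M] in
# handle_encoding()) and the feature normalization can be inspected without
# invoking the SageMaker endpoint. The sample record is the same one used in
# Chapter11/Files/tests/system_tests.py, and the __main__ guard keeps the
# sketch from running inside Lambda. Note that importing this module still
# creates a boto3 client, so an AWS region must be configured locally.
if __name__ == "__main__":
    sample_record = {
        "length": "0.455",
        "diameter": "0.365",
        "height": "0.095",
        "whole_weight": "0.514",
        "shucked_weight": "0.2245",
        "viscera_weight": "0.101",
        "shell_weight": "0.15",
        "sex": "M"
    }
    sample_df = pd.json_normalize(sample_record)
    # One-hot encode the categorical feature exactly as handle_predict() does
    encoded_sex = handle_encoding(sample_df["sex"][0])
    # Cast the remaining string values to floats before L2 normalization
    numeric = sample_df.drop(columns=["sex"]).astype(float).to_numpy()
    features = normalize(numeric).tolist()[0]
    print("CSV payload:", ",".join(map(str, features + encoded_sex)))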
114 | -------------------------------------------------------------------------------- /Chapter11/Files/lambda/formHandler/requirements.txt: -------------------------------------------------------------------------------- 1 | scikit-learn==0.23.2 2 | pandas==1.1.4 3 | numpy==1.20.2 4 | boto3==1.17.58 -------------------------------------------------------------------------------- /Chapter11/Files/scripts/invoke.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import os 3 | import json 4 | import time 5 | import sys 6 | import logging 7 | 8 | sfn = boto3.client("stepfunctions") 9 | logger = logging.getLogger() 10 | log_format = "%(levelname)s: [%(filename)s:%(lineno)s] %(message)s" 11 | logging.basicConfig(format=log_format, level=os.environ.get("LOGLEVEL", "INFO").upper()) 12 | logger.info(f'Invoking ML Workflow: {os.environ["STATEMACHINE_ARN"]}') 13 | execution_arn = sfn.start_execution( 14 | stateMachineArn=os.environ['STATEMACHINE_ARN'], 15 | input=json.dumps( 16 | { 17 | "input": { 18 | "model_name": os.environ["MODEL_NAME"], 19 | "pipeline_name": os.environ["PIPELINE_NAME"], 20 | "stage_name": os.environ["STAGE_NAME"], 21 | "action_name": os.environ["ACTION_NAME"], 22 | "data_bucket": os.environ["DATA_BUCKET"] 23 | } 24 | } 25 | ) 26 | )["executionArn"] 27 | status = sfn.describe_execution(executionArn=execution_arn)["status"] 28 | while status == "RUNNING": 29 | time.sleep(60) 30 | logger.info("ML Workflow Status: {}".format(status)) 31 | status = sfn.describe_execution(executionArn=execution_arn)["status"] 32 | if status == "SUCCEEDED": 33 | logger.info("ML Workflow Execution: {}".format(status)) 34 | sys.exit(0) 35 | else: 36 | error_message = "ML Workflow execution: {}".format(status) 37 | logger.error(error_message) 38 | sys.exit(255) 39 | -------------------------------------------------------------------------------- /Chapter11/Files/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Automated-Machine-Learning-on-AWS/fa106b966823211a4588286656b6cf01cc6c14b2/Chapter11/Files/tests/__init__.py -------------------------------------------------------------------------------- /Chapter11/Files/tests/requirements.txt: -------------------------------------------------------------------------------- 1 | requests 2 | pytest -------------------------------------------------------------------------------- /Chapter11/Files/tests/system_tests.py: -------------------------------------------------------------------------------- 1 | import os 2 | import requests 3 | import json 4 | 5 | test_data = { 6 | 'length': '0.455', 7 | 'diameter': '0.365', 8 | 'height': '0.095', 9 | 'whole_weight': '0.514', 10 | 'shucked_weight': '0.2245', 11 | 'viscera_weight': '0.101', 12 | 'shell_weight': '0.15', 13 | 'sex': 'M' 14 | } 15 | 16 | 17 | def test_website(): 18 | with requests.get(os.environ["WEBSITE_URL"]) as response: 19 | assert response.status_code == 200 20 | assert response.headers["Content-Type"] == "text/html" 21 | 22 | def test_prediction(): 23 | with requests.post(os.environ["API_URL"]+"api/predict", json=test_data) as response: 24 | assert response.status_code == 200 25 | assert response.headers["Content-Type"] == "application/json" 26 | assert "We've calculated that the Abalone has" in json.loads(response.content)["message"] 27 | 28 | def test_errors(): 29 | with requests.get(os.environ["API_URL"]+"api/predict") as response: 30 | assert
response.status_code == 404 31 | assert json.loads(response.content)["message"] == "Not Found" 32 | with requests.post(os.environ["API_URL"]+"api/predict") as response: 33 | assert response.status_code == 500 34 | -------------------------------------------------------------------------------- /Chapter11/Notebook/Simulating New Abalone Survey Data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Creating new `abalone` data using `CTGAN`\n", 8 | ">__NOTE:__ Recommend using the _Python 3 (Data Science)_ kernel on an _ml.m5.4xlarge (16vCPU + 64GB)_ Instance Type. However, this will incur additional AWS usage costs." 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "## Install `ctgan`" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "%%capture\n", 25 | "!pip install ctgan \"s3fs<=0.4\"" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "## Load the Required Libraries" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "import io\n", 42 | "import boto3\n", 43 | "import warnings\n", 44 | "import pandas as pd\n", 45 | "from time import gmtime, strftime\n", 46 | "\n", 47 | "warnings.filterwarnings(\"ignore\")\n", 48 | "s3 = boto3.client(\"s3\")\n", 49 | "model_name = \"abalone\"\n", 50 | "column_names = [\n", 51 | " \"sex\",\n", 52 | " \"length\",\n", 53 | " \"diameter\",\n", 54 | " \"height\",\n", 55 | " \"whole_weight\",\n", 56 | " \"shucked_weight\",\n", 57 | " \"viscera_weight\",\n", 58 | " \"shell_weight\",\n", 59 | " \"rings\"\n", 60 | "]" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "## Load the \"raw\" data" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "column_names = [\"sex\", \"length\", \"diameter\", \"height\", \"whole_weight\", \"shucked_weight\", \"viscera_weight\", \"shell_weight\", \"rings\"]\n", 77 | "abalone_data = pd.read_csv(\"http://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data\", names=column_names)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "## Fit the CTGAN Model on the `sex` target label\n", 85 | "\n", 86 | ">__NOTE:__ Fitting the `ctgan` model can take up to 5 minutes, depending on the Kernel compute resources."
87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "from ctgan import CTGAN\n", 96 | "\n", 97 | "ctgan = CTGAN()\n", 98 | "ctgan.fit(abalone_data, [\"sex\"])" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "## Generate `100` samples of \"new\" data\n", 106 | ">__NOTE:__ `100` new samples are used to realistially simulate the potential amount of new daily survey data" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [ 115 | "samples = ctgan.sample(100)" 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": [ 122 | "## Compare Datasets\n", 123 | "### `raw` dataset" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "abalone_data.describe()" 133 | ] 134 | }, 135 | { 136 | "cell_type": "markdown", 137 | "metadata": {}, 138 | "source": [ 139 | "### `new` dataset" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "samples.describe()" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "## Upload the new data to test the Airflow DAG" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [ 164 | "region_name = boto3.session.Session().region_name\n", 165 | "data_bucket = f\"\"\"{boto3.client(\"ssm\", region_name=region_name).get_parameter(Name=\"DataBucket\")[\"Parameter\"][\"Value\"]}\"\"\"\n", 166 | "new_data_key = f\"{model_name}_data/abalone.new\"\n", 167 | "samples.to_csv(f\"s3://{data_bucket}/{new_data_key}\", header=False, index=False)" 168 | ] 169 | } 170 | ], 171 | "metadata": { 172 | "instance_type": "ml.m5.4xlarge", 173 | "kernelspec": { 174 | "display_name": "Python 3 (Data Science)", 175 | "language": "python", 176 | "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-west-2:236514542706:image/datascience-1.0" 177 | }, 178 | "language_info": { 179 | "codemirror_mode": { 180 | "name": "ipython", 181 | "version": 3 182 | }, 183 | "file_extension": ".py", 184 | "mimetype": "text/x-python", 185 | "name": "python", 186 | "nbconvert_exporter": "python", 187 | "pygments_lexer": "ipython3", 188 | "version": "3.7.10" 189 | } 190 | }, 191 | "nbformat": 4, 192 | "nbformat_minor": 4 193 | } 194 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Packt 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | ### [Packt Conference : Put Generative AI to work on Oct 11-13 (Virtual)](https://packt.link/JGIEY) 3 | 4 |

[![Packt Conference](https://hub.packtpub.com/wp-content/uploads/2023/08/put-generative-ai-to-work-packt.png)](https://packt.link/JGIEY)

5 | 3 Days, 20+ AI Experts, 25+ Workshops and Power Talks 6 | 7 | Code: USD75OFF 8 | 9 | 10 | 11 | 12 | # Automated Machine Learning on AWS 13 | 14 | Automated Machine Learning on AWS 15 | 16 | This is the code repository for [Automated Machine Learning on AWS](https://www.packtpub.com/product/automated-machine-learning-on-aws/9781801811828?utm_source=github&utm_medium=repository&utm_campaign=9781801811828), published by Packt. 17 | 18 | **Fast-track the development of your production-ready machine learning applications the AWS way** 19 | 20 | ## What is this book about? 21 | AWS provides a wide range of solutions to help automate a machine learning workflow with just a few lines of code. With this practical book, you'll learn how to automate a machine learning pipeline using the various AWS services. 22 | Automated Machine Learning on AWS begins with a quick overview of what the machine learning pipeline/process looks like and highlights the typical challenges that you may face when building a pipeline. 23 | 24 | This book covers the following exciting features: 25 | * Employ SageMaker Autopilot and Amazon SageMaker SDK to automate the machine learning process 26 | * Understand how to use AutoGluon to automate complicated model building tasks 27 | * Use the AWS CDK to codify the machine learning process 28 | * Create, deploy, and rebuild a CI/CD pipeline on AWS 29 | * Build an ML workflow using AWS Step Functions and the Data Science SDK 30 | * Leverage the Amazon SageMaker Feature Store to automate the machine learning software development life cycle (MLSDLC) 31 | * Discover how to use Amazon MWAA for a data-centric ML process 32 | 33 | For supplemental content that covers Generative AI on AWS, as well as updates to AWS capabilities, such as SageMaker Pipelines, and advanced features for production ML model monitoring, take a look at [www.automatedmlonaws.com](https://www.automatedmlonaws.com/). 34 | 35 | If you feel this book is for you, get your [copy](https://www.amazon.com/dp/1801811822) today! 36 | 37 | https://www.packtpub.com/ 39 | 40 | 41 | ## Instructions and Navigations 42 | All of the code is organized into folders. 43 | 44 | The code will look like the following: 45 | ``` 46 | import boto3 47 | import sagemaker 48 | aws_region = sagemaker.Session().boto_session.region_name 49 | !sm-docker build --build-arg REGION={aws_region} 50 | ``` 51 | 52 | **Following is what you need for this book:** 53 | This book is for novice as well as experienced machine learning practitioners looking to automate the process of building, training, and deploying machine learning-based solutions into production, using both purpose-built and other AWS services. 54 | A basic understanding of the end-to-end machine learning process and concepts, Python programming, and AWS is necessary to make the most out of this book. 55 | 56 | With the following software and hardware list you can run all code files present in the book (Chapters 1-11).
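
A quick way to confirm the Python-side setup before you start is a sanity check along the following lines (a suggested sketch, not part of the book's code), which prints the installed SDK versions and the active AWS Region:

```
import sys
import boto3
import sagemaker

print(f"Python: {sys.version.split()[0]}")
print(f"boto3: {boto3.__version__}")
print(f"sagemaker: {sagemaker.__version__}")
print(f"Region: {sagemaker.Session().boto_session.region_name}")
```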
57 | 58 | ### Software and Hardware List 59 | 60 | 61 | | Chapter | Software required | OS required | 62 | | -------- | ------------------------------------ | -----------------------------------| 63 | | 1-11 | Python 3.7.10 (and above) | Windows, Mac OS X, and Linux (Any) | 64 | | 1-11 | AWS CLI 1.19.112 (and above) | Windows, Mac OS X, and Linux (Any) | 65 | | 1-11 | AWS CDK 2.3.0 (build beaa5b2) | Windows, Mac OS X, and Linux (Any) | 66 | 67 | It is recommended that you use an AWS Cloud9 integrated development environment as 68 | it meets the software/hardware and operating system requirements 69 | 70 | We also provide a PDF file that has color images of the screenshots/diagrams used in this book. [Click here to download it](https://static.packt-cdn.com/downloads/9781801811828_ColorImages.pdf). 71 | 72 | 73 | ### Related products 74 | * Automated Machine Learning with Microsoft Azure [[Packt]](https://www.packtpub.com/product/automated-machine-learning-with-microsoft-azure/9781800565319?utm_source=github&utm_medium=repository&utm_campaign=9781800565319) [[Amazon]](https://www.amazon.com/dp/B08VJKVS4B) 75 | 76 | * Learn Amazon SageMaker - Second Edition [[Packt]](https://www.packtpub.com/product/learn-amazon-sagemaker-second-edition/9781801817950?utm_source=github&utm_medium=repository&utm_campaign=9781801817950) [[Amazon]](https://www.amazon.com/dp/B09CQ6MSRY) 77 | 78 | ## Get to Know the Author 79 | **Trenton Potgieter** 80 | is a senior AI/ML specialist at AWS and has been working in the field 81 | of ML since 2011. At AWS, he assists multiple AWS customers to create ML solutions 82 | and has contributed to various use cases, broadly spanning computer vision, knowledge 83 | graphs, and ML automation using MLOps methodologies. Trenton plays a key role in 84 | evangelizing the AWS ML services and shares best practices through forums such as 85 | AWS blogs, whitepapers, reference architectures, and public-speaking events. He has 86 | also actively been involved in leading, developing, and supporting an internal AWS 87 | community of MLOps-related subject matter experts. 88 | ### Download a free PDF 89 | 90 | If you have already purchased a print or Kindle version of this book, you can get a DRM-free PDF version at no cost.
Simply click on the link to claim your free PDF.
91 |

https://packt.link/free-ebook/9781801811828

92 | --------------------------------------------------------------------------------