├── .gitignore
├── Chapter01
│   └── ML Process Example.ipynb
├── Chapter02
│   ├── Autopilot Example.ipynb
│   └── abalone_with_headers.csv
├── Chapter03
│   ├── Image
│   │   └── AutoGluon Image Example.ipynb
│   ├── Tabular
│   │   └── AutoGluon Tabular Example.ipynb
│   └── policy.json
├── Chapter04
│   ├── cdk
│   │   └── abalone_endpoint_stack.py
│   └── scripts
│       ├── build.py
│       └── deploy.py
├── Chapter05
│   ├── Notebook
│   │   └── Abalone CICD Example.ipynb
│   └── cdk
│       ├── abalone_cicd_pipeline_stack.py
│       └── app.py
├── Chapter06
│   ├── cdk
│   │   ├── abalone_cicd_pipeline_stack.py
│   │   ├── abalone_endpoint_stack.py
│   │   └── app.py
│   └── scripts
│       └── deploy.py
├── Chapter07
│   ├── Files
│   │   └── buildspec.yml
│   └── Notebook
│       └── Abalone Step Functions Workflow Example.ipynb
├── Chapter08
│   ├── airflow
│   │   ├── dags
│   │   │   └── .airflowignore
│   │   ├── rerquirements.txt
│   │   └── scripts
│   │       └── evaluate.py
│   ├── cdk
│   │   ├── abalone_data_pipeline_stack.py
│   │   └── app.py
│   └── lambda
│       └── analyze_results
│           └── index.py
├── Chapter09
│   ├── Files
│   │   └── airflow
│   │       ├── dags
│   │       │   ├── .airflowignore
│   │       │   ├── abalone_data_pipeline.py
│   │       │   └── model
│   │       │       └── model_training.py
│   │       └── scripts
│   │           └── preprocess.py
│   └── Notebook
│       └── Simulating New Abalone Survey Data.ipynb
├── Chapter10
│   ├── Files
│   │   ├── airflow
│   │   │   ├── dags
│   │   │   │   ├── .airflowignore
│   │   │   │   └── continuous_training_pipeline.py
│   │   │   └── requirements.txt
│   │   ├── cdk
│   │   │   ├── acme_pipeline_stack.py
│   │   │   ├── app.py
│   │   │   ├── cdk.json
│   │   │   ├── data_workflow_stack.py
│   │   │   ├── ml_workflow_stack.py
│   │   │   └── requirements.txt
│   │   ├── lambda
│   │   │   ├── createExperiment
│   │   │   │   └── index.py
│   │   │   ├── evaluateResults
│   │   │   │   └── index.py
│   │   │   ├── registerModel
│   │   │   │   └── index.py
│   │   │   ├── registryCreator
│   │   │   │   └── index.py
│   │   │   └── releaseChange
│   │   │       └── index.py
│   │   └── scripts
│   │       ├── evaluation.py
│   │       └── preprocessing.py
│   ├── Notebooks
│   │   ├── ACME Model Artifacts Example.ipynb
│   │   └── SageMaker Feature Store Example.ipynb
│   └── www
│       ├── 404.html
│       ├── css
│       │   ├── error-page.css
│       │   └── main-page.css
│       ├── img
│       │   ├── team-work.jpeg
│       │   ├── undersea-abalone.jpg
│       │   └── video-monitoring.jpeg
│       ├── index.html
│       ├── robots.txt
│       ├── scss
│       │   ├── _bootstrap-overrides.scss
│       │   ├── _global.scss
│       │   ├── _masthead.scss
│       │   ├── _mixins.scss
│       │   ├── _navbar.scss
│       │   ├── _variables.scss
│       │   └── one-page-wonder.scss
│       └── vendor
│           ├── bootstrap
│           │   ├── css
│           │   │   ├── bootstrap-grid.css
│           │   │   ├── bootstrap-grid.css.map
│           │   │   ├── bootstrap-grid.min.css
│           │   │   ├── bootstrap-grid.min.css.map
│           │   │   ├── bootstrap-reboot.css
│           │   │   ├── bootstrap-reboot.css.map
│           │   │   ├── bootstrap-reboot.min.css
│           │   │   ├── bootstrap-reboot.min.css.map
│           │   │   ├── bootstrap.css
│           │   │   ├── bootstrap.css.map
│           │   │   ├── bootstrap.min.css
│           │   │   └── bootstrap.min.css.map
│           │   └── js
│           │       ├── bootstrap.bundle.js
│           │       ├── bootstrap.bundle.js.map
│           │       ├── bootstrap.bundle.min.js
│           │       ├── bootstrap.bundle.min.js.map
│           │       ├── bootstrap.js
│           │       ├── bootstrap.js.map
│           │       ├── bootstrap.min.js
│           │       └── bootstrap.min.js.map
│           └── jquery
│               ├── jquery.js
│               ├── jquery.min.js
│               ├── jquery.min.map
│               ├── jquery.slim.js
│               ├── jquery.slim.min.js
│               └── jquery.slim.min.map
├── Chapter11
│   ├── Files
│   │   ├── cdk
│   │   │   ├── acme_pipeline_stack.py
│   │   │   ├── production_application_stack.py
│   │   │   └── test_application_stack.py
│   │   ├── lambda
│   │   │   ├── createBaseline
│   │   │   │   └── index.py
│   │   │   └── formHandler
│   │   │       ├── Dockerfile
│   │   │       ├── index.py
│   │   │       └── requirements.txt
│   │   ├── scripts
│   │   │   └── invoke.py
│   │   └── tests
│   │       ├── __init__.py
│   │       ├── requirements.txt
│   │       └── system_tests.py
│   └── Notebook
│       └── Simulating New Abalone Survey Data.ipynb
├── LICENSE
└── README.md

/.gitignore:
-------------------------------------------------------------------------------- 1 | .ipynb_checkpoints 2 | **/.DS_Store 3 | .venv -------------------------------------------------------------------------------- /Chapter01/ML Process Example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Business Case: Predicting Abalone Age\n", 8 | "\n", 9 | ">__NOTE:__ This Jupyter Notebook uses a Python3.6 kernel." 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import sys\n", 19 | "print(f\"Python Version: {sys.version}.\")" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "%%capture\n", 29 | "!{sys.executable} -m pip install -U pip matplotlib numpy pandas scikit-learn tensorflow" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "import warnings\n", 39 | "import matplotlib.pyplot as plt\n", 40 | "import numpy as np\n", 41 | "import pandas as pd\n", 42 | "\n", 43 | "from sklearn import preprocessing\n", 44 | "from sklearn.model_selection import train_test_split\n", 45 | "from sklearn.metrics import mean_squared_error\n", 46 | "\n", 47 | "import tensorflow as tf\n", 48 | "from tensorflow import keras\n", 49 | "from tensorflow.keras.models import Sequential\n", 50 | "from tensorflow.keras.layers import Dense\n", 51 | "\n", 52 | "class cleanPrint(keras.callbacks.Callback):\n", 53 | " def on_epoch_end(self, epoch, logs):\n", 54 | " if epoch+1 % 100 == 0:\n", 55 | " print(\"!\")\n", 56 | " else:\n", 57 | " print(\"-\", end=\"\")\n", 58 | "\n", 59 | "%matplotlib inline\n", 60 | "warnings.filterwarnings(\"ignore\")" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "---\n", 68 | "## Exploratory Data Analysis: Abalone Dataset" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "column_names = [\"sex\", \"length\", \"diameter\", \"height\", \"whole_weight\", \"shucked_weight\", \"viscera_weight\", \"shell_weight\", \"rings\"]\n", 78 | "abalone_data = pd.read_csv(\"http://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data\", names=column_names)\n", 79 | "abalone_data.head()" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "abalone_data.describe()" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "---\n", 96 | "\n", 97 | "## Data Preparation" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "data = abalone_data[[\"rings\", \"sex\", \"length\", \"diameter\", \"height\", \"whole_weight\", \"shucked_weight\", \"viscera_weight\", \"shell_weight\"]]\n", 107 | "data = pd.get_dummies(data)\n", 108 | "y = data.rings.values\n", 109 | "del data[\"rings\"]\n", 110 | "X = data.values.astype(np.float)\n", 111 | "X = preprocessing.normalize(X)\n", 112 | "training_features, testing_features, training_labels, testing_labels = train_test_split(X, y, test_size=0.2, random_state=42)" 113 | ] 114 | }, 115 | { 116 | "cell_type": 
"markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "---\n", 120 | "## Model Training" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "network_layers = [\n", 130 | " Dense(256, activation='relu', kernel_initializer=\"normal\", input_dim=10),\n", 131 | " Dense(128, activation='relu'),\n", 132 | " Dense(64, activation='relu'),\n", 133 | " Dense(32, activation='relu'),\n", 134 | " Dense(1, activation='linear')\n", 135 | "]\n", 136 | "\n", 137 | "model = Sequential(network_layers)\n", 138 | "model.compile(optimizer=\"adam\", loss=\"mse\", metrics=[\"mae\", \"accuracy\"])\n", 139 | "model.summary() " 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "training_results = model.fit(training_features, training_labels, validation_data=(testing_features, testing_labels), batch_size=32, epochs=2000, shuffle=True, verbose=0, callbacks=[cleanPrint()])" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "---\n", 156 | "## Model Evaluation (Before Optimization)\n", 157 | "\n", 158 | "### Plot Model Evaluaiton Metrics (RMSE)" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": null, 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [ 167 | "fig, ax = plt.subplots(figsize=(15, 10))\n", 168 | "ax.plot(testing_labels, model.predict(testing_features), \"ob\")\n", 169 | "ax.plot([0, 25], [0, 25], \"-r\")\n", 170 | "ax.text(8, 1, f\"RMSE = {mean_squared_error(testing_labels, model.predict(testing_features), squared=False)}\", color=\"r\", fontsize=14, weight=\"bold\")\n", 171 | "plt.title(\"Abalone Model Evaluation\", fontweight=\"bold\", fontsize=12)\n", 172 | "plt.xlabel(\"Actual 'Rings'\", fontweight=\"bold\", fontsize=12)\n", 173 | "plt.ylabel(\"Predicted 'Rings'\", fontweight=\"bold\", fontsize=12)\n", 174 | "plt.legend([\"Predictions\", \"Regression Line\"], loc=\"upper left\", prop={\"weight\": \"bold\"})\n", 175 | "plt.show()" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": {}, 181 | "source": [ 182 | "### Plot additional performance summaries\n", 183 | "\n", 184 | "#### Training vs. Testing Loss" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [ 193 | "plt.rcParams[\"figure.figsize\"] = (15, 10)\n", 194 | "plt.plot(training_results.history[\"loss\"])\n", 195 | "plt.plot(training_results.history[\"val_loss\"])\n", 196 | "plt.title(\"Training vs. Testing Loss\", fontweight=\"bold\", fontsize=14)\n", 197 | "plt.ylabel(\"Loss\", fontweight=\"bold\", fontsize=14)\n", 198 | "plt.xlabel(\"Epochs\", fontweight=\"bold\", fontsize=14)\n", 199 | "plt.legend([\"Training Loss\", \"Testing Loss\"], loc=\"upper right\", prop={\"weight\": \"bold\"})\n", 200 | "plt.grid()\n", 201 | "plt.show()" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "metadata": {}, 208 | "outputs": [], 209 | "source": [ 210 | "plt.rcParams[\"figure.figsize\"] = (15, 10)\n", 211 | "plt.plot(training_results.history[\"mae\"])\n", 212 | "plt.plot(training_results.history[\"val_mae\"])\n", 213 | "plt.title(\"Training vs. 
Testing Mean Absolute Error\", fontweight=\"bold\", fontsize=14)\n", 214 | "plt.ylabel(\"mae\", fontweight=\"bold\", fontsize=14)\n", 215 | "plt.xlabel(\"Epochs\", fontweight=\"bold\", fontsize=14)\n", 216 | "plt.legend([\"Training MAE\", \"Testing MAE\"], loc=\"upper right\", prop={\"weight\": \"bold\"})\n", 217 | "plt.grid()\n", 218 | "plt.show()" 219 | ] 220 | }, 221 | { 222 | "cell_type": "markdown", 223 | "metadata": {}, 224 | "source": [ 225 | "---\n", 226 | "\n", 227 | "## Model Evaluation (After Optimization)" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": null, 233 | "metadata": {}, 234 | "outputs": [], 235 | "source": [ 236 | "network_layers = [\n", 237 | " Dense(64, activation='relu', kernel_initializer=\"normal\", input_dim=10),\n", 238 | " Dense(64, activation='relu'),\n", 239 | " Dense(1, activation='linear')\n", 240 | "]\n", 241 | "\n", 242 | "model = Sequential(network_layers)\n", 243 | "model.compile(optimizer=\"adam\", loss=\"mse\", metrics=[\"mae\", \"accuracy\"])\n", 244 | "model.summary()\n", 245 | "training_results = model.fit(training_features, training_labels, validation_data=(testing_features, testing_labels), batch_size=8, epochs=200, shuffle=True, verbose=1)\n" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": null, 251 | "metadata": {}, 252 | "outputs": [], 253 | "source": [ 254 | "fig, ax = plt.subplots(figsize=(15, 10))\n", 255 | "ax.plot(testing_labels, model.predict(testing_features), \"ob\")\n", 256 | "ax.plot([0, 25], [0, 25], \"-r\")\n", 257 | "ax.text(8, 1, f\"RMSE = {mean_squared_error(testing_labels, model.predict(testing_features), squared=False)}\", color=\"r\", fontsize=14, weight=\"bold\")\n", 258 | "plt.grid()\n", 259 | "plt.title(\"Abalone Model Evaluation\", fontweight=\"bold\", fontsize=12)\n", 260 | "plt.xlabel(\"Actual 'Rings'\", fontweight=\"bold\", fontsize=12)\n", 261 | "plt.ylabel(\"Predicted 'Rings'\", fontweight=\"bold\", fontsize=12)\n", 262 | "plt.legend([\"Predictions\", \"Regression Line\"], loc=\"upper left\", prop={\"weight\": \"bold\"})\n", 263 | "plt.show()" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": null, 269 | "metadata": {}, 270 | "outputs": [], 271 | "source": [ 272 | "plt.rcParams[\"figure.figsize\"] = (15, 10)\n", 273 | "plt.plot(training_results.history[\"loss\"])\n", 274 | "plt.plot(training_results.history[\"val_loss\"])\n", 275 | "plt.title(\"Training vs. Testing Loss\", fontweight=\"bold\", fontsize=14)\n", 276 | "plt.ylabel(\"Loss\", fontweight=\"bold\", fontsize=14)\n", 277 | "plt.xlabel(\"Epochs\", fontweight=\"bold\", fontsize=14)\n", 278 | "plt.legend([\"Training Loss\", \"Testing Loss\"], loc=\"upper right\", prop={\"weight\": \"bold\"})\n", 279 | "plt.grid()\n", 280 | "plt.show()" 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": null, 286 | "metadata": {}, 287 | "outputs": [], 288 | "source": [ 289 | "plt.rcParams[\"figure.figsize\"] = (15, 10)\n", 290 | "plt.plot(training_results.history[\"mae\"])\n", 291 | "plt.plot(training_results.history[\"val_mae\"])\n", 292 | "plt.title(\"Training vs. 
Testing Mean Absolute Error\", fontweight=\"bold\", fontsize=14)\n", 293 | "plt.ylabel(\"mae\", fontweight=\"bold\", fontsize=14)\n", 294 | "plt.xlabel(\"Epochs\", fontweight=\"bold\", fontsize=14)\n", 295 | "plt.legend([\"Training MAE\", \"Testing MAE\"], loc=\"upper right\", prop={\"weight\": \"bold\"})\n", 296 | "plt.grid()\n", 297 | "plt.show()" 298 | ] 299 | } 300 | ], 301 | "metadata": { 302 | "instance_type": "ml.t3.medium", 303 | "interpreter": { 304 | "hash": "91cd747e2918a8daa704f3bc8e42b880a4d8049712a274f2deb0fa8e8f710896" 305 | }, 306 | "kernelspec": { 307 | "display_name": "Python 3.8.10 64-bit ('3.8.10': pyenv)", 308 | "name": "python3" 309 | }, 310 | "language_info": { 311 | "codemirror_mode": { 312 | "name": "ipython", 313 | "version": 3 314 | }, 315 | "file_extension": ".py", 316 | "mimetype": "text/x-python", 317 | "name": "python", 318 | "nbconvert_exporter": "python", 319 | "pygments_lexer": "ipython3", 320 | "version": "3.8.10" 321 | } 322 | }, 323 | "nbformat": 4, 324 | "nbformat_minor": 4 325 | } 326 | -------------------------------------------------------------------------------- /Chapter02/Autopilot Example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Autopilot Example\n", 8 | ">__NOTE:__ Make sure to use the Pyton 3 (Data Science) Jupyter Kernel." 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "import sagemaker\n", 18 | "import pandas as pd\n", 19 | "\n", 20 | "role = sagemaker.get_execution_role()\n", 21 | "session = sagemaker.session.Session()" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "## Download Data" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "column_names = [\"sex\", \"length\", \"diameter\", \"height\", \"whole_weight\", \"shucked_weight\", \"viscera_weight\", \"shell_weight\", \"rings\"]\n", 38 | "abalone_data = pd.read_csv(\"http://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data\", names=column_names)\n", 39 | "abalone_data.to_csv(\"abalone_with_headers.csv\", index=False)" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "## Create the Autopilot Experiment" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "from sagemaker.automl.automl import AutoML\n", 56 | "automl_job = AutoML(\n", 57 | " role=role,\n", 58 | " target_attribute_name=\"rings\",\n", 59 | " output_path=f\"s3://{session.default_bucket()}/abalone-v1/output\",\n", 60 | " base_job_name=\"abalone\",\n", 61 | " sagemaker_session=session,\n", 62 | " max_candidates=250\n", 63 | ")\n" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "## Start the Autopilot Experiment" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "automl_job.fit(inputs=session.upload_data(\"abalone_with_headers.csv\", bucket=session.default_bucket(), key_prefix=\"abalone-v1/input\"), wait=False)" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "## Analyze the Autopilot Experiment\n", 87 | "\n", 88 | 
">__NOTE:__ Wait until the Autopilot Experiment has completed before proceeding." 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "from sagemaker.analytics import ExperimentAnalytics\n", 98 | "automl_experiment = ExperimentAnalytics(\n", 99 | " sagemaker_session=session,\n", 100 | " experiment_name=\"{}-aws-auto-ml-job\".format(automl_job.describe_auto_ml_job()[\"AutoMLJobName\"])\n", 101 | ")" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "df = automl_experiment.dataframe()\n", 111 | "df = df.filter([\"TrialComponentName\",\"validation:accuracy - Last\", \"train:accuracy - Last\"])\n", 112 | "df = df.sort_values(by=\"validation:accuracy - Last\", ascending=False)[:5]\n", 113 | "df" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "## Plot Trial Comparison" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "import matplotlib.pyplot as plt\n", 130 | "%matplotlib inline\n", 131 | "\n", 132 | "legend_colors = [\"r\", \"b\", \"g\", \"c\", \"m\"]\n", 133 | "ig, ax = plt.subplots(figsize=(15, 10))\n", 134 | "legend = []\n", 135 | "i = 0\n", 136 | "for column, value in df.iterrows():\n", 137 | " ax.plot(value[\"train:accuracy - Last\"], value[\"validation:accuracy - Last\"], \"o\", c=legend_colors[i], label=value.TrialComponentName)\n", 138 | " i +=1\n", 139 | "plt.title(\"Training vs.Testing Accuracy\", fontweight=\"bold\", fontsize=14)\n", 140 | "plt.ylabel(\"validation:accuracy - Last\", fontweight=\"bold\", fontsize=14)\n", 141 | "plt.xlabel(\"train:accuracy - Last\", fontweight=\"bold\", fontsize=14)\n", 142 | "plt.grid()\n", 143 | "plt.legend()\n", 144 | "plt.show()" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "## Best Candidate Overview\n", 152 | "\n", 153 | "### Best Candidate Job" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": {}, 160 | "outputs": [], 161 | "source": [ 162 | "automl_job.best_candidate()[\"CandidateName\"]" 163 | ] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "metadata": {}, 168 | "source": [ 169 | "### Best Candidate Evaluation Metrics" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": null, 175 | "metadata": {}, 176 | "outputs": [], 177 | "source": [ 178 | "automl_job.best_candidate()[\"FinalAutoMLJobObjectiveMetric\"]" 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": {}, 184 | "source": [ 185 | "## Candidate Artifacts\n", 186 | "\n", 187 | "### Data Exploration Notebook " 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": {}, 194 | "outputs": [], 195 | "source": [ 196 | "automl_job.describe_auto_ml_job()[\"AutoMLJobArtifacts\"][\"DataExplorationNotebookLocation\"]" 197 | ] 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "metadata": {}, 202 | "source": [ 203 | "### Candidate Definition Notebook" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": null, 209 | "metadata": {}, 210 | "outputs": [], 211 | "source": [ 212 | "automl_job.describe_auto_ml_job()[\"AutoMLJobArtifacts\"][\"CandidateDefinitionNotebookLocation\"]" 213 | ] 214 | }, 215 | { 216 | "cell_type": "markdown", 217 | 
"metadata": {}, 218 | "source": [ 219 | "### Explainability Report" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "metadata": {}, 226 | "outputs": [], 227 | "source": [ 228 | "automl_job.describe_auto_ml_job()[\"BestCandidate\"][\"CandidateProperties\"][\"CandidateArtifactLocations\"][\"Explainability\"]" 229 | ] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "metadata": {}, 234 | "source": [ 235 | "## Deploy the Best Candidate\n", 236 | "\n", 237 | ">__NOTE:__ Deploying the Best Model will incur AWS usage costs." 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": null, 243 | "metadata": {}, 244 | "outputs": [], 245 | "source": [ 246 | "automl_job.deploy(\n", 247 | " initial_instance_count=1,\n", 248 | " instance_type=\"ml.m5.xlarge\",\n", 249 | " candidate=automl_job.best_candidate(),\n", 250 | " sagemaker_session=session,\n", 251 | " endpoint_name=\"-\".join(automl_job.best_candidate()[\"CandidateName\"].split(\"-\")[0:7])\n", 252 | ")" 253 | ] 254 | }, 255 | { 256 | "cell_type": "markdown", 257 | "metadata": {}, 258 | "source": [ 259 | "## Cleanup\n", 260 | "\n", 261 | "### Delete Hoasted Endpoint" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": null, 267 | "metadata": {}, 268 | "outputs": [], 269 | "source": [ 270 | "!aws sagemaker delete-endpoint --endpoint-name {\"-\".join(automl_job.best_candidate()[\"CandidateName\"].split(\"-\")[0:7])}" 271 | ] 272 | }, 273 | { 274 | "cell_type": "markdown", 275 | "metadata": {}, 276 | "source": [ 277 | "### Delete the Endpoint Configuration" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": null, 283 | "metadata": {}, 284 | "outputs": [], 285 | "source": [ 286 | "!aws sagemaker delete-endpoint-config --endpoint-config-name {\"-\".join(automl_job.best_candidate()[\"CandidateName\"].split(\"-\")[0:7])}" 287 | ] 288 | } 289 | ], 290 | "metadata": { 291 | "instance_type": "ml.t3.medium", 292 | "kernelspec": { 293 | "display_name": "Python 3 (Data Science)", 294 | "language": "python", 295 | "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-west-2:236514542706:image/datascience-1.0" 296 | }, 297 | "language_info": { 298 | "codemirror_mode": { 299 | "name": "ipython", 300 | "version": 3 301 | }, 302 | "file_extension": ".py", 303 | "mimetype": "text/x-python", 304 | "name": "python", 305 | "nbconvert_exporter": "python", 306 | "pygments_lexer": "ipython3", 307 | "version": "3.7.10" 308 | } 309 | }, 310 | "nbformat": 4, 311 | "nbformat_minor": 4 312 | } 313 | -------------------------------------------------------------------------------- /Chapter03/Image/AutoGluon Image Example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# AutoGluon Image Example\n", 8 | ">__NOTE:__ Make sure to use the Pyton 3 (Data Science) Jupyter Kernel.\n", 9 | "\n", 10 | "## Prerequisites\n", 11 | "\n", 12 | "### Intalling the Image Build CLI" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "%%capture\n", 22 | "import sys\n", 23 | "import warnings\n", 24 | "warnings.filterwarnings(\"ignore\")\n", 25 | "%matplotlib inline\n", 26 | "\n", 27 | "!{sys.executable} -m pip install -U pip sagemaker-studio-image-build" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "### 
Configuring the AutoGluon Training/Testing Script" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "%%writefile train.py\n", 44 | "import os\n", 45 | "import json\n", 46 | "import boto3\n", 47 | "import json\n", 48 | "import warnings\n", 49 | "import numpy as np\n", 50 | "import pandas as pd\n", 51 | "from autogluon.vision import ImagePredictor\n", 52 | "\n", 53 | "warnings.filterwarnings(\"ignore\", category=DeprecationWarning)\n", 54 | "prefix = \"/opt/ml\"\n", 55 | "input_path = os.path.join(prefix, \"input/data\")\n", 56 | "output_path = os.path.join(prefix, \"output\")\n", 57 | "model_path = os.path.join(prefix, \"model\")\n", 58 | "param_path = os.path.join(prefix, \"input/config/hyperparameters.json\")\n", 59 | "\n", 60 | "\n", 61 | "def train(params):\n", 62 | " time_limit = int(params[\"time_limit\"])\n", 63 | " presets = \"\".join([str(i) for i in list(params[\"presets\"])])\n", 64 | " channel_name = \"training\"\n", 65 | " training_path = os.path.join(input_path, channel_name)\n", 66 | " training_dataset = ImagePredictor.Dataset.from_folder(training_path)\n", 67 | " predictor = ImagePredictor().fit(training_dataset, time_limit=time_limit, presets=presets)\n", 68 | " with open(os.path.join(model_path, \"FitSummary.json\"), \"w\") as f:\n", 69 | " json.dump(predictor.fit_summary(), f)\n", 70 | " predictor.save(os.path.join(model_path, \"ImagePredictor.Autogluon\"))\n", 71 | " return \"AutoGluon Job Complete\"\n", 72 | "\n", 73 | "\n", 74 | "if __name__ == \"__main__\":\n", 75 | " print(\"Loading Parameters\\n\")\n", 76 | " with open(param_path) as f:\n", 77 | " params = json.load(f)\n", 78 | " print(\"Training Models\\n\")\n", 79 | " result = train(params)\n", 80 | " print(result)" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "### Container Image Build Instructions (Dockerfile)" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "%%writefile Dockerfile\n", 97 | "ARG REGION\n", 98 | "FROM 763104351884.dkr.ecr.${REGION}.amazonaws.com/autogluon-training:0.3.1-gpu-py37-cu102-ubuntu18.04\n", 99 | "RUN pip install -U pip wheel setuptools\n", 100 | "RUN pip install autogluon\n", 101 | "RUN mkdir -p /opt/program\n", 102 | "RUN mkdir -p /opt/ml\n", 103 | "COPY train.py /opt/program\n", 104 | "WORKDIR /opt/program\n", 105 | "ENTRYPOINT [\"python\", \"train.py\"]" 106 | ] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "metadata": {}, 111 | "source": [ 112 | "### Container Build Process" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": [ 121 | "import boto3\n", 122 | "import sagemaker\n", 123 | "\n", 124 | "aws_region = sagemaker.Session().boto_session.region_name\n", 125 | "!sm-docker build --build-arg REGION={aws_region} ." 
126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": {}, 131 | "source": [ 132 | "---\n", 133 | "\n", 134 | "## AutoGluon Experiment\n", 135 | "\n", 136 | "### Download the Image Data" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [ 145 | "import io\n", 146 | "import urllib\n", 147 | "import zipfile\n", 148 | "\n", 149 | "dataset_url = \"https://storage.googleapis.com/laurencemoroney-blog.appspot.com/rps.zip\"\n", 150 | "with urllib.request.urlopen(dataset_url) as rps_zipfile:\n", 151 | " with zipfile.ZipFile(io.BytesIO(rps_zipfile.read())) as z:\n", 152 | " z.extractall(\"data\")" 153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": {}, 158 | "source": [ 159 | "### Experiment Parameters\n", 160 | "\n", 161 | ">__NOTE:__ Make sure to update the `image_uri` parameter with the _Image URI_ output the __Container Build Process__." 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [ 170 | "import sagemaker\n", 171 | "import datetime\n", 172 | "\n", 173 | "image_uri = \"\"\n", 174 | "role = sagemaker.get_execution_role()\n", 175 | "session = sagemaker.session.Session()\n", 176 | "bucket = session.default_bucket()\n", 177 | "job_version = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')[:-3]\n", 178 | "job_name = f\"autogluon-image-{job_version}\"" 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": {}, 184 | "source": [ 185 | "### Create the AutoGluon Estimator\n", 186 | "\n", 187 | ">__TIP:__ To leverage [Managed Spot Training](https://docs.aws.amazon.com/sagemaker/latest/dg/model-managed-spot-training.html) to further resuce training costs, uncomment the lines in the following code cell." 
188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": {}, 194 | "outputs": [], 195 | "source": [ 196 | "from sagemaker.estimator import Estimator\n", 197 | "\n", 198 | "autogluon = Estimator(\n", 199 | " image_uri=image_uri,\n", 200 | " role=role,\n", 201 | " output_path=f\"s3://{bucket}/{job_name}\",\n", 202 | " base_job_name=job_name,\n", 203 | " instance_count=1,\n", 204 | " instance_type=\"ml.p2.xlarge\",\n", 205 | " hyperparameters={\n", 206 | " \"presets\": \"medium_quality_faster_train\",\n", 207 | " \"time_limit\": \"600\",\n", 208 | " \"bucket\": bucket,\n", 209 | " \"training_job\": job_name\n", 210 | " },\n", 211 | " volume_size=50,\n", 212 | "# use_spot_instances=True,\n", 213 | "# max_wait=3600,\n", 214 | "# max_run=8*3600\n", 215 | ")" 216 | ] 217 | }, 218 | { 219 | "cell_type": "markdown", 220 | "metadata": {}, 221 | "source": [ 222 | "### Execute the Experiment" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "metadata": {}, 229 | "outputs": [], 230 | "source": [ 231 | "autogluon.fit(\n", 232 | " inputs={\n", 233 | " \"training\": session.upload_data(\n", 234 | " \"data/rps\",\n", 235 | " bucket=bucket,\n", 236 | " key_prefix=f\"{job_name}/input\"\n", 237 | " )\n", 238 | " }\n", 239 | ")" 240 | ] 241 | }, 242 | { 243 | "cell_type": "markdown", 244 | "metadata": {}, 245 | "source": [ 246 | "### Experiment Results\n", 247 | "\n", 248 | "#### Download Model Artifacts" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": null, 254 | "metadata": {}, 255 | "outputs": [], 256 | "source": [ 257 | "!mkdir extract\n", 258 | "sagemaker.s3.S3Downloader.download(autogluon.model_data, \"./\")\n", 259 | "!tar xfz ./model.tar.gz -C extract" 260 | ] 261 | }, 262 | { 263 | "cell_type": "markdown", 264 | "metadata": {}, 265 | "source": [ 266 | "#### Review Model Summary" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": null, 272 | "metadata": {}, 273 | "outputs": [], 274 | "source": [ 275 | "import json\n", 276 | "with open(\"extract/FitSummary.json\", \"r\") as f:\n", 277 | " fit_summary = json.load(f)\n", 278 | "print(json.dumps(fit_summary, indent=4))\n", 279 | "print(f\"\"\"Best Model Training Accuracy: {fit_summary[\"train_acc\"]} \\nBest Model Validation Accuracy: {fit_summary[\"valid_acc\"]}\"\"\")" 280 | ] 281 | } 282 | ], 283 | "metadata": { 284 | "instance_type": "ml.t3.medium", 285 | "kernelspec": { 286 | "display_name": "Python 3 (Data Science)", 287 | "language": "python", 288 | "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-west-2:236514542706:image/datascience-1.0" 289 | }, 290 | "language_info": { 291 | "codemirror_mode": { 292 | "name": "ipython", 293 | "version": 3 294 | }, 295 | "file_extension": ".py", 296 | "mimetype": "text/x-python", 297 | "name": "python", 298 | "nbconvert_exporter": "python", 299 | "pygments_lexer": "ipython3", 300 | "version": "3.7.10" 301 | } 302 | }, 303 | "nbformat": 4, 304 | "nbformat_minor": 4 305 | } 306 | -------------------------------------------------------------------------------- /Chapter03/Tabular/AutoGluon Tabular Example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# AutoGluon Tabular Example\n", 8 | ">__NOTE:__ Make sure to use the Pyton 3 (Data Science) Jupyter Kernel.\n", 9 | "\n", 10 | "## Prerequisites\n", 11 | "\n", 12 | "### Intalling the 
Image Build CLI" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "%%capture\n", 22 | "import sys\n", 23 | "import warnings\n", 24 | "warnings.filterwarnings('ignore')\n", 25 | "%matplotlib inline\n", 26 | "\n", 27 | "!{sys.executable} -m pip install -U pip sagemaker-studio-image-build" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "### Configuring the AutoGluon Training/Testing Script" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "%%writefile train.py\n", 44 | "import os\n", 45 | "import json\n", 46 | "import boto3\n", 47 | "import json\n", 48 | "import warnings\n", 49 | "import numpy as np\n", 50 | "import pandas as pd\n", 51 | "from autogluon.tabular import TabularDataset, TabularPredictor\n", 52 | "\n", 53 | "warnings.filterwarnings(\"ignore\", category=DeprecationWarning)\n", 54 | "prefix = \"/opt/ml\"\n", 55 | "input_path = os.path.join(prefix, \"input/data\")\n", 56 | "output_path = os.path.join(prefix, \"output\")\n", 57 | "model_path = os.path.join(prefix, \"model\")\n", 58 | "param_path = os.path.join(prefix, 'input/config/hyperparameters.json')\n", 59 | "\n", 60 | "\n", 61 | "def train(params):\n", 62 | " label = params[\"label\"]\n", 63 | " channel_name = \"training\"\n", 64 | " training_path = os.path.join(input_path, channel_name)\n", 65 | " training_dataset = TabularDataset(os.path.join(training_path, \"training.csv\"))\n", 66 | " predictor = TabularPredictor(label=label, path=model_path).fit(training_dataset)\n", 67 | " with open(os.path.join(model_path, \"Fit_Summary.txt\"), \"w\") as f:\n", 68 | " print(predictor.fit_summary(), file=f)\n", 69 | " return predictor\n", 70 | " \n", 71 | "\n", 72 | "def test(params, predictor):\n", 73 | " label = params[\"label\"]\n", 74 | " channel_name = \"testing\"\n", 75 | " testing_path = os.path.join(input_path, channel_name)\n", 76 | " testing_dataset = TabularDataset(os.path.join(testing_path, \"testing.csv\"))\n", 77 | " ground_truth = testing_dataset[label]\n", 78 | " testing_data = testing_dataset.drop(columns=label)\n", 79 | " predictions = predictor.predict(testing_data)\n", 80 | " with open(os.path.join(model_path, \"Model_Evaluation.txt\"), \"w\") as f:\n", 81 | " print(\n", 82 | " json.dumps(\n", 83 | " predictor.evaluate_predictions(\n", 84 | " y_true=ground_truth,\n", 85 | " y_pred=predictions,\n", 86 | " auxiliary_metrics=True\n", 87 | " ),\n", 88 | " indent=4\n", 89 | " ),\n", 90 | " file=f\n", 91 | " )\n", 92 | " leaderboard = predictor.leaderboard(testing_dataset, silent=True)\n", 93 | " leaderboard.to_csv(os.path.join(model_path, \"Leaderboard.csv\"))\n", 94 | "\n", 95 | "\n", 96 | "if __name__ == \"__main__\":\n", 97 | " print(\"Loading Parameters\\n\")\n", 98 | " with open(param_path) as f:\n", 99 | " params = json.load(f)\n", 100 | " print(\"Training Models\\n\")\n", 101 | " predictor = train(params)\n", 102 | " print(\"Testig Models\\n\")\n", 103 | " test(params, predictor)\n", 104 | " print(\"AutoGluon Job Complete\")" 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "### Container Image Build Instructions (Dockerfile)" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "%%writefile Dockerfile\n", 121 | "ARG REGION\n", 122 | "FROM 
763104351884.dkr.ecr.${REGION}.amazonaws.com/autogluon-training:0.3.1-cpu-py37-ubuntu18.04\n", 123 | "RUN pip install -U pip\n", 124 | "RUN pip install bokeh==2.0.1\n", 125 | "RUN mkdir -p /opt/program\n", 126 | "RUN mkdir -p /opt/ml\n", 127 | "COPY train.py /opt/program\n", 128 | "WORKDIR /opt/program\n", 129 | "ENTRYPOINT [\"python\", \"train.py\"]" 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": {}, 135 | "source": [ 136 | "### Container Build Process" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [ 145 | "import boto3\n", 146 | "import sagemaker\n", 147 | "\n", 148 | "aws_region = sagemaker.Session().boto_session.region_name\n", 149 | "!sm-docker build --build-arg REGION={aws_region} ." 150 | ] 151 | }, 152 | { 153 | "cell_type": "markdown", 154 | "metadata": {}, 155 | "source": [ 156 | "---\n", 157 | "\n", 158 | "## AutoGluon Experiment\n", 159 | "\n", 160 | "### Download the Abalone Data" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": null, 166 | "metadata": {}, 167 | "outputs": [], 168 | "source": [ 169 | "import numpy as np\n", 170 | "import pandas as pd \n", 171 | "from sklearn.model_selection import train_test_split\n", 172 | "\n", 173 | "column_names = [\"sex\", \"length\", \"diameter\", \"height\", \"whole_weight\", \"shucked_weight\", \"viscera_weight\", \"shell_weight\", \"rings\"]\n", 174 | "abalone_data = pd.read_csv(\"http://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data\", names=column_names)\n", 175 | "training_data, testing_data = train_test_split(abalone_data, test_size=0.1)\n", 176 | "training_data.to_csv(\"training.csv\")\n", 177 | "testing_data.to_csv(\"testing.csv\")" 178 | ] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "metadata": {}, 183 | "source": [ 184 | "### Experiment Parameters\n", 185 | "\n", 186 | ">__NOTE:__ Update the `image_uri` parameter with the _Image URI_ output the __Container Build Process__." 
187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": {}, 193 | "outputs": [], 194 | "source": [ 195 | "import sagemaker\n", 196 | "import datetime\n", 197 | "\n", 198 | "image_uri = \"\"\n", 199 | "role = sagemaker.get_execution_role()\n", 200 | "session = sagemaker.session.Session()\n", 201 | "bucket = session.default_bucket()\n", 202 | "job_version = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')[:-3]\n", 203 | "job_name = f\"abalone-autogluon-{job_version}\"" 204 | ] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "metadata": {}, 209 | "source": [ 210 | "### Create the AutoGluon Estimator " 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": {}, 217 | "outputs": [], 218 | "source": [ 219 | "from sagemaker.estimator import Estimator\n", 220 | "\n", 221 | "autogluon = Estimator(\n", 222 | " image_uri=image_uri,\n", 223 | " role=role,\n", 224 | " output_path=f\"s3://{bucket}/{job_name}\",\n", 225 | " base_job_name=job_name,\n", 226 | " instance_count=1,\n", 227 | " instance_type=\"ml.m5.xlarge\",\n", 228 | " hyperparameters={\n", 229 | " \"label\": \"rings\",\n", 230 | " \"bucket\": bucket,\n", 231 | " \"training_job\": job_name\n", 232 | " },\n", 233 | " volume_size=20\n", 234 | ")" 235 | ] 236 | }, 237 | { 238 | "cell_type": "markdown", 239 | "metadata": {}, 240 | "source": [ 241 | "### Execute the Experiment" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": null, 247 | "metadata": {}, 248 | "outputs": [], 249 | "source": [ 250 | "autogluon.fit(\n", 251 | " inputs={\n", 252 | " \"training\": session.upload_data(\n", 253 | " \"training.csv\",\n", 254 | " bucket=bucket,\n", 255 | " key_prefix=f\"{job_name}/input\"\n", 256 | " ),\n", 257 | " \"testing\": session.upload_data(\n", 258 | " \"testing.csv\",\n", 259 | " bucket=bucket,\n", 260 | " key_prefix=f\"{job_name}/input\"\n", 261 | " )\n", 262 | " }\n", 263 | ")" 264 | ] 265 | }, 266 | { 267 | "cell_type": "markdown", 268 | "metadata": {}, 269 | "source": [ 270 | "### Experiment Results\n", 271 | "\n", 272 | "#### Download Model Artifacts" 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": null, 278 | "metadata": {}, 279 | "outputs": [], 280 | "source": [ 281 | "!mkdir extract\n", 282 | "sagemaker.s3.S3Downloader.download(autogluon.model_data, \"./\")\n", 283 | "!tar xfz ./model.tar.gz -C extract" 284 | ] 285 | }, 286 | { 287 | "cell_type": "markdown", 288 | "metadata": {}, 289 | "source": [ 290 | "#### Review Model Leaderboard" 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": null, 296 | "metadata": {}, 297 | "outputs": [], 298 | "source": [ 299 | "df = pd.read_csv(\"./extract/Leaderboard.csv\")\n", 300 | "df = df.filter([\"model\",\"score_test\", \"score_val\"]).sort_values(by=\"score_val\", ascending=False).reset_index().drop(columns=\"index\")\n", 301 | "df" 302 | ] 303 | }, 304 | { 305 | "cell_type": "markdown", 306 | "metadata": {}, 307 | "source": [ 308 | "#### Plot Model Comparison" 309 | ] 310 | }, 311 | { 312 | "cell_type": "code", 313 | "execution_count": null, 314 | "metadata": {}, 315 | "outputs": [], 316 | "source": [ 317 | "import IPython\n", 318 | "IPython.display.HTML(filename=\"./extract/SummaryOfModels.html\")" 319 | ] 320 | } 321 | ], 322 | "metadata": { 323 | "instance_type": "ml.t3.medium", 324 | "kernelspec": { 325 | "display_name": "Python 3 (Data Science)", 326 | "language": "python", 327 | "name": 
"python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-west-2:236514542706:image/datascience-1.0" 328 | }, 329 | "language_info": { 330 | "codemirror_mode": { 331 | "name": "ipython", 332 | "version": 3 333 | }, 334 | "file_extension": ".py", 335 | "mimetype": "text/x-python", 336 | "name": "python", 337 | "nbconvert_exporter": "python", 338 | "pygments_lexer": "ipython3", 339 | "version": "3.7.10" 340 | } 341 | }, 342 | "nbformat": 4, 343 | "nbformat_minor": 4 344 | } 345 | -------------------------------------------------------------------------------- /Chapter03/policy.json: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Effect": "Allow", 6 | "Action": [ 7 | "codebuild:DeleteProject", 8 | "codebuild:CreateProject", 9 | "codebuild:BatchGetBuilds", 10 | "codebuild:StartBuild" 11 | ], 12 | "Resource": "arn:aws:codebuild:*:*:project/sagemaker-studio*" 13 | }, 14 | { 15 | "Effect": "Allow", 16 | "Action": "logs:CreateLogStream", 17 | "Resource": "arn:aws:logs:*:*:log-group:/aws/codebuild/sagemaker-studio*" 18 | }, 19 | { 20 | "Effect": "Allow", 21 | "Action": [ 22 | "logs:GetLogEvents", 23 | "logs:PutLogEvents" 24 | ], 25 | "Resource": "arn:aws:logs:*:*:log-group:/aws/codebuild/sagemaker-studio*:log-stream:*" 26 | }, 27 | { 28 | "Effect": "Allow", 29 | "Action": "logs:CreateLogGroup", 30 | "Resource": "*" 31 | }, 32 | { 33 | "Effect": "Allow", 34 | "Action": [ 35 | "ecr:CreateRepository", 36 | "ecr:BatchGetImage", 37 | "ecr:CompleteLayerUpload", 38 | "ecr:DescribeImages", 39 | "ecr:DescribeRepositories", 40 | "ecr:UploadLayerPart", 41 | "ecr:ListImages", 42 | "ecr:InitiateLayerUpload", 43 | "ecr:BatchCheckLayerAvailability", 44 | "ecr:PutImage" 45 | ], 46 | "Resource": "arn:aws:ecr:*:*:repository/sagemaker-studio*" 47 | }, 48 | { 49 | "Effect": "Allow", 50 | "Action": "ecr:GetAuthorizationToken", 51 | "Resource": "*" 52 | }, 53 | { 54 | "Effect": "Allow", 55 | "Action": [ 56 | "s3:GetObject", 57 | "s3:DeleteObject", 58 | "s3:PutObject" 59 | ], 60 | "Resource": "arn:aws:s3:::sagemaker-*/*" 61 | }, 62 | { 63 | "Effect": "Allow", 64 | "Action": [ 65 | "s3:CreateBucket" 66 | ], 67 | "Resource": "arn:aws:s3:::sagemaker*" 68 | }, 69 | { 70 | "Effect": "Allow", 71 | "Action": [ 72 | "iam:GetRole", 73 | "iam:ListRoles" 74 | ], 75 | "Resource": "*" 76 | }, 77 | { 78 | "Effect": "Allow", 79 | "Action": "iam:PassRole", 80 | "Resource": "arn:aws:iam::*:role/*", 81 | "Condition": { 82 | "StringLikeIfExists": { 83 | "iam:PassedToService": "codebuild.amazonaws.com" 84 | } 85 | } 86 | } 87 | ] 88 | } -------------------------------------------------------------------------------- /Chapter04/cdk/abalone_endpoint_stack.py: -------------------------------------------------------------------------------- 1 | import os 2 | import aws_cdk as cdk 3 | import aws_cdk.aws_sagemaker as sagemaker 4 | 5 | class EndpointStack(cdk.Stack): 6 | def __init__(self, app: cdk.App, id: str, *, model_name: str=None, **kwargs) -> None: 7 | super().__init__(app, id, **kwargs) 8 | 9 | bucket_name = cdk.CfnParameter( 10 | self, 11 | "BucketName", 12 | type="String" 13 | ) 14 | 15 | execution_id = cdk.CfnParameter( 16 | self, 17 | "ExecutionId", 18 | type="String" 19 | ) 20 | 21 | model_uri = cdk.CfnParameter( 22 | self, 23 | "ModelUri", 24 | type="String" 25 | ) 26 | 27 | execution_role = cdk.CfnParameter( 28 | self, 29 | "ExecutionRole", 30 | type="String" 31 | ) 32 | 33 | model_image = cdk.CfnParameter( 34 | self, 35 | "ImageUri", 36 | 
type="String" 37 | ) 38 | 39 | model = sagemaker.CfnModel( 40 | self, 41 | "Model", 42 | model_name="{}-model-{}".format(model_name.capitalize(), execution_id.value_as_string), 43 | execution_role_arn=execution_role.value_as_string, 44 | primary_container=sagemaker.CfnModel.ContainerDefinitionProperty( 45 | image=model_image.value_as_string, 46 | model_data_url=model_uri.value_as_string, 47 | image_config=sagemaker.CfnModel.ImageConfigProperty( 48 | repository_access_mode="Platform" 49 | ) 50 | ) 51 | ) 52 | 53 | endpoint_config = sagemaker.CfnEndpointConfig( 54 | self, 55 | "EndpointConfig", 56 | endpoint_config_name="{}-config-{}".format(model_name.capitalize(), execution_id.value_as_string), 57 | production_variants=[ 58 | sagemaker.CfnEndpointConfig.ProductionVariantProperty( 59 | initial_instance_count=2, 60 | initial_variant_weight=1.0, 61 | instance_type="ml.m5.large", 62 | model_name=model.attr_model_name, 63 | variant_name="AllTraffic" 64 | ) 65 | ], 66 | data_capture_config=sagemaker.CfnEndpointConfig.DataCaptureConfigProperty( 67 | capture_content_type_header=sagemaker.CfnEndpointConfig.CaptureContentTypeHeaderProperty( 68 | csv_content_types=[ 69 | "text/csv" 70 | ] 71 | ), 72 | capture_options=[ 73 | sagemaker.CfnEndpointConfig.CaptureOptionProperty(capture_mode="Input"), 74 | sagemaker.CfnEndpointConfig.CaptureOptionProperty(capture_mode="Output") 75 | ], 76 | destination_s3_uri="s3://{}/endpoint-data-capture".format(bucket_name.value_as_string), 77 | enable_capture=True, 78 | initial_sampling_percentage=100.0 79 | ) 80 | ) 81 | endpoint_config.add_depends_on(model) 82 | 83 | endpoint = sagemaker.CfnEndpoint( 84 | self, 85 | "AbaloneEndpoint", 86 | endpoint_config_name=endpoint_config.attr_endpoint_config_name, 87 | endpoint_name="{}-Endpoint".format(model_name.capitalize()) 88 | ) 89 | endpoint.add_depends_on(endpoint_config) -------------------------------------------------------------------------------- /Chapter04/scripts/build.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import logging 3 | import os 4 | import sys 5 | import time 6 | from botocore.exceptions import ClientError 7 | 8 | logger = logging.getLogger() 9 | logging_format = "%(levelname)s: [%(filename)s:%(lineno)s] %(message)s" 10 | logging.basicConfig(format=logging_format, level=os.environ.get("LOGLEVEL", "INFO").upper()) 11 | codepipeline_client = boto3.client("codepipeline") 12 | sagemaker_client = boto3.client("sagemaker") 13 | image_uri = os.environ["IMAGE_URI"] 14 | bucket_name = os.environ["BUCKET_NAME"] 15 | role_arn = os.environ["ROLE_ARN"] 16 | pipeline_name = os.environ["PIPELINE_NAME"] 17 | model_name = os.environ["MODEL_NAME"] 18 | 19 | 20 | def get_execution_id(name=None, task=None): 21 | try: 22 | response = codepipeline_client.get_pipeline_state(name=name) 23 | for stage in response["stageStates"]: 24 | if stage["stageName"] == "Build": 25 | for action in stage["actionStates"]: 26 | if action["actionName"] == task.capitalize(): 27 | return stage["latestExecution"]["pipelineExecutionId"] 28 | except ClientError as e: 29 | error = e.response["Error"]["Message"] 30 | logger.error(error) 31 | raise Exception(error) 32 | 33 | 34 | def get_model_artifact(name=None): 35 | try: 36 | response = sagemaker_client.describe_training_job(TrainingJobName=name) 37 | return response["ModelArtifacts"]["S3ModelArtifacts"] 38 | except ClientError as e: 39 | error = e.response["Error"]["Message"] 40 | logger.error(error) 41 | raise Exception(error) 42 | 43 
| 44 | def handle_data(model_name=None, execution_id=None): 45 | try: 46 | response = sagemaker_client.create_processing_job( 47 | ProcessingJobName=f"{model_name}-ProcessingJob-{execution_id}", 48 | ProcessingResources={ 49 | 'ClusterConfig': { 50 | 'InstanceCount': 1, 51 | 'InstanceType': 'ml.m5.xlarge', 52 | 'VolumeSizeInGB': 30 53 | } 54 | }, 55 | StoppingCondition={ 56 | 'MaxRuntimeInSeconds': 3600 57 | }, 58 | AppSpecification={ 59 | 'ImageUri': f"{image_uri}:latest", 60 | 'ContainerEntrypoint': ["python", "app.py", "preprocess"] 61 | }, 62 | ProcessingInputs=[ 63 | { 64 | 'InputName': 'data', 65 | 'S3Input': { 66 | 'S3Uri': f"s3://{bucket_name}/data/{model_name}.data", 67 | 'LocalPath': '/opt/ml/processing/input/data', 68 | 'S3DataType': 'S3Prefix', 69 | 'S3InputMode': 'File', 70 | 'S3DataDistributionType': 'FullyReplicated', 71 | 'S3CompressionType': 'None' 72 | } 73 | } 74 | ], 75 | ProcessingOutputConfig={ 76 | 'Outputs': [ 77 | { 78 | 'OutputName': 'training', 79 | 'S3Output': { 80 | 'S3Uri': f"s3://{bucket_name}/{execution_id}/input/training", 81 | 'LocalPath': '/opt/ml/processing/output/training', 82 | 'S3UploadMode': 'EndOfJob' 83 | } 84 | }, 85 | { 86 | 'OutputName': 'testing', 87 | 'S3Output': { 88 | 'S3Uri': f"s3://{bucket_name}/{execution_id}/input/testing", 89 | 'LocalPath': '/opt/ml/processing/output/testing', 90 | 'S3UploadMode': 'EndOfJob' 91 | } 92 | } 93 | ] 94 | }, 95 | RoleArn=role_arn 96 | ) 97 | return f"{model_name}-ProcessingJob-{execution_id}" 98 | except ClientError as e: 99 | error = e.response["Error"]["Message"] 100 | logger.error(error) 101 | raise Exception(error) 102 | 103 | 104 | def handle_training(model_name=None, execution_id=None): 105 | try: 106 | response = sagemaker_client.create_training_job( 107 | TrainingJobName=f"{model_name}-TrainingJob-{execution_id}", 108 | AlgorithmSpecification={ 109 | 'TrainingImage': f"{image_uri}:latest", 110 | 'TrainingInputMode': 'File', 111 | 'EnableSageMakerMetricsTimeSeries': True, 112 | 'MetricDefinitions': [ 113 | { 114 | 'Name': 'loss', 115 | 'Regex': 'loss: ([0-9\\.]+)' 116 | }, 117 | { 118 | 'Name': 'mae', 119 | 'Regex': 'mae: ([0-9\\.]+)' 120 | }, 121 | { 122 | 'Name': 'validation_loss', 123 | 'Regex': 'val_loss: ([0-9\\.]+)' 124 | }, 125 | { 126 | 'Name': 'validation_mae', 127 | 'Regex': 'val_mae: ([0-9\\.]+)' 128 | } 129 | ] 130 | }, 131 | HyperParameters={ 132 | 'epochs': '200', 133 | 'batch_size': '8' 134 | }, 135 | InputDataConfig=[ 136 | { 137 | 'ChannelName': 'training', 138 | 'ContentType': 'text/csv', 139 | 'DataSource': { 140 | 'S3DataSource': { 141 | 'S3Uri': f"s3://{bucket_name}/{execution_id}/input/training", 142 | 'S3DataType': 'S3Prefix', 143 | 'S3DataDistributionType': 'FullyReplicated' 144 | } 145 | } 146 | } 147 | ], 148 | OutputDataConfig={ 149 | 'S3OutputPath': f"s3://{bucket_name}/{execution_id}" 150 | }, 151 | ResourceConfig={ 152 | 'InstanceType': 'ml.m5.xlarge', 153 | 'InstanceCount': 1, 154 | 'VolumeSizeInGB': 30 155 | }, 156 | RoleArn=role_arn, 157 | StoppingCondition={ 158 | 'MaxRuntimeInSeconds': 3600 159 | } 160 | ) 161 | return f"{model_name}-TrainingJob-{execution_id}" 162 | except ClientError as e: 163 | error = e.response["Error"]["Message"] 164 | logger.error(error) 165 | raise Exception(error) 166 | 167 | 168 | def handle_evaluation(model_name=None, execution_id=None): 169 | try: 170 | response = sagemaker_client.create_processing_job( 171 | ProcessingJobName=f"{model_name}-EvaluationJob-{execution_id}", 172 | ProcessingResources={ 173 | 'ClusterConfig': { 174 | 
'InstanceCount': 1, 175 | 'InstanceType': 'ml.m5.xlarge', 176 | 'VolumeSizeInGB': 30 177 | } 178 | }, 179 | StoppingCondition={ 180 | 'MaxRuntimeInSeconds': 3600 181 | }, 182 | AppSpecification={ 183 | 'ImageUri': f"{image_uri}:latest", 184 | 'ContainerEntrypoint': ["python", "app.py", "evaluate"] 185 | }, 186 | ProcessingInputs=[ 187 | { 188 | 'InputName': 'data', 189 | 'S3Input': { 190 | 'S3Uri': f"s3://{bucket_name}/{execution_id}/input/testing", 191 | 'LocalPath': '/opt/ml/processing/input/data', 192 | 'S3DataType': 'S3Prefix', 193 | 'S3InputMode': 'File', 194 | 'S3DataDistributionType': 'FullyReplicated', 195 | 'S3CompressionType': 'None' 196 | } 197 | }, 198 | { 199 | 'InputName': 'model', 200 | 'S3Input': { 201 | 'S3Uri': get_model_artifact(name=f"{model_name}-TrainingJob-{execution_id}"), 202 | 'LocalPath': '/opt/ml/processing/input/model', 203 | 'S3DataType': 'S3Prefix', 204 | 'S3InputMode': 'File', 205 | 'S3DataDistributionType': 'FullyReplicated', 206 | 'S3CompressionType': 'None' 207 | } 208 | } 209 | ], 210 | ProcessingOutputConfig={ 211 | 'Outputs': [ 212 | { 213 | 'OutputName': 'evaluation', 214 | 'S3Output': { 215 | 'S3Uri': f"s3://{bucket_name}/{execution_id}/evaluation", 216 | 'LocalPath': '/opt/ml/processing/output/evaluation', 217 | 'S3UploadMode': 'EndOfJob' 218 | } 219 | } 220 | ] 221 | }, 222 | RoleArn=role_arn 223 | ) 224 | return f"{model_name}-EvaluationJob-{execution_id}" 225 | except ClientError as e: 226 | error = e.response["Error"]["Message"] 227 | logger.error(error) 228 | raise Exception(error) 229 | 230 | 231 | def handle_status(task=None, job_name=None): 232 | if task == "preprocess" or task == "evaluate": 233 | status = sagemaker_client.describe_processing_job(ProcessingJobName=job_name)["ProcessingJobStatus"] 234 | while status == "InProgress": 235 | time.sleep(60) 236 | logger.info(f"Task: {task}, Status: {status}") 237 | status = sagemaker_client.describe_processing_job(ProcessingJobName=job_name)["ProcessingJobStatus"] 238 | return status 239 | elif task == "train": 240 | status = sagemaker_client.describe_training_job(TrainingJobName=job_name)["TrainingJobStatus"] 241 | while status == "InProgress": 242 | time.sleep(60) 243 | logger.info(f"Task: {task}, Status: {status}") 244 | status = sagemaker_client.describe_training_job(TrainingJobName=job_name)["TrainingJobStatus"] 245 | return status 246 | 247 | 248 | if __name__ == "__main__": 249 | task = sys.argv[1] 250 | execution_id = get_execution_id(name=pipeline_name, task=task) 251 | logger.info(f"Executing {task.upper()} task") 252 | if task == "preprocess": 253 | job_name = handle_data(model_name=model_name, execution_id=execution_id) 254 | status = handle_status(task=task, job_name=job_name) 255 | elif task == "train": 256 | job_name = handle_training(model_name=model_name, execution_id=execution_id) 257 | status = handle_status(task=task, job_name=job_name) 258 | elif task == "evaluate": 259 | job_name = handle_evaluation(model_name=model_name, execution_id=execution_id) 260 | status = handle_status(task=task, job_name=job_name) 261 | else: 262 | error = "Invalid argument: Specify 'preprocess', 'train' or 'evaluate'" 263 | logger.error(error) 264 | sys.exit(255) 265 | if status == "Completed": 266 | logger.info(f"Task: {task}, Final Status: {status}") 267 | sys.exit(0) 268 | else: 269 | error = f"Task: {task}, Failed! 
See CloudWatch Logs for further information" 270 | logger.error(error) 271 | sys.exit(255) 272 | -------------------------------------------------------------------------------- /Chapter04/scripts/deploy.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import logging 3 | import os 4 | import json 5 | import sys 6 | from botocore.exceptions import ClientError 7 | 8 | logger = logging.getLogger() 9 | logging_format = "%(levelname)s: [%(filename)s:%(lineno)s] %(message)s" 10 | logging.basicConfig(format=logging_format, level=os.environ.get("LOGLEVEL", "INFO").upper()) 11 | codepipeline_client = boto3.client("codepipeline") 12 | sagemaker_client = boto3.client("sagemaker") 13 | pipeline_name = os.environ["PIPELINE_NAME"] 14 | model_name = os.environ["MODEL_NAME"] 15 | role_arn = os.environ["ROLE_ARN"] 16 | 17 | 18 | def get_execution_id(name=None, task=None): 19 | try: 20 | response = codepipeline_client.get_pipeline_state(name=name) 21 | for stage in response["stageStates"]: 22 | if stage["stageName"] == "Deploy": 23 | for action in stage["actionStates"]: 24 | if action["actionName"] == task: 25 | return stage["latestExecution"]["pipelineExecutionId"] 26 | except ClientError as e: 27 | error = e.response["Error"]["Message"] 28 | logger.error(error) 29 | raise Exception(error) 30 | 31 | 32 | def get_model_artifact(model_name=None, execution_id=None): 33 | try: 34 | response = sagemaker_client.describe_training_job(TrainingJobName=f"{model_name}-TrainingJob-{execution_id}") 35 | return response["ModelArtifacts"]["S3ModelArtifacts"] 36 | except ClientError as e: 37 | error = e.response["Error"]["Message"] 38 | logger.error(error) 39 | raise Exception(error) 40 | 41 | 42 | if __name__ == "__main__": 43 | task = "DeploymentBuild" 44 | execution_id = get_execution_id(name=pipeline_name, task=task) 45 | logger.info("Creating Stack Parameters") 46 | params = { 47 | "ImageUri": "{}:latest".format(os.environ["IMAGE_URI"]), 48 | "ExecutionId": execution_id, 49 | "BucketName": os.environ["BUCKET_NAME"], 50 | "ModelUri": get_model_artifact(model_name=model_name, execution_id=execution_id), 51 | "ExecutionRole": os.environ["ROLE_ARN"] 52 | } 53 | try: 54 | with open(os.path.join(os.environ["CODEBUILD_SRC_DIR"], "output/params.json"), "w") as f: 55 | json.dump(params, f) 56 | logger.info(json.dumps(params, indent=4)), 57 | sys.exit(0) 58 | except Exception as error: 59 | logger.error(error) 60 | sys.exit(255) 61 | -------------------------------------------------------------------------------- /Chapter05/cdk/app.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import aws_cdk as cdk 5 | from abalone_cicd_pipeline.abalone_endpoint_stack import EndpointStack 6 | from abalone_cicd_pipeline.abalone_cicd_pipeline_stack import PipelineStack 7 | 8 | 9 | MODEL = "abalone" 10 | CODECOMMIT_REPOSITORY = "abalone-cicd-pipeline" 11 | CDK_VERSION = "2.3.0" 12 | 13 | app = cdk.App() 14 | 15 | EndpointStack( 16 | app, 17 | "EndpointStack", 18 | env=cdk.Environment(account=os.getenv("CDK_DEFAULT_ACCOUNT"), region=os.getenv("CDK_DEFAULT_REGION")), 19 | model_name=MODEL 20 | ) 21 | 22 | PipelineStack( 23 | app, 24 | CODECOMMIT_REPOSITORY, 25 | env=cdk.Environment(account=os.getenv("CDK_DEFAULT_ACCOUNT"), region=os.getenv("CDK_DEFAULT_REGION")), 26 | model_name=MODEL, 27 | repo_name=CODECOMMIT_REPOSITORY, 28 | cdk_version=CDK_VERSION 29 | ) 30 | 31 | app.synth() 
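
The `EndpointStack` defined above (and its Chapter06 counterpart) exposes the trained model behind a SageMaker endpoint named `<Model>-Endpoint` — for example `Abalone-Endpoint` when `model_name="abalone"` — with CSV data capture enabled. As a minimal sketch of how a client might call that endpoint once the pipeline has deployed it, the snippet below uses `boto3`'s SageMaker runtime client; the endpoint name is derived from the stack definition, while the sample feature values and their ordering are assumptions that depend on the pipeline's preprocessing step.

```python
# Minimal sketch: invoking the deployed SageMaker endpoint with a CSV payload.
# Assumptions: the stack was deployed with model_name="abalone" (endpoint name
# "Abalone-Endpoint"), and the model expects the encoded abalone features in
# the order produced by the pipeline's preprocessing step.
import boto3

runtime = boto3.client("sagemaker-runtime")

# One hypothetical, already-preprocessed abalone record as a CSV row.
sample_row = "0.455,0.365,0.095,0.514,0.2245,0.101,0.15,0.0,0.0,1.0"

response = runtime.invoke_endpoint(
    EndpointName="Abalone-Endpoint",
    ContentType="text/csv",  # matches the data-capture content type in the stack
    Body=sample_row,
)

# The response body is a stream; decode it to read the predicted number of rings.
print(response["Body"].read().decode("utf-8"))
```
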
-------------------------------------------------------------------------------- /Chapter06/cdk/abalone_endpoint_stack.py: -------------------------------------------------------------------------------- 1 | import aws_cdk as cdk 2 | import aws_cdk.aws_sagemaker as sagemaker 3 | 4 | class EndpointStack(cdk.Stack): 5 | def __init__(self, app: cdk.App, id: str, *, model_name: str=None, **kwargs) -> None: 6 | super().__init__(app, id, **kwargs) 7 | 8 | bucket_name = cdk.CfnParameter( 9 | self, 10 | "BucketName", 11 | type="String" 12 | ) 13 | 14 | execution_id = cdk.CfnParameter( 15 | self, 16 | "ExecutionId", 17 | type="String" 18 | ) 19 | 20 | endpoint_config = sagemaker.CfnEndpointConfig( 21 | self, 22 | "EndpointConfig", 23 | endpoint_config_name="{}-config-{}".format(model_name.capitalize(), execution_id.value_as_string), 24 | production_variants=[ 25 | sagemaker.CfnEndpointConfig.ProductionVariantProperty( 26 | initial_instance_count=2, 27 | initial_variant_weight=1.0, 28 | instance_type="ml.m5.large", 29 | model_name="{}-{}".format(model_name, execution_id.value_as_string), 30 | variant_name="AllTraffic" 31 | ) 32 | ], 33 | data_capture_config=sagemaker.CfnEndpointConfig.DataCaptureConfigProperty( 34 | capture_content_type_header=sagemaker.CfnEndpointConfig.CaptureContentTypeHeaderProperty( 35 | csv_content_types=[ 36 | "text/csv" 37 | ] 38 | ), 39 | capture_options=[ 40 | sagemaker.CfnEndpointConfig.CaptureOptionProperty(capture_mode="Input"), 41 | sagemaker.CfnEndpointConfig.CaptureOptionProperty(capture_mode="Output") 42 | ], 43 | destination_s3_uri="s3://{}/endpoint-data-capture".format(bucket_name.value_as_string), 44 | enable_capture=True, 45 | initial_sampling_percentage=100.0 46 | ) 47 | ) 48 | 49 | endpoint = sagemaker.CfnEndpoint( 50 | self, 51 | "AbaloneEndpoint", 52 | endpoint_config_name=endpoint_config.attr_endpoint_config_name, 53 | endpoint_name="{}-Endpoint".format(model_name.capitalize()) 54 | ) 55 | endpoint.add_depends_on(endpoint_config) -------------------------------------------------------------------------------- /Chapter06/cdk/app.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import aws_cdk as cdk 5 | from abalone_cicd_pipeline.abalone_endpoint_stack import EndpointStack 6 | from abalone_cicd_pipeline.abalone_cicd_pipeline_stack import PipelineStack 7 | 8 | MODEL = "abalone" 9 | CODECOMMIT_REPOSITORY = "abalone-cicd-pipeline" 10 | CDK_VERSION = "2.3.0" 11 | 12 | app = cdk.App() 13 | 14 | EndpointStack( 15 | app, 16 | "EndpointStack", 17 | env=cdk.Environment(account=os.getenv("CDK_DEFAULT_ACCOUNT"), region=os.getenv("CDK_DEFAULT_REGION")), 18 | model_name=MODEL 19 | ) 20 | 21 | PipelineStack( 22 | app, 23 | CODECOMMIT_REPOSITORY, 24 | env=cdk.Environment(account=os.getenv("CDK_DEFAULT_ACCOUNT"), region=os.getenv("CDK_DEFAULT_REGION")), 25 | model_name=MODEL, 26 | repo_name=CODECOMMIT_REPOSITORY, 27 | cdk_version=CDK_VERSION 28 | ) 29 | 30 | app.synth() -------------------------------------------------------------------------------- /Chapter06/scripts/deploy.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import logging 3 | import os 4 | import json 5 | import sys 6 | from botocore.exceptions import ClientError 7 | 8 | logger = logging.getLogger() 9 | logging_format = "%(levelname)s: [%(filename)s:%(lineno)s] %(message)s" 10 | logging.basicConfig(format=logging_format, level=os.environ.get("LOGLEVEL", "INFO").upper()) 
11 | codepipeline_client = boto3.client("codepipeline") 12 | sagemaker_client = boto3.client("sagemaker") 13 | pipeline_name = os.environ["PIPELINE_NAME"] 14 | model_name = os.environ["MODEL_NAME"] 15 | 16 | 17 | def get_execution_id(name=None, task=None): 18 | try: 19 | response = codepipeline_client.get_pipeline_state(name=name) 20 | for stage in response["stageStates"]: 21 | if stage["stageName"] == "Deploy": 22 | for action in stage["actionStates"]: 23 | if action["actionName"] == task: 24 | return stage["latestExecution"]["pipelineExecutionId"] 25 | except ClientError as e: 26 | error = e.response["Error"]["Message"] 27 | logger.error(error) 28 | raise Exception(error) 29 | 30 | 31 | if __name__ == "__main__": 32 | task = "DeploymentBuild" 33 | execution_id = get_execution_id(name=pipeline_name, task=task) 34 | logger.info("Creating Stack Parameters") 35 | params = { 36 | "ExecutionId": execution_id, 37 | "BucketName": os.environ["BUCKET_NAME"] 38 | } 39 | try: 40 | with open(os.path.join(os.environ["CODEBUILD_SRC_DIR"], "output/params.json"), "w") as f: 41 | json.dump(params, f) 42 | logger.info(json.dumps(params, indent=4)), 43 | sys.exit(0) 44 | except Exception as error: 45 | logger.error(error) 46 | sys.exit(255) -------------------------------------------------------------------------------- /Chapter07/Files/buildspec.yml: -------------------------------------------------------------------------------- 1 | version: 0.2 2 | env: 3 | variables: 4 | DATA_PREFIX: abalone_data 5 | EPOCHS: 200 6 | BATCH_SIZE: 8 7 | THRESHOLD: 2.1 8 | 9 | phases: 10 | install: 11 | runtime-versions: 12 | python: 3.8 13 | commands: 14 | - printenv 15 | - echo "Updating Build Environment" 16 | - apt-get update 17 | - python -m pip install --upgrade pip 18 | - python -m pip install --upgrade boto3 awscli sagemaker==2.49.1 stepfunctions==2.2.0 19 | build: 20 | commands: 21 | - echo Build started on `date` 22 | - echo "Creating ML Workflow " 23 | - | 24 | sh -c """ 25 | cd workflow/ 26 | python main.py 27 | """ 28 | post_build: 29 | commands: 30 | - echo "Build Completed" -------------------------------------------------------------------------------- /Chapter08/airflow/dags/.airflowignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Automated-Machine-Learning-on-AWS/fa106b966823211a4588286656b6cf01cc6c14b2/Chapter08/airflow/dags/.airflowignore -------------------------------------------------------------------------------- /Chapter08/airflow/rerquirements.txt: -------------------------------------------------------------------------------- 1 | sagemaker==2.49.1 2 | protobuf==3.19.0 3 | s3fs==0.5.1 4 | boto3>=1.17.4 5 | -------------------------------------------------------------------------------- /Chapter08/airflow/scripts/evaluate.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import tarfile 4 | import pandas as pd 5 | import tensorflow as tf 6 | from sklearn import preprocessing 7 | def load_model(model_path): 8 | model = tf.keras.models.load_model(os.path.join(model_path, "model.h5")) 9 | model.compile(optimizer="adam", loss="mse") 10 | return model 11 | 12 | def evaluate_model(prefix, model): 13 | column_names = ["rings", "sex", "length", "diameter", "height", "whole_weight", "shucked_weight", "viscera_weight", "shell_weight"] 14 | input_path = os.path.join(prefix, "processing/testing") 15 | output_path = os.path.join(prefix, 
"processing/evaluation") 16 | predictions = [] 17 | truths = [] 18 | test_df = pd.read_csv(os.path.join(input_path, "testing.csv"), names=column_names) 19 | y = test_df["rings"].to_numpy() 20 | X = test_df.drop(["rings"], axis=1).to_numpy() 21 | X = preprocessing.normalize(X) 22 | for row in range(len(X)): 23 | payload = [X[row].tolist()] 24 | result = model.predict(payload) 25 | print(result[0][0]) 26 | predictions.append(float(result[0][0])) 27 | truths.append(float(y[row])) 28 | report = { 29 | "GroundTruth": truths, 30 | "Predictions": predictions 31 | } 32 | with open(os.path.join(output_path, "evaluation.json"), "w") as f: 33 | f.write(json.dumps(report)) 34 | 35 | 36 | if __name__ == "__main__": 37 | print("Extracting model archive") 38 | prefix = "/opt/ml" 39 | model_path = os.path.join(prefix, "model") 40 | tarfile_path = os.path.join(prefix, "processing/model/model.tar.gz") 41 | with tarfile.open(tarfile_path) as tar: 42 | tar.extractall(path=model_path) 43 | print("Loading Trained Model") 44 | model = load_model(model_path) 45 | print("Evaluating Trained Model") 46 | evaluate_model(prefix, model) 47 | print("Done!") -------------------------------------------------------------------------------- /Chapter08/cdk/abalone_data_pipeline_stack.py: -------------------------------------------------------------------------------- 1 | import os 2 | import aws_cdk.aws_codecommit as codecommit 3 | import aws_cdk.aws_codebuild as codebuild 4 | import aws_cdk as cdk 5 | import aws_cdk.aws_s3 as s3 6 | import aws_cdk.aws_ssm as ssm 7 | import aws_cdk.aws_s3_deployment as s3_deployment 8 | import aws_cdk.aws_iam as iam 9 | import aws_cdk.aws_glue as glue 10 | import aws_cdk.aws_lambda as lambda_ 11 | import aws_cdk.aws_events_targets as targets 12 | from constructs import Construct 13 | 14 | class DataPipelineStack(cdk.Stack): 15 | def __init__(self, scope: Construct, id: str, *, airflow_environment_name: str=None, model_name: str=None, repo_name: str=None, **kwargs) -> None: 16 | super().__init__(scope, id, **kwargs) 17 | 18 | code_repo = codecommit.Repository.from_repository_name( 19 | self, 20 | "SourceRepository", 21 | repository_name=repo_name 22 | ) 23 | 24 | data_bucket = s3.Bucket( 25 | self, 26 | "AirflowDataBucket", 27 | bucket_name=f"{model_name}-data-{cdk.Aws.REGION}-{cdk.Aws.ACCOUNT_ID}", 28 | block_public_access=s3.BlockPublicAccess.BLOCK_ALL, 29 | auto_delete_objects=True, 30 | removal_policy=cdk.RemovalPolicy.DESTROY, 31 | versioned=True 32 | ) 33 | ssm.StringParameter( 34 | self, 35 | "DataBucketParameter", 36 | description="Airflow Data Bucket Name", 37 | parameter_name="AirflowDataBucket", 38 | string_value=data_bucket.bucket_name 39 | ) 40 | 41 | sagemaker_role = iam.Role( 42 | self, 43 | "SageMakerBuildRole", 44 | assumed_by=iam.CompositePrincipal( 45 | iam.ServicePrincipal("sagemaker.amazonaws.com") 46 | ), 47 | managed_policies=[ 48 | iam.ManagedPolicy.from_aws_managed_policy_name("AmazonSageMakerFullAccess") 49 | ] 50 | ) 51 | data_bucket.grant_read_write(sagemaker_role) 52 | ssm.StringParameter( 53 | self, 54 | "SageMakerRoleParameter", 55 | description="SageMaker Role ARN", 56 | parameter_name="SageMakerRoleARN", 57 | string_value=sagemaker_role.role_arn 58 | ) 59 | 60 | analyze_results_lambda = lambda_.Function( 61 | self, 62 | "AnalyzeResults", 63 | handler="index.lambda_handler", 64 | runtime=lambda_.Runtime.PYTHON_3_8, 65 | code=lambda_.Code.from_asset(os.path.join(os.path.dirname(__file__), "../artifacts/lambda/analyze_results")), 66 | memory_size=128, 67 | 
timeout=cdk.Duration.seconds(60) 68 | ) 69 | data_bucket.grant_read(analyze_results_lambda) 70 | ssm.StringParameter( 71 | self, 72 | "AnalyzeResultsParameter", 73 | description="Analyze Results Lambda Function Name", 74 | parameter_name="AnalyzeResultsLambda", 75 | string_value=analyze_results_lambda.function_name 76 | ) 77 | 78 | glue_role = iam.Role( 79 | self, 80 | "GlueRole", 81 | assumed_by=iam.CompositePrincipal( 82 | iam.ServicePrincipal("glue.amazonaws.com") 83 | ), 84 | managed_policies=[ 85 | iam.ManagedPolicy.from_aws_managed_policy_name("service-role/AWSGlueServiceRole") 86 | ] 87 | ) 88 | data_bucket.grant_read_write(glue_role) 89 | 90 | glue_catalog = glue.CfnDatabase( 91 | self, 92 | "GlueDatabase", 93 | catalog_id=cdk.Aws.ACCOUNT_ID, 94 | database_input=glue.CfnDatabase.DatabaseInputProperty( 95 | name=f"{model_name}_new" 96 | ) 97 | ) 98 | 99 | glue_crawler = glue.CfnCrawler( 100 | self, 101 | "GlueCrawler", 102 | name=f"{model_name}-crawler", 103 | role=glue_role.role_arn, 104 | database_name=glue_catalog.ref, 105 | targets={ 106 | "s3Targets": [ 107 | { 108 | "path": f"s3://{data_bucket.bucket_name}/{model_name}_data/new/" 109 | } 110 | ] 111 | } 112 | ) 113 | ssm.StringParameter( 114 | self, 115 | "GlueCrawlerParameter", 116 | description="Glue Crawler Name", 117 | parameter_name="GlueCrawler", 118 | string_value=glue_crawler.name 119 | ) 120 | 121 | glue_job = glue.CfnJob( 122 | self, 123 | "GlueETLJob", 124 | name=f"{model_name}-etl-job", 125 | description="AWS Glue ETL Job to merge new + raw data, and process training data", 126 | role=glue_role.role_arn, 127 | glue_version="2.0", 128 | execution_property=glue.CfnJob.ExecutionPropertyProperty( 129 | max_concurrent_runs=1 130 | ), 131 | command=glue.CfnJob.JobCommandProperty( 132 | name="glueetl", 133 | python_version="3", 134 | script_location=f"s3://{data_bucket.bucket_name}/airflow/scripts/preprocess.py" 135 | ), 136 | default_arguments={ 137 | "--job-language": "python", 138 | "--GLUE_CATALOG": glue_catalog.ref, 139 | "--S3_BUCKET": data_bucket.bucket_name, 140 | "--S3_INPUT_KEY_PREFIX": f"{model_name}_data/raw/abalone.data", 141 | "--S3_OUTPUT_KEY_PREFIX": f"{model_name}_data", 142 | "--TempDir": f"s3://{data_bucket.bucket_name}/glue-temp" 143 | }, 144 | allocated_capacity=5, 145 | timeout=10 146 | ) 147 | ssm.StringParameter( 148 | self, 149 | "GlueJobParameter", 150 | description="Glue Job Name", 151 | parameter_name="GlueJob", 152 | string_value=glue_job.name 153 | ) 154 | 155 | s3_deployment.BucketDeployment( 156 | self, 157 | "DeployData", 158 | sources=[ 159 | s3_deployment.Source.asset(os.path.join(os.path.dirname(__file__), "../artifacts/data")) 160 | ], 161 | destination_bucket=data_bucket, 162 | destination_key_prefix=f"{model_name}_data/raw", 163 | retain_on_delete=False 164 | ) 165 | 166 | code_deployment = codebuild.Project( 167 | self, 168 | "CodeDeploymentProject", 169 | project_name="CodeDeploymentProject", 170 | description="CodeBuild Project to Copy Airflow Artifacts to S3", 171 | source=codebuild.Source.code_commit( 172 | repository=code_repo 173 | ), 174 | environment=codebuild.BuildEnvironment( 175 | build_image=codebuild.LinuxBuildImage.STANDARD_5_0 176 | ), 177 | environment_variables={ 178 | "DATA_BUCKET": codebuild.BuildEnvironmentVariable( 179 | value=data_bucket.bucket_name 180 | ) 181 | }, 182 | build_spec=codebuild.BuildSpec.from_object( 183 | { 184 | "version": "0.2", 185 | "phases": { 186 | "install": { 187 | "runtime-versions": { 188 | "python": 3.8 189 | }, 190 | "commands": [ 
191 | "printenv", 192 | "echo 'Updating Build Environment'", 193 | "python -m pip install --upgrade pip", 194 | "python -m pip install --upgrade boto3 awscli" 195 | ] 196 | }, 197 | "build": { 198 | "commands": [ 199 | "echo 'Deploying Airflow Artifacts to S3'", 200 | "cd artifacts", 201 | "aws s3 sync airflow s3://${DATA_BUCKET}/airflow" 202 | ] 203 | }, 204 | "post_build": { 205 | "commands": [ 206 | "echo 'Airflow Artifacts Deployment Complete'" 207 | ] 208 | } 209 | } 210 | } 211 | ) 212 | ) 213 | data_bucket.grant_read_write(code_deployment.role) 214 | 215 | code_repo.on_commit( 216 | "StartDeploymentProject", 217 | target=targets.CodeBuildProject(code_deployment) 218 | ) -------------------------------------------------------------------------------- /Chapter08/cdk/app.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import aws_cdk as cdk 4 | from abalone_data_pipeline.abalone_data_pipeline_stack import DataPipelineStack 5 | 6 | MODEL = "abalone" 7 | CODECOMMIT_REPOSITORY = "abalone-data-pipeline" 8 | 9 | app = cdk.App() 10 | 11 | DataPipelineStack( 12 | app, 13 | CODECOMMIT_REPOSITORY, 14 | env=cdk.Environment(account=os.getenv("CDK_DEFAULT_ACCOUNT"), region=os.getenv("CDK_DEFAULT_REGION")), 15 | model_name=MODEL, 16 | repo_name=CODECOMMIT_REPOSITORY, 17 | airflow_environment_name=f"{MODEL}-airflow-environment" 18 | ) 19 | 20 | app.synth() -------------------------------------------------------------------------------- /Chapter08/lambda/analyze_results/index.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import logging 4 | import boto3 5 | import math 6 | 7 | logger = logging.getLogger() 8 | logger.setLevel(logging.INFO) 9 | 10 | def lambda_handler(event, context): 11 | logger.debug("## Environment Variables ##") 12 | logger.debug(os.environ) 13 | logger.debug("## Event ##") 14 | logger.debug(event) 15 | s3 = boto3.client("s3") 16 | if ("Bucket" in event): 17 | bucket = event["Bucket"] 18 | else: 19 | raise KeyError("S3 'Bucket' not found in Lambda event!") 20 | if ("Key" in event): 21 | key = event["Key"] 22 | else: 23 | raise KeyError("S3 'Key' not found in Lambda event!") 24 | logger.info("Downloading evlauation results file ...") 25 | json_file = json.loads(s3.get_object(Bucket = bucket, Key = key)['Body'].read()) 26 | logger.info("Analyzing Model Evaluation Results ...") 27 | y = json_file["GroundTruth"] 28 | y_hat = json_file["Predictions"] 29 | summation = 0 30 | for i in range (0, len(y)): 31 | squared_diff = (y[i] - y_hat[i])**2 32 | summation += squared_diff 33 | rmse = math.sqrt(summation/len(y)) 34 | logger.info("Root Mean Square Error: {}".format(rmse)) 35 | logger.info("Done!") 36 | return { 37 | "statusCode": 200, 38 | "Result": rmse, 39 | } -------------------------------------------------------------------------------- /Chapter09/Files/airflow/dags/.airflowignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Automated-Machine-Learning-on-AWS/fa106b966823211a4588286656b6cf01cc6c14b2/Chapter09/Files/airflow/dags/.airflowignore -------------------------------------------------------------------------------- /Chapter09/Files/airflow/dags/abalone_data_pipeline.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import json 3 | from datetime import timedelta 4 | 5 | import sagemaker 6 
| from sagemaker.tensorflow import TensorFlow 7 | from sagemaker.tensorflow.serving import Model 8 | from sagemaker.processing import ProcessingInput, ProcessingOutput, Processor 9 | from sagemaker.model_monitor import DataCaptureConfig 10 | 11 | import airflow 12 | from airflow import DAG 13 | from airflow.operators.python_operator import PythonOperator 14 | from airflow.providers.amazon.aws.operators.glue import AwsGlueJobOperator 15 | from airflow.providers.amazon.aws.operators.glue_crawler import AwsGlueCrawlerOperator 16 | from airflow.providers.amazon.aws.hooks.lambda_function import AwsLambdaHook 17 | from airflow.operators.python_operator import BranchPythonOperator 18 | from airflow.operators.dummy import DummyOperator 19 | 20 | sagemaker_session = sagemaker.Session() 21 | region_name = sagemaker_session.boto_region_name 22 | model_name = "abalone" 23 | data_prefix = "abalone_data" 24 | data_bucket = f"""{boto3.client("ssm", region_name=region_name).get_parameter(Name="AirflowDataBucket")["Parameter"]["Value"]}""" 25 | glue_job_name = f"""{boto3.client("ssm", region_name=region_name).get_parameter(Name="GlueJob")["Parameter"]["Value"]}""" 26 | crawler_name = f"""{boto3.client("ssm", region_name=region_name).get_parameter(Name="GlueCrawler")["Parameter"]["Value"]}""" 27 | sagemaker_role = f"""{boto3.client("ssm", region_name=region_name).get_parameter(Name="SageMakerRoleARN")["Parameter"]["Value"]}""" 28 | lambda_function = f"""{boto3.client("ssm", region_name=region_name).get_parameter(Name="AnalyzeResultsLambda")["Parameter"]["Value"]}""" 29 | container_image = f"763104351884.dkr.ecr.{region_name}.amazonaws.com/tensorflow-training:2.5.0-cpu-py37-ubuntu18.04-v1.0" 30 | training_input = f"s3://{data_bucket}/{data_prefix}/training" 31 | testing_input = f"s3://{data_bucket}/{data_prefix}/testing" 32 | data_capture = f"s3://{data_bucket}/endpoint-data-capture" 33 | default_args = { 34 | "owner": "airflow", 35 | "depends_on_past": False, 36 | "start_date": airflow.utils.dates.days_ago(1), 37 | "retries": 0, 38 | "retry_delay": timedelta(minutes=2) 39 | } 40 | 41 | 42 | def training(data, **kwargs): 43 | estimator = TensorFlow( 44 | base_job_name=model_name, 45 | entry_point="/usr/local/airflow/dags/model/model_training.py", 46 | role=sagemaker_role, 47 | framework_version="2.4", 48 | py_version="py37", 49 | hyperparameters={"epochs": 200, "batch-size": 8}, 50 | script_mode=True, 51 | instance_count=1, 52 | instance_type="ml.m5.xlarge", 53 | ) 54 | estimator.fit(data) 55 | kwargs["ti"].xcom_push( 56 | key="TrainingJobName", 57 | value=str(estimator.latest_training_job.name) 58 | ) 59 | 60 | 61 | def evaluation(ds, **kwargs): 62 | training_job_name = kwargs["ti"].xcom_pull(key="TrainingJobName") 63 | estimator = TensorFlow.attach(training_job_name) 64 | model_data = estimator.model_data 65 | processor = Processor( 66 | base_job_name=f"{model_name}-evaluation", 67 | image_uri=container_image, 68 | entrypoint=[ 69 | "python3", 70 | "/opt/ml/processing/input/code/evaluate.py" 71 | ], 72 | instance_count=1, 73 | instance_type="ml.m5.xlarge", 74 | role=sagemaker_role, 75 | max_runtime_in_seconds=1200 76 | ) 77 | processor.run( 78 | inputs=[ 79 | ProcessingInput( 80 | source=testing_input, 81 | destination="/opt/ml/processing/testing", 82 | input_name="input" 83 | ), 84 | ProcessingInput( 85 | source=model_data, 86 | destination="/opt/ml/processing/model", 87 | input_name="model" 88 | ), 89 | ProcessingInput( 90 | source="s3://{}/airflow/scripts/evaluate.py".format(data_bucket), 91 | 
destination="/opt/ml/processing/input/code", 92 | input_name="code" 93 | ) 94 | ], 95 | outputs=[ 96 | ProcessingOutput( 97 | source="/opt/ml/processing/evaluation", 98 | destination="s3://{}/{}/evaluation".format(data_bucket, data_prefix), 99 | output_name="evaluation" 100 | ) 101 | ] 102 | ) 103 | 104 | 105 | def deploy_model(ds, **kwargs): 106 | training_job_name = kwargs["ti"].xcom_pull(key="TrainingJobName") 107 | estimator = TensorFlow.attach(training_job_name) 108 | model = Model( 109 | model_data=estimator.model_data, 110 | role=sagemaker_role, 111 | framework_version="2.4", 112 | sagemaker_session=sagemaker.Session() 113 | ) 114 | model.deploy( 115 | initial_instance_count=2, 116 | instance_type="ml.m5.large", 117 | data_capture_config=DataCaptureConfig( 118 | enable_capture=True, 119 | sampling_percentage=100, 120 | destination_s3_uri=data_capture 121 | ) 122 | ) 123 | 124 | 125 | def get_results(ds, **kwargs): 126 | hook = AwsLambdaHook( 127 | function_name=lambda_function, 128 | aws_conn_id="aws_default", 129 | invocation_type="RequestResponse", 130 | log_type="None", 131 | qualifier="$LATEST", 132 | config=None 133 | ) 134 | request = hook.invoke_lambda( 135 | payload=json.dumps( 136 | { 137 | "Bucket": data_bucket, 138 | "Key": f"{data_prefix}/evaluation/evaluation.json" 139 | } 140 | ) 141 | ) 142 | response = json.loads(request["Payload"].read().decode()) 143 | kwargs["ti"].xcom_push( 144 | key="Results", 145 | value=response["Result"] 146 | ) 147 | 148 | 149 | def branch(ds, **kwargs): 150 | result = kwargs["ti"].xcom_pull(key="Results") 151 | if result > 3.1: 152 | return "rejected" 153 | else: 154 | return "approved" 155 | 156 | 157 | with DAG( 158 | dag_id=f"{model_name}-data-workflow", 159 | default_args=default_args, 160 | schedule_interval="@daily", 161 | concurrency=1, 162 | max_active_runs=1, 163 | ) as dag: 164 | 165 | crawler_task = AwsGlueCrawlerOperator( 166 | task_id="crawl_data", 167 | config={"Name": crawler_name} 168 | ) 169 | 170 | etl_task = AwsGlueJobOperator( 171 | task_id="preprocess_data", 172 | job_name=glue_job_name 173 | ) 174 | 175 | training_task = PythonOperator( 176 | task_id="training", 177 | python_callable=training, 178 | op_args=[training_input], 179 | provide_context=True, 180 | dag=dag 181 | ) 182 | 183 | evaluation_task = PythonOperator( 184 | task_id="evaluate_model", 185 | python_callable=evaluation, 186 | provide_context=True, 187 | dag=dag 188 | ) 189 | 190 | analyze_results_task = PythonOperator( 191 | task_id="analyze_results", 192 | python_callable=get_results, 193 | provide_context=True, 194 | dag=dag 195 | ) 196 | 197 | check_threshold_task = BranchPythonOperator( 198 | task_id="check_threshold", 199 | python_callable=branch, 200 | provide_context=True, 201 | dag=dag 202 | ) 203 | 204 | deployment_task = PythonOperator( 205 | task_id="deploy_model", 206 | python_callable=deploy_model, 207 | provide_context=True, 208 | dag=dag 209 | ) 210 | 211 | start_task = DummyOperator( 212 | task_id="start", 213 | dag=dag 214 | ) 215 | 216 | end_task = DummyOperator( 217 | task_id="end", 218 | dag=dag 219 | ) 220 | 221 | rejected_task = DummyOperator( 222 | task_id="rejected", 223 | dag=dag 224 | ) 225 | 226 | approved_task = DummyOperator( 227 | task_id="approved", 228 | dag=dag 229 | ) 230 | 231 | start_task >> crawler_task >> etl_task >> training_task >> evaluation_task >> analyze_results_task >> check_threshold_task >> [rejected_task, approved_task] 232 | approved_task >> deployment_task >> end_task 233 | rejected_task >> end_task 
-------------------------------------------------------------------------------- /Chapter09/Files/airflow/dags/model/model_training.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import numpy as np 4 | import pandas as pd 5 | 6 | import tensorflow as tf 7 | from tensorflow import keras 8 | from tensorflow.keras.models import Sequential 9 | from tensorflow.keras.layers import Dense 10 | from tensorflow.keras.optimizers import Adam 11 | from sklearn import preprocessing 12 | 13 | tf.get_logger().setLevel("ERROR") 14 | 15 | if __name__ == "__main__": 16 | print(f"Tensorflow Version: {tf.__version__}") 17 | column_names = ["rings", "sex", "length", "diameter", "height", "whole_weight", "shucked_weight", "viscera_weight", "shell_weight"] 18 | parser = argparse.ArgumentParser() 19 | parser.add_argument("--epochs", type=int, default=2) 20 | parser.add_argument("--batch-size", type=int, default=8) 21 | parser.add_argument("--model-dir", type=str, default=os.environ["SM_MODEL_DIR"]) 22 | parser.add_argument("--training", type=str, default=os.environ["SM_CHANNEL_TRAINING"]) 23 | args, _ = parser.parse_known_args() 24 | epochs = args.epochs 25 | batch_size = args.batch_size 26 | training_path = args.training 27 | model_path = args.model_dir 28 | train_data = pd.read_csv(os.path.join(training_path, "training.csv"), sep=",", names=column_names) 29 | val_data = pd.read_csv(os.path.join(training_path, "validation.csv"), sep=",", names=column_names) 30 | train_y = train_data["rings"].to_numpy() 31 | train_X = train_data.drop(["rings"], axis=1).to_numpy() 32 | val_y = val_data["rings"].to_numpy() 33 | val_X = val_data.drop(["rings"], axis=1).to_numpy() 34 | train_X = preprocessing.normalize(train_X) 35 | val_X = preprocessing.normalize(val_X) 36 | network_layers = [ 37 | Dense(64, activation="relu", kernel_initializer="normal", input_dim=8), 38 | Dense(64, activation="relu"), 39 | Dense(1, activation="linear") 40 | ] 41 | model = Sequential(network_layers) 42 | model.compile(optimizer="adam", loss="mse", metrics=["mae", "accuracy"]) 43 | model.summary() 44 | model.fit( 45 | train_X, 46 | train_y, 47 | validation_data=(val_X, val_y), 48 | batch_size=batch_size, 49 | epochs=epochs, 50 | shuffle=True, 51 | verbose=1 52 | ) 53 | 54 | model.save(os.path.join(model_path, "model.h5")) 55 | model_version = 1 56 | export_path = os.path.join(model_path, str(model_version)) 57 | tf.keras.models.save_model( 58 | model, 59 | export_path, 60 | overwrite=True, 61 | include_optimizer=True, 62 | save_format=None, 63 | signatures=None, 64 | options=None 65 | ) -------------------------------------------------------------------------------- /Chapter09/Files/airflow/scripts/preprocess.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import boto3 4 | import pyspark 5 | import pandas as pd 6 | from functools import reduce 7 | from pyspark.sql import SparkSession, DataFrame 8 | from pyspark.ml import Pipeline 9 | from pyspark.sql.types import StructField, StructType, StringType, DoubleType 10 | from pyspark.ml.feature import StringIndexer, VectorIndexer, OneHotEncoder, VectorAssembler 11 | from pyspark.sql.functions import * 12 | from awsglue.job import Job 13 | from awsglue.transforms import * 14 | from awsglue.context import GlueContext 15 | from pyspark.context import SparkContext 16 | from awsglue.utils import getResolvedOptions 17 | from awsglue.dynamicframe import DynamicFrame 18 
| 19 | 20 | def csv_line(data): 21 | r = ','.join(str(d) for d in data[1]) 22 | return str(data[0]) + "," + r 23 | 24 | def toS3(df, path): 25 | rdd = df.rdd.map(lambda x: (x.rings, x.features)) 26 | rdd_lines = rdd.map(csv_line) 27 | spark_df = rdd_lines.map(lambda x: str(x)).map(lambda s: s.split(",")).toDF() 28 | pd_df = spark_df.toPandas() 29 | pd_df = pd_df.drop(columns=["_3"]) 30 | pd_df.to_csv(f"s3://{path}", header=False, index=False) 31 | 32 | def main(): 33 | glueContext = GlueContext(SparkContext.getOrCreate()) 34 | spark = SparkSession.builder.appName("PySparkAbalone").getOrCreate() 35 | spark.sparkContext._jsc.hadoopConfiguration().set("mapred.output.committer.class", "org.apache.hadoop.mapred.FileOutputCommitter") 36 | args = getResolvedOptions(sys.argv, ["GLUE_CATALOG", "S3_BUCKET", "S3_INPUT_KEY_PREFIX", "S3_OUTPUT_KEY_PREFIX"]) 37 | schema = StructType( 38 | [ 39 | StructField("sex", StringType(), True), 40 | StructField("length", DoubleType(), True), 41 | StructField("diameter", DoubleType(), True), 42 | StructField("height", DoubleType(), True), 43 | StructField("whole_weight", DoubleType(), True), 44 | StructField("shucked_weight", DoubleType(), True), 45 | StructField("viscera_weight", DoubleType(), True), 46 | StructField("shell_weight", DoubleType(), True), 47 | StructField("rings", DoubleType(), True) 48 | ] 49 | ) 50 | columns = ["sex", "length", "diameter", "height", "whole_weight", "shucked_weight", "viscera_weight", "shell_weight", "rings"] 51 | new = glueContext.create_dynamic_frame_from_catalog(database=args["GLUE_CATALOG"], table_name="new", transformation_ctx="new") 52 | new_df = new.toDF() 53 | new_df = new_df.toDF(*columns) 54 | raw_df = spark.read.csv(("s3://{}".format(os.path.join(args["S3_BUCKET"], args["S3_INPUT_KEY_PREFIX"]))), header=False, schema=schema) 55 | merged_df = reduce(DataFrame.unionAll, [raw_df, new_df]) 56 | distinct_df = merged_df.distinct() 57 | sex_indexer = StringIndexer(inputCol="sex", outputCol="indexed_sex") 58 | sex_encoder = OneHotEncoder(inputCol="indexed_sex", outputCol="sex_vec") 59 | assembler = VectorAssembler( 60 | inputCols=[ 61 | "sex_vec", 62 | "length", 63 | "diameter", 64 | "height", 65 | "whole_weight", 66 | "shucked_weight", 67 | "viscera_weight", 68 | "shell_weight" 69 | ], 70 | outputCol="features" 71 | ) 72 | pipeline = Pipeline(stages=[sex_indexer, sex_encoder, assembler]) 73 | model = pipeline.fit(distinct_df) 74 | transformed_df = model.transform(merged_df) 75 | (train_df, validation_df, test_df) = transformed_df.randomSplit([0.8, 0.15, 0.05]) 76 | toS3(train_df, os.path.join(args["S3_BUCKET"], args["S3_OUTPUT_KEY_PREFIX"], "training/training.csv")) 77 | toS3(validation_df, os.path.join(args["S3_BUCKET"], args["S3_OUTPUT_KEY_PREFIX"], "training/validation.csv")) 78 | toS3(test_df, os.path.join(args["S3_BUCKET"], args["S3_OUTPUT_KEY_PREFIX"], "testing/testing.csv")) 79 | 80 | if __name__ == "__main__": 81 | main() -------------------------------------------------------------------------------- /Chapter09/Notebook/Simulating New Abalone Survey Data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Creating new `abalone` data using `CTGAN`\n", 8 | ">__NOTE:__ Recommend using the _Python 3 (Data Science)_ kernel, with an _ml.m5.4xlarge (16vCPU + 64GB)_ Instance Type. However, this will incur additional AWS usage costs."
9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "## Install `ctgan`" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "%%capture\n", 25 | "!pip install ctgan" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "## Load the Required Libraries" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "import io\n", 42 | "import boto3\n", 43 | "import warnings\n", 44 | "import pandas as pd\n", 45 | "from time import gmtime, strftime\n", 46 | "\n", 47 | "warnings.filterwarnings(\"ignore\")\n", 48 | "s3 = boto3.client(\"s3\")\n", 49 | "model_name = \"abalone\"\n", 50 | "column_names = [\n", 51 | " \"sex\",\n", 52 | " \"length\",\n", 53 | " \"diameter\",\n", 54 | " \"height\",\n", 55 | " \"whole_weight\",\n", 56 | " \"shucked_weight\",\n", 57 | " \"viscera_weight\",\n", 58 | " \"shell_weight\",\n", 59 | " \"rings\"\n", 60 | "]" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "## Load the \"raw\" data" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "data_bucket = f\"\"\"{boto3.client(\"ssm\").get_parameter(Name=\"AirflowDataBucket\")[\"Parameter\"][\"Value\"]}\"\"\"\n", 77 | "raw_data_key = f\"{model_name}_data/raw/abalone.data\"\n", 78 | "new_data_key = f\"{model_name}_data/new/abalone.{strftime('%Y%m%d%H%M%S', gmtime())}\"\n", 79 | "s3_object = s3.get_object(Bucket=data_bucket, Key=raw_data_key)\n", 80 | "raw_df = pd.read_csv(io.BytesIO(s3_object[\"Body\"].read()), encoding=\"utf8\", names=column_names)" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "## Fit the CTGAN Model on the `sex` target label\n", 88 | "\n", 89 | ">__NOTE:__ Fitting the `ctgan` model can up to 5 minutes, depending on the Kernel compute resources." 
90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "from ctgan import CTGAN\n", 99 | "\n", 100 | "ctgan = CTGAN()\n", 101 | "ctgan.fit(raw_df, [\"sex\"])" 102 | ] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "metadata": {}, 107 | "source": [ 108 | "## Generate `100` samples of \"new\" data\n", 109 | ">__NOTE:__ `100` new samples are used to realistially simulate the potential amount of new daily survey data" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "samples = ctgan.sample(100)" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "## Compare Datasets\n", 126 | "### `raw` dataset" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": null, 132 | "metadata": {}, 133 | "outputs": [], 134 | "source": [ 135 | "raw_df.describe()" 136 | ] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "metadata": {}, 141 | "source": [ 142 | "### `new` dataset" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [ 151 | "samples.describe()" 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "## Upload the new data to test the Airflow DAG" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": null, 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [ 167 | "samples.to_csv(f\"s3://{data_bucket}/{new_data_key}\", header=False, index=False)" 168 | ] 169 | } 170 | ], 171 | "metadata": { 172 | "instance_type": "ml.m5.4xlarge", 173 | "kernelspec": { 174 | "display_name": "Python 3 (Data Science)", 175 | "language": "python", 176 | "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-west-2:236514542706:image/datascience-1.0" 177 | }, 178 | "language_info": { 179 | "codemirror_mode": { 180 | "name": "ipython", 181 | "version": 3 182 | }, 183 | "file_extension": ".py", 184 | "mimetype": "text/x-python", 185 | "name": "python", 186 | "nbconvert_exporter": "python", 187 | "pygments_lexer": "ipython3", 188 | "version": "3.7.10" 189 | } 190 | }, 191 | "nbformat": 4, 192 | "nbformat_minor": 4 193 | } 194 | -------------------------------------------------------------------------------- /Chapter10/Files/airflow/dags/.airflowignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Automated-Machine-Learning-on-AWS/fa106b966823211a4588286656b6cf01cc6c14b2/Chapter10/Files/airflow/dags/.airflowignore -------------------------------------------------------------------------------- /Chapter10/Files/airflow/dags/continuous_training_pipeline.py: -------------------------------------------------------------------------------- 1 | import time 2 | import json 3 | import sagemaker 4 | import boto3 5 | import numpy as np 6 | import pandas as pd 7 | from time import sleep 8 | from datetime import timedelta 9 | from sagemaker.feature_store.feature_group import FeatureGroup 10 | 11 | import airflow 12 | from airflow import DAG 13 | from airflow.operators.python_operator import PythonOperator 14 | from airflow.providers.amazon.aws.hooks.lambda_function import AwsLambdaHook 15 | from airflow.providers.amazon.aws.sensors.s3_prefix import S3PrefixSensor 16 | 17 | sagemaker_session = sagemaker.Session() 18 | 
region_name = sagemaker_session.boto_region_name 19 | data_prefix = "abalone_data" 20 | data_bucket = f"""{boto3.client("ssm", region_name=region_name).get_parameter(Name="DataBucket")["Parameter"]["Value"]}""" 21 | lambda_function = f"""{boto3.client("ssm", region_name=region_name).get_parameter(Name="ReleaseChangeLambda")["Parameter"]["Value"]}""" 22 | fg_name = f"""{boto3.client("ssm", region_name=region_name).get_parameter(Name="FeatureGroup")["Parameter"]["Value"]}""" 23 | default_args = { 24 | "owner": "airflow", 25 | "depends_on_past": False, 26 | "start_date": airflow.utils.dates.days_ago(1), 27 | "retries": 0, 28 | "retry_delay": timedelta(minutes=2) 29 | } 30 | 31 | 32 | def start_pipeline(): 33 | hook = AwsLambdaHook( 34 | function_name=lambda_function, 35 | aws_conn_id="aws_default", 36 | invocation_type="RequestResponse", 37 | log_type="Tail", 38 | qualifier="$LATEST", 39 | config=None 40 | ) 41 | request = hook.invoke_lambda(payload="null") 42 | response = json.loads(request["Payload"].read().decode()) 43 | print(f"Response: {response}") 44 | 45 | 46 | def update_feature_group(): 47 | fg = FeatureGroup(name=fg_name, sagemaker_session=sagemaker_session) 48 | column_names = ["sex", "length", "diameter", "height", "whole_weight", "shucked_weight", "viscera_weight", "shell_weight", "rings"] 49 | abalone_data = pd.read_csv(f"s3://{data_bucket}/{data_prefix}/abalone.new", names=column_names) 50 | data = abalone_data[["rings", "sex", "length", "diameter", "height", "whole_weight", "shucked_weight", "viscera_weight", "shell_weight"]] 51 | processed_data = pd.get_dummies(data) 52 | time_stamp = int(round(time.time())) 53 | processed_data["TimeStamp"] = pd.Series([time_stamp] * len(processed_data), dtype="float64") 54 | fg.ingest(data_frame=processed_data, max_workers=5, wait=True) 55 | sleep(300) 56 | 57 | 58 | with DAG( 59 | dag_id=f"acme-data-workflow", 60 | default_args=default_args, 61 | schedule_interval="@daily", 62 | concurrency=1, 63 | max_active_runs=1, 64 | ) as dag: 65 | 66 | s3_trigger = S3PrefixSensor( 67 | task_id="s3_trigger", 68 | bucket_name=data_bucket, 69 | prefix=data_prefix, 70 | dag=dag 71 | ) 72 | 73 | update_fg_task = PythonOperator( 74 | task_id="update_fg", 75 | python_callable=update_feature_group, 76 | dag=dag 77 | ) 78 | 79 | trigger_release_task = PythonOperator( 80 | task_id="trigger_release_change", 81 | python_callable=start_pipeline, 82 | dag=dag 83 | ) 84 | 85 | s3_trigger >> update_fg_task >> trigger_release_task -------------------------------------------------------------------------------- /Chapter10/Files/airflow/requirements.txt: -------------------------------------------------------------------------------- 1 | sagemaker==2.49.1 2 | protobuf==3.19.0 3 | s3fs<=0.4 4 | boto3>=1.17.4 5 | numpy 6 | pandas 7 | -------------------------------------------------------------------------------- /Chapter10/Files/cdk/acme_pipeline_stack.py: -------------------------------------------------------------------------------- 1 | import aws_cdk as cdk 2 | import aws_cdk.aws_codecommit as codecommit 3 | import aws_cdk.aws_s3 as s3 4 | import aws_cdk.pipelines as pipelines 5 | import aws_cdk.aws_ssm as ssm 6 | from constructs import Construct 7 | 8 | class PipelineStack(cdk.Stack): 9 | 10 | def __init__(self, scope: Construct, id: str, *, model_name: str=None, group_name: str=None, repo_name: str=None, feature_group: str=None, threshold: float=None, cdk_version: str=None, **kwargs) -> None: 11 | super().__init__(scope, id, **kwargs) 12 | 13 | self.code_repo = 
codecommit.Repository( 14 | self, 15 | "Source-Repository", 16 | repository_name=repo_name, 17 | description="ACME Web Application Source Code Repository" 18 | ) 19 | cdk.CfnOutput( 20 | self, 21 | "Clone-URL", 22 | description="CodeCommit Clone URL", 23 | value=self.code_repo.repository_clone_url_http 24 | ) 25 | 26 | self.data_bucket = s3.Bucket( 27 | self, 28 | "Data-Bucket", 29 | bucket_name=f"data-{cdk.Aws.REGION}-{cdk.Aws.ACCOUNT_ID}", 30 | block_public_access=s3.BlockPublicAccess.BLOCK_ALL, 31 | auto_delete_objects=True, 32 | removal_policy=cdk.RemovalPolicy.DESTROY, 33 | versioned=True 34 | ) 35 | 36 | ssm.StringParameter( 37 | self, 38 | "Data-Bucket-Parameter", 39 | parameter_name="DataBucket", 40 | description="SSM Parameter for the S3 Data Bucket Name", 41 | string_value=self.data_bucket.bucket_name 42 | ) 43 | 44 | ssm.StringParameter( 45 | self, 46 | "Feature-Group-Parameter", 47 | parameter_name="FeatureGroup", 48 | description="SSM Paramater for the SageMaker Feature Store group", 49 | string_value=feature_group 50 | ) 51 | 52 | source_artifact = pipelines.CodePipelineSource.code_commit( 53 | repository=self.code_repo, 54 | branch="main" 55 | ) 56 | 57 | pipeline = pipelines.CodePipeline( 58 | self, 59 | "Application-Pipeline", 60 | pipeline_name="ACME-WebApp-Pipeline", 61 | self_mutation=False, 62 | cli_version=cdk_version, 63 | synth=pipelines.ShellStep( 64 | "Synth", 65 | input=source_artifact, 66 | commands=[ 67 | "printenv", 68 | f"npm install -g aws-cdk@{cdk_version}", 69 | "python -m pip install --upgrade pip", 70 | "pip install -r requirements.txt", 71 | "cdk synth" 72 | ] 73 | ) 74 | ) -------------------------------------------------------------------------------- /Chapter10/Files/cdk/app.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import aws_cdk as cdk 4 | from acme_web_application.acme_pipeline_stack import PipelineStack 5 | 6 | MODEL = "abalone" 7 | MODEL_GROUP = f"{MODEL.capitalize()}PackageGroup" 8 | FEATURE_GROUP = "PLACEHOLDER" 9 | CODECOMMIT_REPOSITORY = "acme-web-application" 10 | CDK_VERSION = "2.3.0" 11 | QUALITY_THRESHOLD = 3.1 12 | 13 | app = cdk.App() 14 | 15 | PipelineStack( 16 | app, 17 | CODECOMMIT_REPOSITORY, 18 | env=cdk.Environment(account=os.getenv("CDK_DEFAULT_ACCOUNT"), region=os.getenv("CDK_DEFAULT_REGION")), 19 | model_name=MODEL, 20 | repo_name=CODECOMMIT_REPOSITORY, 21 | group_name=MODEL_GROUP, 22 | feature_group=FEATURE_GROUP, 23 | cdk_version=CDK_VERSION, 24 | threshold=QUALITY_THRESHOLD, 25 | ) 26 | 27 | app.synth() 28 | -------------------------------------------------------------------------------- /Chapter10/Files/cdk/cdk.json: -------------------------------------------------------------------------------- 1 | { 2 | "app": "python3 app.py", 3 | "watch": { 4 | "include": [ 5 | "**" 6 | ], 7 | "exclude": [ 8 | "README.md", 9 | "cdk*.json", 10 | "requirements*.txt", 11 | "source.bat", 12 | "**/__init__.py", 13 | "python/__pycache__", 14 | "tests" 15 | ] 16 | }, 17 | "context": { 18 | "@aws-cdk/aws-apigateway:usagePlanKeyOrderInsensitiveId": true, 19 | "@aws-cdk/core:stackRelativeExports": true, 20 | "@aws-cdk/aws-rds:lowercaseDbIdentifier": true, 21 | "@aws-cdk/aws-lambda:recognizeVersionProps": true, 22 | "@aws-cdk/aws-cloudfront:defaultSecurityPolicyTLSv1.2_2021": true 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /Chapter10/Files/cdk/data_workflow_stack.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import aws_cdk as cdk 3 | import aws_cdk.aws_s3 as s3 4 | import aws_cdk.aws_ssm as ssm 5 | import aws_cdk.aws_s3_deployment as s3_deployment 6 | import aws_cdk.aws_ec2 as ec2 7 | import aws_cdk.aws_iam as iam 8 | import aws_cdk.aws_mwaa as mwaa 9 | import aws_cdk.aws_lambda as lambda_ 10 | from constructs import Construct 11 | 12 | 13 | class DataWorkflowStack(cdk.Stack): 14 | def __init__(self, scope: Construct, id: str, *, airflow_environment_name: str=None, data_bucket_name: str=None, pipeline_name: str=None, **kwargs) -> None: 15 | super().__init__(scope, id, **kwargs) 16 | 17 | data_bucket = s3.Bucket.from_bucket_name( 18 | self, 19 | "Data-Bucket", 20 | bucket_name=data_bucket_name 21 | ) 22 | 23 | data_bucket_param = ssm.StringParameter.from_string_parameter_name( 24 | self, 25 | "Data-Bucket-Parameter", 26 | string_parameter_name="DataBucket" 27 | ) 28 | 29 | group_name_param = ssm.StringParameter.from_string_parameter_name( 30 | self, 31 | "Feature-Group-Parameter", 32 | string_parameter_name="FeatureGroup" 33 | ) 34 | 35 | start_pipeline = lambda_.Function( 36 | self, 37 | "Release-Change", 38 | handler="index.lambda_handler", 39 | runtime=lambda_.Runtime.PYTHON_3_8, 40 | code=lambda_.Code.from_asset(os.path.join(os.path.dirname(__file__), "../../lambda/releaseChange")), 41 | environment={ 42 | "PIPELINE_NAME": pipeline_name 43 | }, 44 | memory_size=128, 45 | timeout=cdk.Duration.seconds(60) 46 | ) 47 | start_pipeline.add_to_role_policy( 48 | iam.PolicyStatement( 49 | actions=[ 50 | "codepipeline:StartPipelineExecution" 51 | ], 52 | effect=iam.Effect.ALLOW, 53 | resources=[ 54 | f"arn:aws:codepipeline:{cdk.Aws.REGION}:{cdk.Aws.ACCOUNT_ID}:{pipeline_name}" 55 | ] 56 | ) 57 | ) 58 | 59 | airflow_policy_document = { 60 | "Version": "2012-10-17", 61 | "Statement": [ 62 | { 63 | "Effect": "Allow", 64 | "Action": "airflow:PublishMetrics", 65 | "Resource": f"arn:aws:airflow:{cdk.Aws.REGION}:{cdk.Aws.ACCOUNT_ID}:environment/{airflow_environment_name}" 66 | }, 67 | { 68 | "Effect": "Deny", 69 | "Action": "s3:ListAllMyBuckets", 70 | "Resource": [ 71 | f"arn:aws:s3:::{data_bucket.bucket_name}", 72 | f"arn:aws:s3:::{data_bucket.bucket_name}/*" 73 | ] 74 | }, 75 | { 76 | "Effect": "Allow", 77 | "Action": [ 78 | "s3:GetObject*", 79 | "s3:GetBucket*", 80 | "s3:List*" 81 | ], 82 | "Resource": [ 83 | f"arn:aws:s3:::{data_bucket.bucket_name}", 84 | f"arn:aws:s3:::{data_bucket.bucket_name}/*" 85 | ] 86 | }, 87 | { 88 | "Effect": "Allow", 89 | "Action": [ 90 | "logs:CreateLogStream", 91 | "logs:CreateLogGroup", 92 | "logs:PutLogEvents", 93 | "logs:GetLogEvents", 94 | "logs:GetLogRecord", 95 | "logs:GetLogGroupFields", 96 | "logs:GetQueryResults" 97 | ], 98 | "Resource": [ 99 | f"arn:aws:logs:{cdk.Aws.REGION}:{cdk.Aws.ACCOUNT_ID}:log-group:airflow-{airflow_environment_name}-*" 100 | ] 101 | }, 102 | { 103 | "Effect": "Allow", 104 | "Action": [ 105 | "logs:DescribeLogGroups" 106 | ], 107 | "Resource": [ 108 | "*" 109 | ] 110 | }, 111 | { 112 | "Effect": "Allow", 113 | "Action": "cloudwatch:PutMetricData", 114 | "Resource": "*" 115 | }, 116 | { 117 | "Effect": "Allow", 118 | "Action": [ 119 | "sqs:ChangeMessageVisibility", 120 | "sqs:DeleteMessage", 121 | "sqs:GetQueueAttributes", 122 | "sqs:GetQueueUrl", 123 | "sqs:ReceiveMessage", 124 | "sqs:SendMessage" 125 | ], 126 | "Resource": f"arn:aws:sqs:{cdk.Aws.REGION}:*:airflow-celery-*" 127 | }, 128 | { 129 | "Effect": "Allow", 130 | "Action": [ 131 | 
"kms:Decrypt", 132 | "kms:DescribeKey", 133 | "kms:GenerateDataKey*", 134 | "kms:Encrypt" 135 | ], 136 | "NotResource": f"arn:aws:kms:*:{cdk.Aws.ACCOUNT_ID}:key/*", 137 | "Condition": { 138 | "StringLike": { 139 | "kms:ViaService": [ 140 | f"sqs.{cdk.Aws.REGION}.amazonaws.com" 141 | ] 142 | } 143 | } 144 | }, 145 | { 146 | "Effect": "Allow", 147 | "Action": [ 148 | "lambda:InvokeFunction" 149 | ], 150 | "Resource": f"{start_pipeline.function_arn}*" 151 | } 152 | ] 153 | } 154 | 155 | airflow_role = iam.Role( 156 | self, 157 | "AirflowRole", 158 | assumed_by=iam.CompositePrincipal( 159 | iam.ServicePrincipal("airflow.amazonaws.com"), 160 | iam.ServicePrincipal("airflow-env.amazonaws.com") 161 | ), 162 | inline_policies={ 163 | "AirflowRole-InlinePolicy": iam.PolicyDocument.from_json(airflow_policy_document) 164 | }, 165 | path="/service-role/" 166 | ) 167 | airflow_role.add_managed_policy(policy=iam.ManagedPolicy.from_aws_managed_policy_name("AmazonSageMakerFullAccess")) 168 | airflow_role.add_managed_policy(policy=iam.ManagedPolicy.from_aws_managed_policy_name("AmazonSageMakerFeatureStoreAccess")) 169 | data_bucket.grant_read_write(airflow_role) 170 | data_bucket_param.grant_read(airflow_role) 171 | group_name_param.grant_read(airflow_role) 172 | 173 | vpc = ec2.Vpc( 174 | self, 175 | "Airflow-VPC", 176 | cidr="10.0.0.0/16", 177 | max_azs=2, 178 | subnet_configuration=[ 179 | ec2.SubnetConfiguration( 180 | name="AirflowPublicSubnet", 181 | subnet_type=ec2.SubnetType.PUBLIC, 182 | cidr_mask=24 183 | ), 184 | ec2.SubnetConfiguration( 185 | name="AirflowPrivateSubnet", 186 | subnet_type=ec2.SubnetType.PRIVATE_WITH_NAT, 187 | cidr_mask=24 188 | ) 189 | ], 190 | nat_gateways=2, 191 | enable_dns_hostnames=True, 192 | enable_dns_support=True 193 | ) 194 | 195 | airflow_sg = ec2.SecurityGroup( 196 | self, 197 | "Airflow-SG", 198 | vpc=vpc, 199 | description="Airflow Internal Traffic", 200 | security_group_name=f"{airflow_environment_name}-sg" 201 | ) 202 | airflow_sg.connections.allow_internally(ec2.Port.all_traffic(), "MWAA") 203 | 204 | airflow_subnet_ids = list(map(lambda x: x.subnet_id, vpc.private_subnets)) 205 | 206 | airflow_network = mwaa.CfnEnvironment.NetworkConfigurationProperty( 207 | security_group_ids=[ 208 | airflow_sg.security_group_id 209 | ], 210 | subnet_ids=airflow_subnet_ids 211 | ) 212 | 213 | airflow_environment = mwaa.CfnEnvironment( 214 | self, 215 | "Airflow-Environment", 216 | name=airflow_environment_name, 217 | airflow_version="2.0.2", 218 | airflow_configuration_options={ 219 | "core.default_timezone": "utc", 220 | "logging.logging_level": "INFO" 221 | }, 222 | execution_role_arn=airflow_role.role_arn, 223 | environment_class="mw1.small", 224 | max_workers=5, 225 | source_bucket_arn=data_bucket.bucket_arn, 226 | dag_s3_path="airflow/dags", 227 | requirements_s3_path="airflow/requirements.txt", 228 | logging_configuration=mwaa.CfnEnvironment.LoggingConfigurationProperty( 229 | dag_processing_logs=mwaa.CfnEnvironment.ModuleLoggingConfigurationProperty( 230 | enabled=True, 231 | log_level="INFO" 232 | ), 233 | ), 234 | network_configuration=airflow_network, 235 | webserver_access_mode="PUBLIC_ONLY" 236 | ) 237 | 238 | artifacts_deployment = s3_deployment.BucketDeployment( 239 | self, 240 | "Deploy-Airflow-Artifacts", 241 | sources=[ 242 | s3_deployment.Source.asset(os.path.join(os.path.dirname(__file__), "../../airflow")) 243 | ], 244 | destination_bucket=data_bucket, 245 | destination_key_prefix="airflow", 246 | retain_on_delete=False 247 | ) 248 | 
airflow_environment.node.add_dependency(artifacts_deployment) 249 | 250 | lambda_param = ssm.StringParameter( 251 | self, 252 | "Release-Change-Parameter", 253 | parameter_name="ReleaseChangeLambda", 254 | description="SSM Parameter for the releaseChange Lambda Function", 255 | string_value=start_pipeline.function_name 256 | ) 257 | lambda_param.grant_read(airflow_role) 258 | -------------------------------------------------------------------------------- /Chapter10/Files/cdk/requirements.txt: -------------------------------------------------------------------------------- 1 | aws-cdk-lib==2.3.0 2 | constructs>=10.0.0,<11.0.0 3 | aws-cdk.aws-apigatewayv2-alpha==2.3.0a0 4 | aws-cdk.aws-apigatewayv2-integrations-alpha==2.3.0a0 5 | -------------------------------------------------------------------------------- /Chapter10/Files/lambda/createExperiment/index.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | import boto3 4 | import botocore 5 | from botocore.exceptions import ClientError 6 | 7 | logger = logging.getLogger() 8 | logger.setLevel(logging.INFO) 9 | cp = boto3.client("codepipeline") 10 | sm = boto3.client("sagemaker") 11 | 12 | 13 | def lambda_handler(event, context): 14 | logger.debug("## Environment Variables ##") 15 | logger.debug(os.environ) 16 | logger.debug("## Event ##") 17 | logger.debug(event) 18 | logger.info('Creating SageMaker Experiment') 19 | 20 | if ("modelName" in event): 21 | model_name = event["modelName"] 22 | else: 23 | raise KeyError("'Model Name' not found in Lambda event!") 24 | 25 | if ("pipelineName" in event): 26 | pipeline_name = event["pipelineName"] 27 | else: 28 | raise KeyError("'Pipeline Name' not found in Lambda event!") 29 | 30 | if ("stageName" in event): 31 | stage_name = event["stageName"] 32 | else: 33 | raise KeyError("'Pipeline Stage Name' not in Lambda event!") 34 | 35 | if ("actionName" in event): 36 | action_name = event["actionName"] 37 | else: 38 | raise KeyError("'Pipeline Action Name' not in Lambda event!") 39 | 40 | if ("dataBucket" in event): 41 | data_bucket = event["dataBucket"] 42 | else: 43 | raise KeyError("'Data Bucket Name' not found in Lambda event!") 44 | 45 | execution_id = get_executionId(pipeline_name, stage_name, action_name) 46 | experiment_name, trial_name = create_experiment(model_name, execution_id) 47 | 48 | payload = { 49 | "statusCode": 200, 50 | "executionId": execution_id, 51 | "experimentName": experiment_name, 52 | "trialName": trial_name, 53 | "processingJobName": f"{model_name}-processing-{execution_id}", 54 | "processingCodeInput": f"s3://{data_bucket}/scripts/preprocessing.py", 55 | "processingTrainingOutput": f"s3://{data_bucket}/{execution_id}/input/training", 56 | "processingTestingOutput": f"s3://{data_bucket}/{execution_id}/input/testing", 57 | "processingBaselineOutput": f"s3://{data_bucket}/{execution_id}/input/baseline", 58 | "trainingJobName": f"{model_name}-training-{execution_id}", 59 | "trainingDataInput": f"s3://{data_bucket}/{execution_id}/input/training", 60 | "trainingModelOutput": f"s3://{data_bucket}/{execution_id}/", 61 | "evaluationJobName": f"{model_name}-evaluation-{execution_id}", 62 | "evaluationCodeInput": f"s3://{data_bucket}/scripts/evaluation.py", 63 | "evaluationDataInput": f"s3://{data_bucket}/{execution_id}/input/testing/testing.csv", 64 | "evaluationOutput": f"s3://{data_bucket}/{execution_id}/input/evaluation", 65 | "evaluationOutputFile": f"{execution_id}/input/evaluation/evaluation.json", 66 | 
"baselineDataInput": f"s3://{data_bucket}/{execution_id}/input/baseline/baseline.csv", 67 | } 68 | 69 | return payload 70 | 71 | 72 | def get_executionId(pipeline_name, stage_name, action_name): 73 | logger.info(f"Getting the latest CodePipeline Execution ID for {pipeline_name}") 74 | try: 75 | response = cp.get_pipeline_state(name=pipeline_name) 76 | for stageState in response["stageStates"]: 77 | if stageState["stageName"] == stage_name: 78 | for actionState in stageState["actionStates"]: 79 | if actionState["actionName"] == action_name: 80 | executionId = stageState["latestExecution"]["pipelineExecutionId"] 81 | except ClientError as e: 82 | error_message = e.response["Error"]["Message"] 83 | logger.error(error_message) 84 | raise Exception(error_message) 85 | 86 | logger.info(f"Current Pipeline Execution ID: {executionId}") 87 | return executionId 88 | 89 | 90 | def create_experiment(model_name, execution_id): 91 | experiment_name = f"{model_name.capitalize()}Experiments" 92 | trial_name = f"{model_name.capitalize()}-{execution_id}" 93 | logger.info("Getting list of SageMaker Experiments") 94 | try: 95 | response = sm.list_experiments( 96 | SortBy="Name", 97 | MaxResults=100 98 | ) 99 | names = [experiments["ExperimentName"] for experiments in response["ExperimentSummaries"]] 100 | except ClientError as e: 101 | error_message = e.response["Error"]["Message"] 102 | logger.error(error_message) 103 | raise Exception(error_message) 104 | 105 | logger.info(f"Checking if Experiment already exists") 106 | if experiment_name not in names: 107 | try: 108 | response = sm.create_experiment( 109 | ExperimentName=experiment_name, 110 | Description=f"Training Experiments for {model_name}", 111 | ) 112 | logger.info(f"Created SageMaker Experiment: {experiment_name}") 113 | except ClientError as e: 114 | error_message = e.response["Error"]["Message"] 115 | logger.error(error_message) 116 | raise Exception(error_message) 117 | 118 | logger.info(f"Creating Associated SageMaker Trial") 119 | try: 120 | response = sm.create_trial( 121 | ExperimentName=experiment_name, 122 | TrialName=trial_name 123 | ) 124 | except ClientError as e: 125 | error_message = e.response["Error"]["Message"] 126 | logger.error(error_message) 127 | raise Exception(error_message) 128 | 129 | return experiment_name, trial_name -------------------------------------------------------------------------------- /Chapter10/Files/lambda/evaluateResults/index.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import logging 4 | import boto3 5 | import botocore 6 | from botocore.exceptions import ClientError 7 | from urllib.parse import urlparse 8 | 9 | logger = logging.getLogger() 10 | logger.setLevel(logging.INFO) 11 | s3 = boto3.client("s3") 12 | ssm = boto3.client("ssm") 13 | sm = boto3.client("sagemaker") 14 | 15 | 16 | def lambda_handler(event, context): 17 | logger.debug("## Environment Variables ##") 18 | logger.debug(os.environ) 19 | logger.debug("## Event ##") 20 | logger.debug(event) 21 | 22 | if ("evaluationFile" in event): 23 | evaluation_file = event["evaluationFile"] 24 | else: 25 | raise KeyError("'S3 Key for Evaluation File' not found in Lambda event!") 26 | 27 | logger.info("Reading Evaluation Report") 28 | try: 29 | obj = s3.get_object(Bucket=os.environ["BUCKET"], Key=evaluation_file)["Body"].read() 30 | except ClientError as e: 31 | error_message = e.response["Error"]["Message"] 32 | logger.error(error_message) 33 | raise Exception(error_message) 34 | 35 
| current_report = json.loads(obj) 36 | logger.info(f"Current Evaluation Report: {current_report}") 37 | current_rmse = current_report["regression_metrics"]["rmse"]["value"] 38 | 39 | logger.info("Reading Previous Model's Evaluation Report") 40 | model_package = get_package(os.environ["PACKAGE_PARAMETER"]) 41 | if model_package != "PLACEHOLDER": 42 | try: 43 | uri = sm.describe_model_package( 44 | ModelPackageName=model_package 45 | )["ModelMetrics"]["ModelQuality"]["Statistics"]["S3Uri"] 46 | bucket = urlparse(uri).netloc 47 | key = urlparse(uri).path.lstrip("/") 48 | previous_obj = s3.get_object(Bucket=bucket, Key=key)["Body"].read() 49 | except ClientError as e: 50 | error_message = e.response["Error"]["Message"] 51 | logger.error(error_message) 52 | raise Exception(error_message) 53 | 54 | previous_report = json.loads(previous_obj) 55 | logger.info(f"Previous Evaluation Report: {previous_report}") 56 | previous_rmse = previous_report["regression_metrics"]["rmse"]["value"] 57 | 58 | if current_rmse < previous_rmse: 59 | improved = "TRUE" 60 | else: 61 | improved = "FALSE" 62 | else: 63 | improved = "TRUE" 64 | logger.info(f"Model Improved: {improved}") 65 | 66 | return { 67 | 'statusCode': 200, 68 | 'rmse': current_rmse, 69 | 'improved': improved 70 | } 71 | 72 | 73 | def get_package(parameter_name): 74 | try: 75 | package = ssm.get_parameter( 76 | Name=parameter_name 77 | )['Parameter']['Value'] 78 | 79 | return package 80 | 81 | except ClientError as e: 82 | error_message = e.response['Error']['Message'] 83 | logger.error(error_message) 84 | raise Exception(error_message) 85 | -------------------------------------------------------------------------------- /Chapter10/Files/lambda/registerModel/index.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | import boto3 4 | from botocore.exceptions import ClientError 5 | 6 | sm = boto3.client("sagemaker") 7 | ssm = boto3.client("ssm") 8 | logger = logging.getLogger() 9 | logger.setLevel(logging.INFO) 10 | 11 | 12 | def lambda_handler(event, context): 13 | logger.debug("## Environment Variables ##") 14 | logger.debug(os.environ) 15 | logger.debug("## Event ##") 16 | logger.debug(event) 17 | 18 | if ("modelUri" in event): 19 | model_uri = event["modelUri"] 20 | else: 21 | raise KeyError("'Model Uri' not found in Lambda event!") 22 | 23 | if ("evaluationUri" in event): 24 | evaluation_uri = event["evaluationUri"] 25 | else: 26 | raise KeyError("'Evaluation File URI' not found in Lambda event!") 27 | 28 | if ("baselineUri" in event): 29 | baseline_uri = event["baselineUri"] 30 | else: 31 | raise KeyError("'Testing Data URI' not found in Lambda event!") 32 | 33 | if ("executionId" in event): 34 | execution_id = event["executionId"] 35 | else: 36 | raise KeyError("'Execution ID' not found in Lambda event!") 37 | 38 | request = { 39 | "InferenceSpecification": { 40 | "Containers": [ 41 | { 42 | "Image": os.environ["IMAGE_URI"], 43 | "ModelDataUrl": model_uri 44 | } 45 | ], 46 | "SupportedContentTypes": [ 47 | "text/csv" 48 | ], 49 | "SupportedRealtimeInferenceInstanceTypes": [ 50 | "ml.t2.large", 51 | "ml.c5.large", 52 | "ml.c5.xlarge" 53 | ], 54 | "SupportedResponseMIMETypes": [ 55 | "text/csv" 56 | ], 57 | "SupportedTransformInstanceTypes": [ 58 | "ml.c5.xlarge" 59 | ] 60 | }, 61 | "ModelApprovalStatus": "Approved", 62 | "MetadataProperties": { 63 | "ProjectId": execution_id, 64 | "GeneratedBy": "CDK Pipeline" 65 | }, 66 | "ModelMetrics": { 67 | "ModelQuality": { 68 |
"Statistics": { 69 | "ContentType": "application/json", 70 | "S3Uri": f"s3://{os.environ['BUCKET']}/{evaluation_uri}" 71 | } 72 | } 73 | }, 74 | "ModelPackageDescription": "MLOps Production Model", 75 | "ModelPackageGroupName": os.environ["GROUP_NAME"] 76 | } 77 | 78 | try: 79 | logger.info("Creating model package.") 80 | response = sm.create_model_package(**request) 81 | model_package_arn = response["ModelPackageArn"] 82 | except ClientError as e: 83 | error_message = e.response["Error"]["Message"] 84 | logger.error(error_message) 85 | raise Exception(error_message) 86 | 87 | try: 88 | logger.info("Updating SSM Parameter with the latest model package.") 89 | response = ssm.put_parameter( 90 | Name=os.environ["PACKAGE_PARAMETER"], 91 | Value=model_package_arn, 92 | Type="String", 93 | Overwrite=True 94 | ) 95 | except ClientError as e: 96 | error_message = e.response["Error"]["Message"] 97 | logger.error(error_message) 98 | raise Exception(error_message) 99 | 100 | try: 101 | logger.info("Creating SSM Parameter with the latest copy of the testing data.") 102 | response = ssm.put_parameter( 103 | Name=os.environ["BASELINE_PARAMETER"], 104 | Value=baseline_uri, 105 | Type="String", 106 | Overwrite=True 107 | ) 108 | except ClientError as e: 109 | error_message = e.response["Error"]["Message"] 110 | logger.error(error_message) 111 | raise Exception(error_message) 112 | 113 | logger.info("Done!") 114 | return { 115 | "statusCode": 200, 116 | "PackageArn": model_package_arn, 117 | "TestingParameter": "TestingDataUri" 118 | } 119 | -------------------------------------------------------------------------------- /Chapter10/Files/lambda/registryCreator/index.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | import boto3 4 | from botocore.exceptions import ClientError 5 | 6 | sm = boto3.client("sagemaker") 7 | logger = logging.getLogger() 8 | logger.setLevel(logging.INFO) 9 | 10 | 11 | def lambda_handler(event, context): 12 | logger.debug("## Environment Variables ##") 13 | logger.debug(os.environ) 14 | logger.debug("## Event ##") 15 | logger.debug(event) 16 | props = event["ResourceProperties"] 17 | group_name = props["GroupName"] 18 | 19 | if event["RequestType"] == "Create": 20 | try: 21 | response = sm.create_model_package_group( 22 | ModelPackageGroupName=group_name, 23 | ModelPackageGroupDescription="Models Package Group for Production Models", 24 | Tags=[ 25 | { 26 | "Key": "Name", 27 | "Value": group_name 28 | } 29 | ] 30 | ) 31 | package_arn = response["ModelPackageGroupArn"] 32 | logger.info(f"Created Model Model Package Group: {package_arn}") 33 | return { 34 | "PhysicalResourceId": group_name, 35 | "Data": { 36 | "ModelPackageArn": package_arn 37 | } 38 | } 39 | except ClientError as e: 40 | error_message = e.response["Error"]["Message"] 41 | logging.error(f"Failed to create Model Package Group: {error_message}") 42 | raise Exception(error_message) 43 | 44 | elif event["RequestType"] == "Delete": 45 | try: 46 | response = sm.list_model_packages( 47 | ModelPackageGroupName=group_name, 48 | ModelApprovalStatus="Approved", 49 | SortBy="CreationTime", 50 | MaxResults=100 51 | ) 52 | 53 | for model_package in response["ModelPackageSummaryList"]: 54 | sm.delete_model_package(ModelPackageName=model_package["ModelPackageArn"]) 55 | 56 | sm.delete_model_package_group(ModelPackageGroupName=group_name) 57 | logger.info(f"Deleted Model Package Group: {group_name}") 58 | return { 59 | "PhysicalResourceId": group_name, 60 | 
"Data":{} 61 | } 62 | 63 | except ClientError as e: 64 | error_message = e.response["Error"]["Messgae"] 65 | logger.error(f"Failed to delete Model Package Group: {error_message}") 66 | raise Exception(error_message) -------------------------------------------------------------------------------- /Chapter10/Files/lambda/releaseChange/index.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | import boto3 4 | import botocore 5 | from botocore.exceptions import ClientError 6 | 7 | logger = logging.getLogger() 8 | logger.setLevel(logging.INFO) 9 | cp = boto3.client("codepipeline") 10 | 11 | 12 | def lambda_handler(event, context): 13 | logger.debug("## Environment Variables ##") 14 | logger.debug(os.environ) 15 | logger.debug("## Event ##") 16 | logger.debug(event) 17 | pipeline_name = os.environ["PIPELINE_NAME"] 18 | logger.info(f"Starting Coninuous Training release change for {pipeline_name}") 19 | try: 20 | response = cp.start_pipeline_execution( 21 | name=pipeline_name 22 | ) 23 | logger.info(f'Release Change ExecutionId: {response["pipelineExecutionId"]}') 24 | except ClientError as e: 25 | error_message = e.response["Error"]["Message"] 26 | logger.error(error_message) 27 | raise Exception(error_message) 28 | return { 29 | "statusCode": 200, 30 | "ExecutionId": response["pipelineExecutionId"] 31 | } -------------------------------------------------------------------------------- /Chapter10/Files/scripts/evaluation.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import pathlib 4 | import tarfile 5 | import pandas as pd 6 | import numpy as np 7 | import tensorflow as tf 8 | from tensorflow import keras 9 | from tensorflow.keras.models import Sequential 10 | from tensorflow.keras.layers import Dense 11 | from tensorflow.keras.optimizers import Adam 12 | from sklearn import preprocessing 13 | from sklearn.metrics import mean_squared_error 14 | 15 | 16 | def load_model(base_dir): 17 | print("Loading Model") 18 | 19 | model_path = os.path.join(base_dir, "model/model.tar.gz") 20 | with tarfile.open(model_path) as tar: 21 | tar.extractall(".") 22 | 23 | model = tf.keras.models.load_model("model.h5") 24 | model.compile(optimizer="adam", loss="mse") 25 | return model 26 | 27 | 28 | def save_report(directory, report): 29 | print("Saving Evaluation Report") 30 | pathlib.Path(directory).mkdir(parents=True, exist_ok=True) 31 | evaluation_path = f"{directory}/evaluation.json" 32 | with open(evaluation_path, "w") as f: 33 | f.write(json.dumps(report)) 34 | 35 | def save_baseline(directory, predictions, labels): 36 | print("Saving Evaluation Quality Baseline") 37 | pathlib.Path(directory).mkdir(parents=True, exist_ok=True) 38 | baseline_path = f"{directory}/baseline.csv" 39 | baseline_dict = {"prediction": predictions, "label": labels} 40 | pd.DataFrame(baseline_dict).to_csv(baseline_path, header=True, index=False) 41 | 42 | 43 | def evaluate_model(base_dir, model): 44 | print("Evaluating Model") 45 | truths = [] 46 | predictions = [] 47 | column_names = [ 48 | "rings", 49 | "length", 50 | "diameter", 51 | "height", 52 | "whole_weight", 53 | "shucked_weight", 54 | "viscera_weight", 55 | "shell_weight", 56 | "sex_F", 57 | "sex_I", 58 | "sex_M" 59 | ] 60 | data_path = os.path.join(base_dir, "data/testing.csv") 61 | data = pd.read_csv(data_path, names=column_names) 62 | y = data["rings"].to_numpy() 63 | X = data.drop(["rings"], axis=1).to_numpy() 64 | X = 
preprocessing.normalize(X) 65 | for row in range(len(X)): 66 | payload = [X[row].tolist()] 67 | result = model.predict(payload) 68 | print(f"Result: {result[0][0]}") 69 | predictions.append(float(result[0][0])) 70 | truths.append(float(y[row])) 71 | return truths, predictions 72 | 73 | 74 | if __name__ == "__main__": 75 | input_dir = "/opt/ml/processing/input" 76 | output_dir = "/opt/ml/processing/output/evaluation" 77 | baseline_dir = "/opt/ml/processing/output/baseline" 78 | model = load_model(input_dir) 79 | y, y_pred = evaluate_model(input_dir, model) 80 | save_baseline(baseline_dir, y_pred, y) 81 | mse = mean_squared_error(y, y_pred) 82 | print(f"Mean Squared Error: {mse}") 83 | rmse = mean_squared_error(y, y_pred, squared=False) 84 | print(f"Root Mean Squared Error: {rmse}") 85 | std = np.std(np.array(y) - np.array(y_pred)) 86 | print(f"Standard Deviation: {std}") 87 | report_dict = { 88 | "regression_metrics": { 89 | "rmse": { 90 | "value": rmse, 91 | "standard_deviation": std 92 | }, 93 | "mse": { 94 | "value": mse, 95 | "standard_deviation": std 96 | }, 97 | }, 98 | } 99 | save_report(output_dir, report_dict) 100 | -------------------------------------------------------------------------------- /Chapter10/Files/scripts/preprocessing.py: -------------------------------------------------------------------------------- 1 | import os 2 | import boto3 3 | import numpy as np 4 | import pandas as pd 5 | import awswrangler as wr 6 | from sklearn.utils import shuffle 7 | from botocore.exceptions import ClientError 8 | 9 | boto3.setup_default_session(region_name=os.environ["AWS_REGION"]) 10 | sm = boto3.client("sagemaker") 11 | 12 | 13 | def get_featurestore_params(feature_group_name): 14 | try: 15 | response = sm.describe_feature_group( 16 | FeatureGroupName=feature_group_name 17 | ) 18 | return response["OfflineStoreConfig"]["DataCatalogConfig"]["Database"], response["OfflineStoreConfig"]["DataCatalogConfig"]["TableName"] 19 | except ClientError as e: 20 | error_message = e.response["Error"]["Message"] 21 | print(error_message) 22 | raise Exception(error_message) 23 | 24 | 25 | if __name__ == "__main__": 26 | base_dir = "/opt/ml/processing" 27 | print('Loading "raw" data') 28 | fg_name = os.environ["FEATURE_GROUP_NAME"] 29 | print(f"Using Feature Group: {fg_name}") 30 | columns = ["rings", "length", "diameter", "height", "whole_weight", "shucked_weight", "viscera_weight", "shell_weight", "sex_f", "sex_i", "sex_m"] 31 | database, table = get_featurestore_params(fg_name) 32 | print("Querying Feature Store Data") 33 | query_string = f'SELECT {",".join(columns)} FROM "{table}" WHERE is_deleted=false;' 34 | featurestore_df = wr.athena.read_sql_query(query_string, database=database, ctas_approach=False) 35 | print("Shuffling Data") 36 | X = shuffle(featurestore_df).to_numpy() 37 | print("Spliting the data into training, validation and testing datasets ...") 38 | training, validation, testing = np.split(X, [int(.8*len(X)), int(.95*len(X))]) 39 | print("Saving datasets to S3") 40 | pd.DataFrame(training).to_csv(f"{base_dir}/output/training/training.csv", header=False, index=False) 41 | pd.DataFrame(validation).to_csv(f"{base_dir}/output/training/validation.csv", header=False, index=False) 42 | pd.DataFrame(testing).to_csv(f"{base_dir}/output/testing/testing.csv", header=False, index=False) 43 | -------------------------------------------------------------------------------- /Chapter10/Notebooks/SageMaker Feature Store Example.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# SageMaker Feature Store Example\n", 8 | "\n", 9 | ">__NOTE:__ This Notebook uses the _Python 3 (Data Science)_ Kernel\n", 10 | "\n", 11 | "## Setup" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "import warnings\n", 21 | "import time\n", 22 | "import sagemaker\n", 23 | "import boto3\n", 24 | "import numpy as np\n", 25 | "import pandas as pd\n", 26 | "from time import gmtime, strftime\n", 27 | "from sklearn import preprocessing\n", 28 | "from sagemaker.feature_store.feature_group import FeatureGroup\n", 29 | "\n", 30 | "warnings.filterwarnings(\"ignore\")\n", 31 | "\n", 32 | "role = sagemaker.get_execution_role()\n", 33 | "sagemaker_session = sagemaker.Session()\n", 34 | "region_name = sagemaker_session.boto_region_name\n", 35 | "data_bucket = f\"\"\"{boto3.client(\"ssm\", region_name=region_name).get_parameter(Name=\"DataBucket\")[\"Parameter\"][\"Value\"]}\"\"\"\n", 36 | "prefix = \"sagemaker-featurestore\"\n", 37 | "\n", 38 | "def check_feature_group_status(feature_group):\n", 39 | " status = feature_group.describe().get(\"FeatureGroupStatus\")\n", 40 | " while status == \"Creating\":\n", 41 | " print(\"Waiting for Feature Group to be Created\")\n", 42 | " time.sleep(5)\n", 43 | " status = feature_group.describe().get(\"FeatureGroupStatus\")\n", 44 | " print(f\"FeatureGroup {feature_group.name} successfully created.\")\n", 45 | "\n", 46 | "def check_data_availability(feature_group, bucket):\n", 47 | " s3_client = sagemaker_session.boto_session.client('s3', region_name=region_name)\n", 48 | " offline_store_contents = None\n", 49 | " feature_group_s3_uri = feature_group.describe().get(\"OfflineStoreConfig\").get(\"S3StorageConfig\").get(\"ResolvedOutputS3Uri\")\n", 50 | " feature_group_s3_prefix = feature_group_s3_uri.replace(f\"s3://{bucket}/\", \"\")\n", 51 | " while offline_store_contents is None:\n", 52 | " objects_in_bucket = s3_client.list_objects(Bucket=bucket, Prefix=feature_group_s3_prefix)\n", 53 | " if ('Contents' in objects_in_bucket and len(objects_in_bucket['Contents']) > 1):\n", 54 | " offline_store_contents = objects_in_bucket['Contents']\n", 55 | " else:\n", 56 | " print('Waiting for data into the offline store...\\n')\n", 57 | " time.sleep(60)\n", 58 | " print('Data available.') " 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "---\n", 66 | "\n", 67 | "## Data Preparation\n", 68 | "\n", 69 | "### Import Python Libraries and Helper Funcitons" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "### Download the Data" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "column_names = [\"sex\", \"length\", \"diameter\", \"height\", \"whole_weight\", \"shucked_weight\", \"viscera_weight\", \"shell_weight\", \"rings\"]\n", 86 | "abalone_data = pd.read_csv(\"http://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data\", names=column_names)\n", 87 | "abalone_data.head()" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "### Data Processing and Feature Engineering" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": {}, 101 | 
"outputs": [], 102 | "source": [ 103 | "data = abalone_data[[\"rings\", \"sex\", \"length\", \"diameter\", \"height\", \"whole_weight\", \"shucked_weight\", \"viscera_weight\", \"shell_weight\"]]\n", 104 | "processed_data = pd.get_dummies(data)\n", 105 | "processed_data.head()" 106 | ] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "metadata": {}, 111 | "source": [ 112 | "---\n", 113 | "\n", 114 | "## SageMaker Feature Store\n", 115 | "\n", 116 | "### Define the Feature Group" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 125 | "fg_name = f\"abalone-fg-{strftime('%d-%H-%M-%S', gmtime())}\"\n", 126 | "fg = FeatureGroup(name=fg_name, sagemaker_session=sagemaker_session)" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": {}, 132 | "source": [ 133 | "### Create Ingestion Timestamp Identifier (Event Time Feature)" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [ 142 | "time_stamp = int(round(time.time()))\n", 143 | "processed_data[\"TimeStamp\"] = pd.Series([time_stamp] * len(processed_data), dtype=\"float64\")" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": [ 150 | "### Create Feature Definition Schema" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "fg.load_feature_definitions(data_frame=processed_data)" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "### Create the Feature Group" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "metadata": {}, 173 | "outputs": [], 174 | "source": [ 175 | "fg.create(\n", 176 | " s3_uri=f\"s3://{data_bucket}/{prefix}\",\n", 177 | " record_identifier_name=\"rings\",\n", 178 | " event_time_feature_name=\"TimeStamp\",\n", 179 | " role_arn=role,\n", 180 | " enable_online_store=False\n", 181 | ")\n", 182 | "\n", 183 | "check_feature_group_status(fg)" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "### Ingest Data into the Feature Group" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [ 199 | "fg.ingest(data_frame=processed_data, max_workers=5, wait=True)\n", 200 | "\n", 201 | "check_data_availability(fg, data_bucket)" 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "metadata": {}, 207 | "source": [ 208 | "### Describe the Feature Group\n", 209 | "\n", 210 | ">__NOTE:__ Make sure to capture the name of the Feature Group _(FeatureGroupName)_, as we will be using this later." 
211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": {}, 217 | "outputs": [], 218 | "source": [ 219 | "fg.describe()" 220 | ] 221 | } 222 | ], 223 | "metadata": { 224 | "instance_type": "ml.t3.medium", 225 | "kernelspec": { 226 | "display_name": "Python 3 (Data Science)", 227 | "language": "python", 228 | "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-west-2:236514542706:image/datascience-1.0" 229 | }, 230 | "language_info": { 231 | "codemirror_mode": { 232 | "name": "ipython", 233 | "version": 3 234 | }, 235 | "file_extension": ".py", 236 | "mimetype": "text/x-python", 237 | "name": "python", 238 | "nbconvert_exporter": "python", 239 | "pygments_lexer": "ipython3", 240 | "version": "3.7.10" 241 | } 242 | }, 243 | "nbformat": 4, 244 | "nbformat_minor": 4 245 | } 246 | -------------------------------------------------------------------------------- /Chapter10/www/404.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 404 pages 8 | 9 | 10 | 11 | 12 | 13 |
[The markup of 404.html did not survive extraction; only its text content remains: the page displays a "404" graphic and the message "Page not found" (see error-page.css below for its styling).]
54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /Chapter10/www/css/error-page.css: -------------------------------------------------------------------------------- 1 | *{ 2 | padding:0px; 3 | margin:0px; 4 | } 5 | body{ 6 | background:black; 7 | } 8 | .st0{font-family:'FootlightMTLight';} 9 | .st1{font-size:83.0285px;} 10 | .st2{fill:gray;} 11 | 12 | svg{ 13 | width: 500px; 14 | height: 400px; 15 | text-align: center; 16 | /* fill: #ff6a00; */ 17 | fill: #ff06ac; 18 | } 19 | path#XMLID_5_ { 20 | fill: #ff06ac; 21 | /* fill: #ff6a00; */ 22 | filter: url(#blurFilter4); 23 | } 24 | path#XMLID_11_ ,path#XMLID_2_ { 25 | fill: #ff06ac; 26 | /* fill: #ff6a00; */ 27 | } 28 | .circle{ 29 | animation: out 2s infinite ease-out; 30 | fill: #ff06ac; 31 | /* fill: #ff6a00; */ 32 | } 33 | 34 | #container{ 35 | text-align:center; 36 | } 37 | .message{ 38 | /* color: #ff6a00; */ 39 | color: #ff06ac; 40 | } 41 | .message:after{ 42 | content:"]"; 43 | } 44 | .message:before{ 45 | content:"["; 46 | } 47 | 48 | .message:after, .message:before { 49 | color: #ff06ac; 50 | /* color: #ff6a00; */ 51 | font-size: 20px; 52 | -webkit-animation-name: opacity; 53 | -webkit-animation-duration: 2s; 54 | -webkit-animation-iteration-count: infinite; 55 | -webkit-animation-name: opacity; 56 | animation-name: opacity; 57 | -webkit-animation-duration: 2s; 58 | animation-duration: 2s; 59 | -webkit-animation-iteration-count: infinite; 60 | animation-iteration-count: infinite; 61 | margin:0 50px; 62 | } 63 | 64 | @-webkit-keyframes opacity { 65 | 0%, 100% { 66 | opacity: 0; 67 | } 68 | 50% { 69 | opacity: 1; 70 | } 71 | } 72 | 73 | @keyframes opacity { 74 | 0%, 100% { 75 | opacity: 0; 76 | } 77 | 50% { 78 | opacity: 1; 79 | } 80 | } 81 | 82 | @keyframes out { 83 | 0% {r:1; opacity: 0.9 ;} 84 | 25%{r:5; opacity: 0.3 ;} 85 | 50%{r:10; opacity: 0.2 ;} 86 | 75%{r:15;opacity:0.1;} 87 | 100% {r:20;opacity:0;} 88 | } 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | -------------------------------------------------------------------------------- /Chapter10/www/css/main-page.css: -------------------------------------------------------------------------------- 1 | /*! 
2 | * Start Bootstrap - One Page Wonder v5.0.8 (https://startbootstrap.com/theme/one-page-wonder) 3 | * Copyright 2013-2020 Start Bootstrap 4 | * Licensed under MIT (https://github.com/StartBootstrap/startbootstrap-one-page-wonder/blob/master/LICENSE) 5 | */ 6 | 7 | body { 8 | font-family: 'Lato'; 9 | } 10 | 11 | h1, 12 | h2, 13 | h3, 14 | h4, 15 | h5, 16 | h6 { 17 | font-family: 'Catamaran'; 18 | font-weight: 800 !important; 19 | } 20 | 21 | .btn-xl { 22 | text-transform: uppercase; 23 | padding: 1.5rem 3rem; 24 | font-size: 0.9rem; 25 | font-weight: 700; 26 | letter-spacing: 0.1rem; 27 | } 28 | 29 | .bg-black { 30 | background-color: #000 !important; 31 | } 32 | 33 | .rounded-pill { 34 | border-radius: 5rem; 35 | } 36 | 37 | .navbar-custom { 38 | padding-top: 1rem; 39 | padding-bottom: 1rem; 40 | background-color: rgba(0, 0, 0, 0.7); 41 | } 42 | 43 | .navbar-custom .navbar-brand { 44 | text-transform: uppercase; 45 | font-size: 1rem; 46 | letter-spacing: 0.1rem; 47 | font-weight: 700; 48 | } 49 | 50 | .navbar-custom .navbar-nav .nav-item .nav-link { 51 | text-transform: uppercase; 52 | font-size: 0.8rem; 53 | font-weight: 700; 54 | letter-spacing: 0.1rem; 55 | } 56 | 57 | header.masthead { 58 | position: relative; 59 | overflow: hidden; 60 | padding-top: calc(7rem + 72px); 61 | padding-bottom: 7rem; 62 | background: linear-gradient(0deg, #ff6a00 0%, #ee0979 100%); 63 | background-repeat: no-repeat; 64 | background-position: center center; 65 | background-attachment: scroll; 66 | background-size: cover; 67 | } 68 | 69 | header.masthead .masthead-content { 70 | z-index: 1; 71 | position: relative; 72 | } 73 | 74 | header.masthead .masthead-content .masthead-heading { 75 | font-size: 4rem; 76 | } 77 | 78 | header.masthead .masthead-content .masthead-subheading { 79 | font-size: 2rem; 80 | } 81 | 82 | header.masthead .bg-circle { 83 | z-index: 0; 84 | position: absolute; 85 | border-radius: 100%; 86 | background: linear-gradient(0deg, #ee0979 0%, #ff6a00 100%); 87 | } 88 | 89 | header.masthead .bg-circle-1 { 90 | height: 90rem; 91 | width: 90rem; 92 | bottom: -55rem; 93 | left: -55rem; 94 | } 95 | 96 | header.masthead .bg-circle-2 { 97 | height: 50rem; 98 | width: 50rem; 99 | top: -25rem; 100 | right: -25rem; 101 | } 102 | 103 | header.masthead .bg-circle-3 { 104 | height: 20rem; 105 | width: 20rem; 106 | bottom: -10rem; 107 | right: 5%; 108 | } 109 | 110 | header.masthead .bg-circle-4 { 111 | height: 30rem; 112 | width: 30rem; 113 | top: -5rem; 114 | right: 35%; 115 | } 116 | 117 | @media (min-width: 992px) { 118 | header.masthead { 119 | padding-top: calc(10rem + 55px); 120 | padding-bottom: 10rem; 121 | } 122 | header.masthead .masthead-content .masthead-heading { 123 | font-size: 6rem; 124 | } 125 | header.masthead .masthead-content .masthead-subheading { 126 | font-size: 4rem; 127 | } 128 | } 129 | 130 | .bg-primary { 131 | background-color: #ee0979 !important; 132 | } 133 | 134 | .btn-primary { 135 | background-color: #ee0979; 136 | /* border-color: #ee0979; */ 137 | border-color: #6600ba; 138 | } 139 | 140 | .btn-primary:active, .btn-primary:focus, .btn-primary:hover { 141 | background-color: #bd0760 !important; 142 | border-color: #bd0760 !important; 143 | } 144 | 145 | .btn-primary:focus { 146 | box-shadow: 0 0 0 0.2rem rgba(238, 9, 121, 0.5); 147 | } 148 | 149 | .btn-secondary { 150 | background-color: #ff6a00; 151 | border-color: #ff6a00; 152 | } 153 | 154 | .btn-secondary:active, .btn-secondary:focus, .btn-secondary:hover { 155 | background-color: #cc5500 !important; 156 | 
border-color: #cc5500 !important; 157 | } 158 | 159 | .btn-secondary:focus { 160 | box-shadow: 0 0 0 0.2rem rgba(255, 106, 0, 0.5); 161 | } 162 | 163 | .modal-header, h4, .close { 164 | /*background-color: #008CBA;*/ 165 | /* background-color: #6600ba; */ 166 | background-color: #ff06ac; 167 | color:white !important; 168 | text-align: center; 169 | font-size: 30px; 170 | } 171 | .modal-footer { 172 | background-color: #f9f9f9; 173 | } 174 | -------------------------------------------------------------------------------- /Chapter10/www/img/team-work.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Automated-Machine-Learning-on-AWS/fa106b966823211a4588286656b6cf01cc6c14b2/Chapter10/www/img/team-work.jpeg -------------------------------------------------------------------------------- /Chapter10/www/img/undersea-abalone.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Automated-Machine-Learning-on-AWS/fa106b966823211a4588286656b6cf01cc6c14b2/Chapter10/www/img/undersea-abalone.jpg -------------------------------------------------------------------------------- /Chapter10/www/img/video-monitoring.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Automated-Machine-Learning-on-AWS/fa106b966823211a4588286656b6cf01cc6c14b2/Chapter10/www/img/video-monitoring.jpeg -------------------------------------------------------------------------------- /Chapter10/www/robots.txt: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: / -------------------------------------------------------------------------------- /Chapter10/www/scss/_bootstrap-overrides.scss: -------------------------------------------------------------------------------- 1 | .bg-primary { 2 | background-color: $primary !important; 3 | } 4 | 5 | .btn-primary { 6 | background-color: $primary; 7 | border-color: $primary; 8 | &:active, 9 | &:focus, 10 | &:hover { 11 | background-color: darken($primary, 10%) !important; 12 | border-color: darken($primary, 10%) !important; 13 | } 14 | &:focus { 15 | box-shadow: 0 0 0 0.2rem fade-out($primary, 0.5); 16 | } 17 | } 18 | 19 | .btn-secondary { 20 | background-color: $secondary; 21 | border-color: $secondary; 22 | &:active, 23 | &:focus, 24 | &:hover { 25 | background-color: darken($secondary, 10%) !important; 26 | border-color: darken($secondary, 10%) !important; 27 | } 28 | &:focus { 29 | box-shadow: 0 0 0 0.2rem fade-out($secondary, 0.5); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /Chapter10/www/scss/_global.scss: -------------------------------------------------------------------------------- 1 | body { 2 | @include body-font; 3 | } 4 | 5 | h1, 6 | h2, 7 | h3, 8 | h4, 9 | h5, 10 | h6 { 11 | @include heading-font; 12 | } 13 | 14 | .btn-xl { 15 | text-transform: uppercase; 16 | padding: 1.5rem 3rem; 17 | font-size: 0.9rem; 18 | font-weight: 700; 19 | letter-spacing: 0.1rem; 20 | } 21 | 22 | .bg-black { 23 | background-color: $black !important; 24 | } 25 | 26 | .rounded-pill { 27 | border-radius: 5rem; 28 | } 29 | -------------------------------------------------------------------------------- /Chapter10/www/scss/_masthead.scss: -------------------------------------------------------------------------------- 1 | header.masthead { 2 
| position: relative; 3 | overflow: hidden; 4 | padding-top: calc(7rem + 72px); 5 | padding-bottom: 7rem; 6 | background: linear-gradient(0deg, $secondary 0%, $primary 100%); 7 | background-repeat: no-repeat; 8 | background-position: center center; 9 | background-attachment: scroll; 10 | background-size: cover; 11 | .masthead-content { 12 | z-index: 1; 13 | position: relative; 14 | .masthead-heading { 15 | font-size: 4rem; 16 | } 17 | .masthead-subheading { 18 | font-size: 2rem; 19 | } 20 | } 21 | .bg-circle { 22 | z-index: 0; 23 | position: absolute; 24 | border-radius: 100%; 25 | background: linear-gradient(0deg, $primary 0%, $secondary 100%); 26 | } 27 | .bg-circle-1 { 28 | height: 90rem; 29 | width: 90rem; 30 | bottom: -55rem; 31 | left: -55rem; 32 | } 33 | .bg-circle-2 { 34 | height: 50rem; 35 | width: 50rem; 36 | top: -25rem; 37 | right: -25rem; 38 | } 39 | .bg-circle-3 { 40 | height: 20rem; 41 | width: 20rem; 42 | bottom: -10rem; 43 | right: 5%; 44 | } 45 | .bg-circle-4 { 46 | height: 30rem; 47 | width: 30rem; 48 | top: -5rem; 49 | right: 35%; 50 | } 51 | } 52 | 53 | @media (min-width: 992px) { 54 | header.masthead { 55 | padding-top: calc(10rem + 55px); 56 | padding-bottom: 10rem; 57 | .masthead-content { 58 | .masthead-heading { 59 | font-size: 6rem; 60 | } 61 | .masthead-subheading { 62 | font-size: 4rem; 63 | } 64 | } 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /Chapter10/www/scss/_mixins.scss: -------------------------------------------------------------------------------- 1 | @mixin heading-font { 2 | font-family: 'Catamaran'; 3 | font-weight: 800 !important; 4 | } 5 | 6 | @mixin body-font { 7 | font-family: 'Lato'; 8 | } 9 | -------------------------------------------------------------------------------- /Chapter10/www/scss/_navbar.scss: -------------------------------------------------------------------------------- 1 | .navbar-custom { 2 | padding-top: 1rem; 3 | padding-bottom: 1rem; 4 | background-color: fade-out($black, 0.3); 5 | .navbar-brand { 6 | text-transform: uppercase; 7 | font-size: 1rem; 8 | letter-spacing: 0.1rem; 9 | font-weight: 700; 10 | } 11 | .navbar-nav { 12 | .nav-item { 13 | .nav-link { 14 | text-transform: uppercase; 15 | font-size: 0.8rem; 16 | font-weight: 700; 17 | letter-spacing: 0.1rem; 18 | } 19 | } 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /Chapter10/www/scss/_variables.scss: -------------------------------------------------------------------------------- 1 | // Variables 2 | 3 | // Restated Bootstrap Variables 4 | 5 | $white: #fff !default; 6 | $gray-100: #f8f9fa !default; 7 | $gray-200: #e9ecef !default; 8 | $gray-300: #dee2e6 !default; 9 | $gray-400: #ced4da !default; 10 | $gray-500: #adb5bd !default; 11 | $gray-600: #868e96 !default; 12 | $gray-700: #495057 !default; 13 | $gray-800: #343a40 !default; 14 | $gray-900: #212529 !default; 15 | $black: #000 !default; 16 | 17 | $primary: #ee0979 !default; 18 | $secondary: #ff6a00 !default; 19 | -------------------------------------------------------------------------------- /Chapter10/www/scss/one-page-wonder.scss: -------------------------------------------------------------------------------- 1 | // Core variables and mixins 2 | @import "variables.scss"; 3 | @import "mixins.scss"; 4 | // Global CSS 5 | @import "global.scss"; 6 | // Components 7 | @import "navbar.scss"; 8 | @import "masthead.scss"; 9 | @import "bootstrap-overrides.scss"; 10 | 
-------------------------------------------------------------------------------- /Chapter10/www/vendor/bootstrap/css/bootstrap-reboot.css: -------------------------------------------------------------------------------- 1 | /*! 2 | * Bootstrap Reboot v4.5.3 (https://getbootstrap.com/) 3 | * Copyright 2011-2020 The Bootstrap Authors 4 | * Copyright 2011-2020 Twitter, Inc. 5 | * Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE) 6 | * Forked from Normalize.css, licensed MIT (https://github.com/necolas/normalize.css/blob/master/LICENSE.md) 7 | */ 8 | *, 9 | *::before, 10 | *::after { 11 | box-sizing: border-box; 12 | } 13 | 14 | html { 15 | font-family: sans-serif; 16 | line-height: 1.15; 17 | -webkit-text-size-adjust: 100%; 18 | -webkit-tap-highlight-color: rgba(0, 0, 0, 0); 19 | } 20 | 21 | article, aside, figcaption, figure, footer, header, hgroup, main, nav, section { 22 | display: block; 23 | } 24 | 25 | body { 26 | margin: 0; 27 | font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, "Noto Sans", sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji"; 28 | font-size: 1rem; 29 | font-weight: 400; 30 | line-height: 1.5; 31 | color: #212529; 32 | text-align: left; 33 | background-color: #fff; 34 | } 35 | 36 | [tabindex="-1"]:focus:not(:focus-visible) { 37 | outline: 0 !important; 38 | } 39 | 40 | hr { 41 | box-sizing: content-box; 42 | height: 0; 43 | overflow: visible; 44 | } 45 | 46 | h1, h2, h3, h4, h5, h6 { 47 | margin-top: 0; 48 | margin-bottom: 0.5rem; 49 | } 50 | 51 | p { 52 | margin-top: 0; 53 | margin-bottom: 1rem; 54 | } 55 | 56 | abbr[title], 57 | abbr[data-original-title] { 58 | text-decoration: underline; 59 | -webkit-text-decoration: underline dotted; 60 | text-decoration: underline dotted; 61 | cursor: help; 62 | border-bottom: 0; 63 | -webkit-text-decoration-skip-ink: none; 64 | text-decoration-skip-ink: none; 65 | } 66 | 67 | address { 68 | margin-bottom: 1rem; 69 | font-style: normal; 70 | line-height: inherit; 71 | } 72 | 73 | ol, 74 | ul, 75 | dl { 76 | margin-top: 0; 77 | margin-bottom: 1rem; 78 | } 79 | 80 | ol ol, 81 | ul ul, 82 | ol ul, 83 | ul ol { 84 | margin-bottom: 0; 85 | } 86 | 87 | dt { 88 | font-weight: 700; 89 | } 90 | 91 | dd { 92 | margin-bottom: .5rem; 93 | margin-left: 0; 94 | } 95 | 96 | blockquote { 97 | margin: 0 0 1rem; 98 | } 99 | 100 | b, 101 | strong { 102 | font-weight: bolder; 103 | } 104 | 105 | small { 106 | font-size: 80%; 107 | } 108 | 109 | sub, 110 | sup { 111 | position: relative; 112 | font-size: 75%; 113 | line-height: 0; 114 | vertical-align: baseline; 115 | } 116 | 117 | sub { 118 | bottom: -.25em; 119 | } 120 | 121 | sup { 122 | top: -.5em; 123 | } 124 | 125 | a { 126 | color: #007bff; 127 | text-decoration: none; 128 | background-color: transparent; 129 | } 130 | 131 | a:hover { 132 | color: #0056b3; 133 | text-decoration: underline; 134 | } 135 | 136 | a:not([href]):not([class]) { 137 | color: inherit; 138 | text-decoration: none; 139 | } 140 | 141 | a:not([href]):not([class]):hover { 142 | color: inherit; 143 | text-decoration: none; 144 | } 145 | 146 | pre, 147 | code, 148 | kbd, 149 | samp { 150 | font-family: SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace; 151 | font-size: 1em; 152 | } 153 | 154 | pre { 155 | margin-top: 0; 156 | margin-bottom: 1rem; 157 | overflow: auto; 158 | -ms-overflow-style: scrollbar; 159 | } 160 | 161 | figure { 162 | margin: 0 0 1rem; 163 | } 164 | 165 | img { 166 | 
vertical-align: middle; 167 | border-style: none; 168 | } 169 | 170 | svg { 171 | overflow: hidden; 172 | vertical-align: middle; 173 | } 174 | 175 | table { 176 | border-collapse: collapse; 177 | } 178 | 179 | caption { 180 | padding-top: 0.75rem; 181 | padding-bottom: 0.75rem; 182 | color: #6c757d; 183 | text-align: left; 184 | caption-side: bottom; 185 | } 186 | 187 | th { 188 | text-align: inherit; 189 | text-align: -webkit-match-parent; 190 | } 191 | 192 | label { 193 | display: inline-block; 194 | margin-bottom: 0.5rem; 195 | } 196 | 197 | button { 198 | border-radius: 0; 199 | } 200 | 201 | button:focus { 202 | outline: 1px dotted; 203 | outline: 5px auto -webkit-focus-ring-color; 204 | } 205 | 206 | input, 207 | button, 208 | select, 209 | optgroup, 210 | textarea { 211 | margin: 0; 212 | font-family: inherit; 213 | font-size: inherit; 214 | line-height: inherit; 215 | } 216 | 217 | button, 218 | input { 219 | overflow: visible; 220 | } 221 | 222 | button, 223 | select { 224 | text-transform: none; 225 | } 226 | 227 | [role="button"] { 228 | cursor: pointer; 229 | } 230 | 231 | select { 232 | word-wrap: normal; 233 | } 234 | 235 | button, 236 | [type="button"], 237 | [type="reset"], 238 | [type="submit"] { 239 | -webkit-appearance: button; 240 | } 241 | 242 | button:not(:disabled), 243 | [type="button"]:not(:disabled), 244 | [type="reset"]:not(:disabled), 245 | [type="submit"]:not(:disabled) { 246 | cursor: pointer; 247 | } 248 | 249 | button::-moz-focus-inner, 250 | [type="button"]::-moz-focus-inner, 251 | [type="reset"]::-moz-focus-inner, 252 | [type="submit"]::-moz-focus-inner { 253 | padding: 0; 254 | border-style: none; 255 | } 256 | 257 | input[type="radio"], 258 | input[type="checkbox"] { 259 | box-sizing: border-box; 260 | padding: 0; 261 | } 262 | 263 | textarea { 264 | overflow: auto; 265 | resize: vertical; 266 | } 267 | 268 | fieldset { 269 | min-width: 0; 270 | padding: 0; 271 | margin: 0; 272 | border: 0; 273 | } 274 | 275 | legend { 276 | display: block; 277 | width: 100%; 278 | max-width: 100%; 279 | padding: 0; 280 | margin-bottom: .5rem; 281 | font-size: 1.5rem; 282 | line-height: inherit; 283 | color: inherit; 284 | white-space: normal; 285 | } 286 | 287 | progress { 288 | vertical-align: baseline; 289 | } 290 | 291 | [type="number"]::-webkit-inner-spin-button, 292 | [type="number"]::-webkit-outer-spin-button { 293 | height: auto; 294 | } 295 | 296 | [type="search"] { 297 | outline-offset: -2px; 298 | -webkit-appearance: none; 299 | } 300 | 301 | [type="search"]::-webkit-search-decoration { 302 | -webkit-appearance: none; 303 | } 304 | 305 | ::-webkit-file-upload-button { 306 | font: inherit; 307 | -webkit-appearance: button; 308 | } 309 | 310 | output { 311 | display: inline-block; 312 | } 313 | 314 | summary { 315 | display: list-item; 316 | cursor: pointer; 317 | } 318 | 319 | template { 320 | display: none; 321 | } 322 | 323 | [hidden] { 324 | display: none !important; 325 | } 326 | /*# sourceMappingURL=bootstrap-reboot.css.map */ -------------------------------------------------------------------------------- /Chapter10/www/vendor/bootstrap/css/bootstrap-reboot.min.css: -------------------------------------------------------------------------------- 1 | /*! 2 | * Bootstrap Reboot v4.5.3 (https://getbootstrap.com/) 3 | * Copyright 2011-2020 The Bootstrap Authors 4 | * Copyright 2011-2020 Twitter, Inc. 
5 | * Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE) 6 | * Forked from Normalize.css, licensed MIT (https://github.com/necolas/normalize.css/blob/master/LICENSE.md) 7 | */*,::after,::before{box-sizing:border-box}html{font-family:sans-serif;line-height:1.15;-webkit-text-size-adjust:100%;-webkit-tap-highlight-color:transparent}article,aside,figcaption,figure,footer,header,hgroup,main,nav,section{display:block}body{margin:0;font-family:-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,"Helvetica Neue",Arial,"Noto Sans",sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol","Noto Color Emoji";font-size:1rem;font-weight:400;line-height:1.5;color:#212529;text-align:left;background-color:#fff}[tabindex="-1"]:focus:not(:focus-visible){outline:0!important}hr{box-sizing:content-box;height:0;overflow:visible}h1,h2,h3,h4,h5,h6{margin-top:0;margin-bottom:.5rem}p{margin-top:0;margin-bottom:1rem}abbr[data-original-title],abbr[title]{text-decoration:underline;-webkit-text-decoration:underline dotted;text-decoration:underline dotted;cursor:help;border-bottom:0;-webkit-text-decoration-skip-ink:none;text-decoration-skip-ink:none}address{margin-bottom:1rem;font-style:normal;line-height:inherit}dl,ol,ul{margin-top:0;margin-bottom:1rem}ol ol,ol ul,ul ol,ul ul{margin-bottom:0}dt{font-weight:700}dd{margin-bottom:.5rem;margin-left:0}blockquote{margin:0 0 1rem}b,strong{font-weight:bolder}small{font-size:80%}sub,sup{position:relative;font-size:75%;line-height:0;vertical-align:baseline}sub{bottom:-.25em}sup{top:-.5em}a{color:#007bff;text-decoration:none;background-color:transparent}a:hover{color:#0056b3;text-decoration:underline}a:not([href]):not([class]){color:inherit;text-decoration:none}a:not([href]):not([class]):hover{color:inherit;text-decoration:none}code,kbd,pre,samp{font-family:SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",monospace;font-size:1em}pre{margin-top:0;margin-bottom:1rem;overflow:auto;-ms-overflow-style:scrollbar}figure{margin:0 0 1rem}img{vertical-align:middle;border-style:none}svg{overflow:hidden;vertical-align:middle}table{border-collapse:collapse}caption{padding-top:.75rem;padding-bottom:.75rem;color:#6c757d;text-align:left;caption-side:bottom}th{text-align:inherit;text-align:-webkit-match-parent}label{display:inline-block;margin-bottom:.5rem}button{border-radius:0}button:focus{outline:1px dotted;outline:5px auto 
-webkit-focus-ring-color}button,input,optgroup,select,textarea{margin:0;font-family:inherit;font-size:inherit;line-height:inherit}button,input{overflow:visible}button,select{text-transform:none}[role=button]{cursor:pointer}select{word-wrap:normal}[type=button],[type=reset],[type=submit],button{-webkit-appearance:button}[type=button]:not(:disabled),[type=reset]:not(:disabled),[type=submit]:not(:disabled),button:not(:disabled){cursor:pointer}[type=button]::-moz-focus-inner,[type=reset]::-moz-focus-inner,[type=submit]::-moz-focus-inner,button::-moz-focus-inner{padding:0;border-style:none}input[type=checkbox],input[type=radio]{box-sizing:border-box;padding:0}textarea{overflow:auto;resize:vertical}fieldset{min-width:0;padding:0;margin:0;border:0}legend{display:block;width:100%;max-width:100%;padding:0;margin-bottom:.5rem;font-size:1.5rem;line-height:inherit;color:inherit;white-space:normal}progress{vertical-align:baseline}[type=number]::-webkit-inner-spin-button,[type=number]::-webkit-outer-spin-button{height:auto}[type=search]{outline-offset:-2px;-webkit-appearance:none}[type=search]::-webkit-search-decoration{-webkit-appearance:none}::-webkit-file-upload-button{font:inherit;-webkit-appearance:button}output{display:inline-block}summary{display:list-item;cursor:pointer}template{display:none}[hidden]{display:none!important} 8 | /*# sourceMappingURL=bootstrap-reboot.min.css.map */ -------------------------------------------------------------------------------- /Chapter11/Files/cdk/acme_pipeline_stack.py: -------------------------------------------------------------------------------- 1 | import aws_cdk as cdk 2 | import aws_cdk.aws_codecommit as codecommit 3 | import aws_cdk.aws_s3 as s3 4 | import aws_cdk.pipelines as pipelines 5 | import aws_cdk.aws_ssm as ssm 6 | import aws_cdk.aws_ecr as ecr 7 | import aws_cdk.aws_iam as iam 8 | from constructs import Construct 9 | 10 | 11 | from .stacks.ml_workflow_stack import MLWorkflowStack 12 | from .stacks.test_application_stack import TestApplicaitonStack 13 | from .stacks.production_application_stack import ProductionApplicaitonStack 14 | from .stacks.data_workflow_stack import DataWorkflowStack 15 | 16 | class MLWorkflowStage(cdk.Stage): 17 | 18 | def __init__(self, scope: Construct, id: str, *, group_name: str, threshold: float, data_bucket_name: str, feature_group_name: str, **kwargs): 19 | super().__init__(scope, id, **kwargs) 20 | ml_workflow_stack = MLWorkflowStack( 21 | self, 22 | "MLWorkflowStack", 23 | group_name=group_name, 24 | threshold=threshold, 25 | data_bucket_name=data_bucket_name, 26 | feature_group_name=feature_group_name 27 | ) 28 | self.sfn_arn = ml_workflow_stack.sfn_output 29 | 30 | class TestApplicationStage(cdk.Stage): 31 | 32 | def __init__(self, scope: Construct, id: str, *, model_name: str, **kwargs): 33 | super().__init__(scope, id, **kwargs) 34 | test_stack = TestApplicaitonStack(self, "TestApplicaitonStack", model_name=model_name) 35 | self.cdn_output = test_stack.cdn_output 36 | self.api_output = test_stack.api_output 37 | 38 | 39 | class ProductionApplicationStage(cdk.Stage): 40 | def __init__(self, scope: Construct, id: str, *, model_name: str, **kwargs): 41 | super().__init__(scope, id, **kwargs) 42 | production_stack = ProductionApplicaitonStack(self, "ProdApplicationStack", model_name=model_name) 43 | self.cdn_output = production_stack.cdn_output 44 | self.api_output = production_stack.api_output 45 | 46 | 47 | class DataWorkflowStage(cdk.Stage): 48 | def __init__(self, scope: Construct, id: str, *, 
airflow_environment_name: str, data_bucket_name: str, pipeline_name: str, **kwargs): 49 | super().__init__(scope, id, **kwargs) 50 | data_workflow_stack = DataWorkflowStack(self, "DataWorkflowStack", airflow_environment_name=airflow_environment_name, data_bucket_name=data_bucket_name, pipeline_name=pipeline_name) 51 | 52 | 53 | class PipelineStack(cdk.Stack): 54 | 55 | def __init__(self, scope: Construct, id: str, *, model_name: str=None, group_name: str=None, repo_name: str=None, feature_group: str=None, threshold: float=None, cdk_version: str=None, **kwargs) -> None: 56 | super().__init__(scope, id, **kwargs) 57 | 58 | self.code_repo = codecommit.Repository( 59 | self, 60 | "Source-Repository", 61 | repository_name=repo_name, 62 | description="ACME Web Application Source Code Repository" 63 | ) 64 | cdk.CfnOutput( 65 | self, 66 | "Clone-URL", 67 | description="CodeCommit Clone URL", 68 | value=self.code_repo.repository_clone_url_http 69 | ) 70 | 71 | self.data_bucket = s3.Bucket( 72 | self, 73 | "Data-Bucket", 74 | bucket_name=f"data-{self.region}-{self.account}", 75 | block_public_access=s3.BlockPublicAccess.BLOCK_ALL, 76 | auto_delete_objects=True, 77 | removal_policy=cdk.RemovalPolicy.DESTROY, 78 | versioned=True 79 | ) 80 | 81 | ssm.StringParameter( 82 | self, 83 | "Data-Bucket-Parameter", 84 | parameter_name="DataBucket", 85 | description="SSM Parameter for the S3 Data Bucket Name", 86 | string_value=self.data_bucket.bucket_name 87 | ) 88 | 89 | ssm.StringParameter( 90 | self, 91 | "Feature-Group-Parameter", 92 | parameter_name="FeatureGroup", 93 | description="SSM Paramater for the SageMaker Feature Store group", 94 | string_value=feature_group 95 | ) 96 | 97 | source_artifact = pipelines.CodePipelineSource.code_commit( 98 | repository=self.code_repo, 99 | branch="main" 100 | ) 101 | 102 | ml_workflow_stage = MLWorkflowStage( 103 | self, 104 | "Build-MLWorkflow", 105 | data_bucket_name=self.data_bucket.bucket_name, 106 | group_name=group_name, 107 | threshold=threshold, 108 | feature_group_name=feature_group 109 | ) 110 | 111 | test_stage = TestApplicationStage( 112 | self, 113 | "Test-Deployment", 114 | model_name=model_name 115 | ) 116 | 117 | prod_stage = ProductionApplicationStage( 118 | self, 119 | "Production-Deployment", 120 | model_name=model_name 121 | ) 122 | 123 | data_workflow_stage = DataWorkflowStage( 124 | self, 125 | "Build-DataWorkflow", 126 | airflow_environment_name="acme-airflow-environment", 127 | data_bucket_name=self.data_bucket.bucket_name, 128 | pipeline_name="ACME-WebApp-Pipeline" 129 | ) 130 | 131 | pipeline = pipelines.CodePipeline( 132 | self, 133 | "Application-Pipeline", 134 | pipeline_name="ACME-WebApp-Pipeline", 135 | self_mutation=True, 136 | cli_version=cdk_version, 137 | synth=pipelines.ShellStep( 138 | "Synth", 139 | input=source_artifact, 140 | commands=[ 141 | "printenv", 142 | f"npm install -g aws-cdk@{cdk_version}", 143 | "python -m pip install --upgrade pip", 144 | "pip install -r requirements.txt", 145 | "cdk synth" 146 | ] 147 | ) 148 | ) 149 | pipeline.add_stage( 150 | ml_workflow_stage, 151 | post=[ 152 | pipelines.CodeBuildStep( 153 | "Execute-MLWorkflow", 154 | input=source_artifact, 155 | commands=[ 156 | "python3 ./scripts/invoke.py" 157 | ], 158 | env_from_cfn_outputs={ 159 | "STATEMACHINE_ARN": ml_workflow_stage.sfn_arn 160 | }, 161 | env={ 162 | "MODEL_NAME": model_name, 163 | "PIPELINE_NAME": "ACME-WebApp-Pipeline", 164 | "STAGE_NAME": "Build-MLWorkflow", 165 | "ACTION_NAME": "Execute-MLWorkflow", 166 | "DATA_BUCKET": 
self.data_bucket.bucket_name 167 | }, 168 | role_policy_statements=[ 169 | iam.PolicyStatement( 170 | actions=[ 171 | "states:ListStateMachine", 172 | "states:DescribeStateMachine", 173 | "states:DescribeExecution", 174 | "states:ListExecutions", 175 | "states:GetExecutionHistory", 176 | "states:StartExecution", 177 | "states:StopExecution" 178 | ], 179 | effect=iam.Effect.ALLOW, 180 | resources=["*"] 181 | ) 182 | ] 183 | ) 184 | ] 185 | ) 186 | pipeline.add_stage( 187 | test_stage, 188 | post=[ 189 | pipelines.ShellStep( 190 | "System-Tests", 191 | input=source_artifact, 192 | commands=[ 193 | "pip install -r ./tests/requirements.txt", 194 | "pytest ./tests/system_tests.py" 195 | ], 196 | env_from_cfn_outputs={ 197 | "WEBSITE_URL": test_stage.cdn_output, 198 | "API_URL": test_stage.api_output 199 | } 200 | ) 201 | ] 202 | ) 203 | pipeline.add_stage(prod_stage) 204 | pipeline.add_stage(data_workflow_stage) 205 | -------------------------------------------------------------------------------- /Chapter11/Files/cdk/test_application_stack.py: -------------------------------------------------------------------------------- 1 | import os 2 | from time import strftime 3 | import aws_cdk as cdk 4 | import aws_cdk.aws_s3 as s3 5 | import aws_cdk.aws_cloudfront as cloudfront 6 | import aws_cdk.aws_iam as iam 7 | import aws_cdk.aws_s3_deployment as s3_deployment 8 | import aws_cdk.aws_lambda as lambda_ 9 | import aws_cdk.aws_apigatewayv2_alpha as httpgw 10 | import aws_cdk.aws_apigatewayv2_integrations_alpha as integrations 11 | import aws_cdk.aws_sagemaker as sagemaker 12 | import aws_cdk.custom_resources as cr 13 | from constructs import Construct 14 | 15 | class TestApplicaitonStack(cdk.Stack): 16 | 17 | def __init__(self, scope: Construct, id: str, *, model_name: str=None, **kwargs) -> None: 18 | super().__init__(scope, id, **kwargs) 19 | 20 | endpoint_name = f"{model_name}-test-endpoint" 21 | 22 | sagemaker_test_role = iam.Role( 23 | self, 24 | "SageMaker-TestRole", 25 | assumed_by=iam.CompositePrincipal( 26 | iam.ServicePrincipal("sagemaker.amazonaws.com") 27 | ), 28 | managed_policies=[ 29 | iam.ManagedPolicy.from_aws_managed_policy_name("AmazonSageMakerFullAccess"), 30 | iam.ManagedPolicy.from_aws_managed_policy_name("AmazonS3FullAccess") 31 | ] 32 | ) 33 | 34 | model = sagemaker.CfnModel( 35 | self, 36 | "Test-Model", 37 | execution_role_arn=sagemaker_test_role.role_arn, 38 | primary_container=sagemaker.CfnModel.ContainerDefinitionProperty( 39 | model_package_name=cr.AwsCustomResource( 40 | self, 41 | "Get-ModelPackage-Parameter", 42 | on_create=cr.AwsSdkCall( 43 | action="getParameter", 44 | service="SSM", 45 | parameters={ 46 | "Name": "ModelPackageName" 47 | }, 48 | physical_resource_id=cr.PhysicalResourceId.of(strftime("%Y%m%d%H%M%S")) 49 | ), 50 | on_update=cr.AwsSdkCall( 51 | action="getParameter", 52 | service="SSM", 53 | parameters={ 54 | "Name": "ModelPackageName" 55 | }, 56 | physical_resource_id=cr.PhysicalResourceId.of(strftime("%Y%m%d%H%M%S")) 57 | ), 58 | policy=cr.AwsCustomResourcePolicy.from_sdk_calls( 59 | resources=cr.AwsCustomResourcePolicy.ANY_RESOURCE 60 | ) 61 | ).get_response_field("Parameter.Value") 62 | ) 63 | ) 64 | 65 | endpoint_config = sagemaker.CfnEndpointConfig( 66 | self, 67 | "Test-EndpointConfig", 68 | production_variants=[ 69 | sagemaker.CfnEndpointConfig.ProductionVariantProperty( 70 | initial_instance_count=1, 71 | initial_variant_weight=1.0, 72 | instance_type="ml.t2.large", 73 | model_name=model.attr_model_name, 74 | variant_name="AllTraffic" 75 | ) 76 
| ] 77 | ) 78 | 79 | endpoint = sagemaker.CfnEndpoint( 80 | self, 81 | "Test-Endpoint", 82 | endpoint_config_name=endpoint_config.attr_endpoint_config_name, 83 | endpoint_name=endpoint_name 84 | ) 85 | endpoint.add_depends_on(endpoint_config) 86 | 87 | static_bucket = s3.Bucket( 88 | self, 89 | "Static-Bucket", 90 | removal_policy=cdk.RemovalPolicy.DESTROY 91 | ) 92 | 93 | origin = cloudfront.OriginAccessIdentity( 94 | self, 95 | "Bucket-Origin", 96 | comment="Origin associated with ACME website static content Bucket" 97 | ) 98 | 99 | static_bucket.grant_read( 100 | iam.CanonicalUserPrincipal( 101 | origin.cloud_front_origin_access_identity_s3_canonical_user_id 102 | ) 103 | ) 104 | 105 | form_lambda = lambda_.DockerImageFunction( 106 | self, 107 | "Form-Lambda", 108 | code=lambda_.DockerImageCode.from_image_asset( 109 | os.path.join(os.path.dirname(__file__), 110 | "../../lambda/formHandler" 111 | ) 112 | ), 113 | environment={ 114 | "sagemakerEndpoint": endpoint.attr_endpoint_name 115 | }, 116 | memory_size=512, 117 | timeout=cdk.Duration.seconds(120) 118 | ) 119 | form_lambda.add_to_role_policy( 120 | iam.PolicyStatement( 121 | actions=[ 122 | "sagemaker:InvokeEndpoint" 123 | ], 124 | effect=iam.Effect.ALLOW, 125 | resources=["*"] 126 | ) 127 | ) 128 | 129 | api = httpgw.HttpApi( 130 | self, 131 | "Form-API", 132 | cors_preflight={ 133 | "allow_origins": ["*"], 134 | "allow_methods": [httpgw.HttpMethod.POST], 135 | "allow_headers": ["*"] 136 | } 137 | ) 138 | api.add_routes( 139 | path="/api/contact", 140 | methods=[httpgw.HttpMethod.POST], 141 | integration=integrations.HttpLambdaIntegration( 142 | "ContactForm-Integration", 143 | handler=form_lambda 144 | ) 145 | ) 146 | api.add_routes( 147 | path="/api/predict", 148 | methods=[httpgw.HttpMethod.POST], 149 | integration=integrations.HttpLambdaIntegration( 150 | "PredictForm-Integration", 151 | handler=form_lambda 152 | ) 153 | ) 154 | 155 | cdn = cloudfront.CloudFrontWebDistribution( 156 | self, 157 | "CloudFront-CDN", 158 | comment="CDN for the ACME website", 159 | origin_configs=[ 160 | cloudfront.SourceConfiguration( 161 | custom_origin_source=cloudfront.CustomOriginConfig( 162 | domain_name=f"{api.http_api_id}.execute-api.{cdk.Aws.REGION}.amazonaws.com" 163 | ), 164 | behaviors=[ 165 | cloudfront.Behavior( 166 | allowed_methods=cloudfront.CloudFrontAllowedMethods.ALL, 167 | default_ttl=cdk.Duration.seconds(0), 168 | forwarded_values={ 169 | "query_string": True, 170 | "headers": ["Authorization"] 171 | }, 172 | path_pattern="/api/*" 173 | ) 174 | ] 175 | ), 176 | cloudfront.SourceConfiguration( 177 | s3_origin_source=cloudfront.S3OriginConfig( 178 | s3_bucket_source=static_bucket, 179 | origin_access_identity=origin 180 | ), 181 | behaviors=[ 182 | cloudfront.Behavior( 183 | is_default_behavior=True, 184 | default_ttl=cdk.Duration.seconds(0), 185 | compress=True 186 | ) 187 | ] 188 | ) 189 | ], 190 | default_root_object="index.html", 191 | enable_ip_v6=True, 192 | http_version=cloudfront.HttpVersion.HTTP2, 193 | price_class=cloudfront.PriceClass.PRICE_CLASS_100, 194 | viewer_protocol_policy=cloudfront.ViewerProtocolPolicy.REDIRECT_TO_HTTPS 195 | ) 196 | 197 | s3_deployment.BucketDeployment( 198 | self, 199 | "Deploy-Website", 200 | sources=[ 201 | s3_deployment.Source.asset(os.path.join(os.path.dirname(__file__), "../../www")) 202 | ], 203 | destination_bucket=static_bucket, 204 | distribution=cdn, 205 | retain_on_delete=False 206 | ) 207 | 208 | self.cdn_output = cdk.CfnOutput( 209 | self, 210 | "CloudFront-URL", 211 | 
value=f"http://{cdn.distribution_domain_name}" 212 | ) 213 | 214 | self.api_output = cdk.CfnOutput( 215 | self, 216 | "Form-API-URL", 217 | value=api.url 218 | ) 219 | -------------------------------------------------------------------------------- /Chapter11/Files/lambda/createBaseline/index.py: -------------------------------------------------------------------------------- 1 | import io 2 | import json 3 | import os 4 | import logging 5 | import boto3 6 | from botocore.exceptions import ClientError 7 | from urllib.parse import urlparse 8 | from datetime import datetime 9 | 10 | 11 | s3 = boto3.resource("s3") 12 | sm = boto3.client("sagemaker") 13 | logger = logging.getLogger() 14 | logger.setLevel(logging.INFO) 15 | image_map = { 16 | "us-east-1": "156813124566.dkr.ecr.us-east-1.amazonaws.com/sagemaker-model-monitor-analyzer", 17 | "us-east-2": "777275614652.dkr.ecr.us-east-2.amazonaws.com/sagemaker-model-monitor-analyzer", 18 | "us-west-1": "890145073186.dkr.ecr.us-west-1.amazonaws.com/sagemaker-model-monitor-analyzer", 19 | "us-west-2": "159807026194.dkr.ecr.us-west-2.amazonaws.com/sagemaker-model-monitor-analyzer", 20 | "af-south-1": "875698925577.dkr.ecr.af-south-1.amazonaws.com/sagemaker-model-monitor-analyzer", 21 | "ap-east-1": "001633400207.dkr.ecr.ap-east-1.amazonaws.com/sagemaker-model-monitor-analyzer", 22 | "ap-northeast-1": "574779866223.dkr.ecr.ap-northeast-1.amazonaws.com/sagemaker-model-monitor-analyzer", 23 | "ap-northeast-2": "709848358524.dkr.ecr.ap-northeast-2.amazonaws.com/sagemaker-model-monitor-analyzer", 24 | "ap-south-1": "126357580389.dkr.ecr.ap-south-1.amazonaws.com/sagemaker-model-monitor-analyzer", 25 | "ap-southeast-1": "245545462676.dkr.ecr.ap-southeast-1.amazonaws.com/sagemaker-model-monitor-analyzer", 26 | "ap-southeast-2": "563025443158.dkr.ecr.ap-southeast-2.amazonaws.com/sagemaker-model-monitor-analyzer", 27 | "ca-central-1": "536280801234.dkr.ecr.ca-central-1.amazonaws.com/sagemaker-model-monitor-analyzer", 28 | "cn-north-1": "453000072557.dkr.ecr.cn-north-1.amazonaws.com/sagemaker-model-monitor-analyzer", 29 | "cn-northwest-1": "453252182341.dkr.ecr.cn-northwest-1.amazonaws.com/sagemaker-model-monitor-analyzer", 30 | "eu-central-1": "048819808253.dkr.ecr.eu-central-1.amazonaws.com/sagemaker-model-monitor-analyzer", 31 | "eu-north-1": "895015795356.dkr.ecr.eu-north-1.amazonaws.com/sagemaker-model-monitor-analyzer", 32 | "eu-south-1": "933208885752.dkr.ecr.eu-south-1.amazonaws.com/sagemaker-model-monitor-analyzer", 33 | "eu-west-1": "468650794304.dkr.ecr.eu-west-1.amazonaws.com/sagemaker-model-monitor-analyzer", 34 | "eu-west-2": "749857270468.dkr.ecr.eu-west-2.amazonaws.com/sagemaker-model-monitor-analyzer", 35 | "eu-west-3": "680080141114.dkr.ecr.eu-west-3.amazonaws.com/sagemaker-model-monitor-analyzer", 36 | "me-south-1": "607024016150.dkr.ecr.me-south-1.amazonaws.com/sagemaker-model-monitor-analyzer", 37 | "sa-east-1": "539772159869.dkr.ecr.sa-east-1.amazonaws.com/sagemaker-model-monitor-analyzer" 38 | } 39 | 40 | 41 | def lambda_handler(event, context): 42 | logger.info("Received Event: {}".format(json.dumps(event, indent=2))) 43 | props = event["ResourceProperties"] 44 | source_bucket = urlparse(props["BaselineSourceUri"]).netloc 45 | source_key = urlparse(props["BaselineSourceUri"]).path.lstrip("/") 46 | logs_bucket = props["LogsBucketName"] 47 | 48 | if event["RequestType"] != "Delete": 49 | logger.info(f"Copying data from {source_bucket} to {logs_bucket}.") 50 | try: 51 | s3.meta.client.copy({"Bucket": source_bucket, "Key": source_key}, 
logs_bucket, "baselining/data/baseline.csv") 52 | except ClientError as e: 53 | error_message = e.response["Error"]["Message"] 54 | logger.error(error_message) 55 | raise Exception(error_message) 56 | 57 | request = { 58 | 'ProcessingJobName': f'abalone-baseline-{datetime.utcnow():%Y-%m-%d-%H%M}', 59 | 'Environment': { 60 | 'analysis_type': 'MODEL_QUALITY', 61 | 'dataset_format': '{"csv": {"header": true, "output_columns_position": "START"}}', 62 | 'dataset_source': '/opt/ml/processing/input/baseline_dataset_input', 63 | 'ground_truth_attribute': 'label', 64 | 'inference_attribute': 'prediction', 65 | 'output_path': '/opt/ml/processing/output', 66 | 'problem_type': 'Regression', 67 | 'publish_cloudwatch_metrics': 'Disabled' 68 | }, 69 | 'AppSpecification': { 70 | 'ImageUri': image_map[os.environ['AWS_DEFAULT_REGION']] 71 | }, 72 | 'ProcessingInputs': [ 73 | { 74 | 'InputName': 'baseline_dataset_input', 75 | 'AppManaged': False, 76 | 'S3Input': { 77 | 'LocalPath': '/opt/ml/processing/input/baseline_dataset_input', 78 | 'S3Uri': f's3://{logs_bucket}/baselining/data/baseline.csv', 79 | 'S3DataDistributionType': 'FullyReplicated', 80 | 'S3DataType': 'S3Prefix', 81 | 'S3InputMode': 'File', 82 | 'S3CompressionType': 'None' 83 | } 84 | } 85 | ], 86 | 'ProcessingOutputConfig': { 87 | 'Outputs': [ 88 | { 89 | 'OutputName': 'monitoring_output', 90 | 'AppManaged': False, 91 | 'S3Output': { 92 | 'LocalPath': '/opt/ml/processing/output', 93 | 'S3Uri': f's3://{logs_bucket}/baselining/results', 94 | 'S3UploadMode': 'EndOfJob' 95 | } 96 | } 97 | ] 98 | }, 99 | 'ProcessingResources': { 100 | 'ClusterConfig': { 101 | 'InstanceCount': 1, 102 | 'InstanceType': 'ml.m5.xlarge', 103 | 'VolumeSizeInGB': 20 104 | } 105 | }, 106 | 'RoleArn': props['RoleArn'], 107 | 'StoppingCondition': { 108 | 'MaxRuntimeInSeconds': 1800 109 | } 110 | } 111 | 112 | 113 | logger.info(f'Creating Basline Suggestion Job: {request["ProcessingJobName"]}') 114 | try: 115 | response = sm.create_processing_job(**request) 116 | return { 117 | "PhysicalResourceId": response["ProcessingJobArn"], 118 | "Data": { 119 | "ProcessingJobName": request["ProcessingJobName"], 120 | "BaselineResultsUri": f"s3://{logs_bucket}/baselining/results" 121 | } 122 | } 123 | except ClientError as e: 124 | error_message = e.response["Error"]["Message"] 125 | logger.error(error_message) 126 | raise Exception(error_message) 127 | -------------------------------------------------------------------------------- /Chapter11/Files/lambda/formHandler/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM public.ecr.aws/lambda/python:3.8 2 | COPY index.py requirements.txt ./ 3 | RUN pip3 install -r requirements.txt 4 | CMD ["index.lambda_handler"] -------------------------------------------------------------------------------- /Chapter11/Files/lambda/formHandler/index.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | import json 4 | import boto3 5 | from botocore.exceptions import ClientError 6 | from http import HTTPStatus 7 | import numpy as np 8 | import pandas as pd 9 | from sklearn.preprocessing import normalize 10 | 11 | sm = boto3.client("sagemaker-runtime") 12 | logger = logging.getLogger() 13 | logger.setLevel(logging.INFO) 14 | 15 | def lambda_handler(request, context): 16 | logger.info(f"Processing HTTP API Request: {json.dumps(request, indent=2)}") 17 | if request["requestContext"]["http"]["method"] == "POST": 18 | response_code, response_body 
= handle_request(request) 19 | return generate_response(request, response_body, response_code) 20 | else: 21 | logger.info("Request is not using POST method") 22 | return generate_response(request, json.dumps({"message": "Unsupported method."}), HTTPStatus.BAD_REQUEST) 23 | 24 | 25 | def generate_response(request, response_body, response_code): 26 | logger.info("Generating response:") 27 | response = { 28 | "body": response_body, 29 | "isBase64Encoded": request["isBase64Encoded"], 30 | "headers": request["headers"], 31 | "statusCode": response_code 32 | } 33 | logger.info(json.dumps(response, indent=2)) 34 | return response 35 | 36 | 37 | def handle_request(request): 38 | if request["rawPath"] == "/api/contact": 39 | logger.info("Processing Contact Form request.") 40 | return handle_contact(request) 41 | elif request["rawPath"] == "/api/predict": 42 | logger.info("Processing Prediction Form request.") 43 | return handle_predict(request) 44 | else: 45 | logger.info("Request outside of scope.") 46 | return HTTPStatus.BAD_REQUEST, json.dumps({"message": "Unsupported path."}) 47 | 48 | 49 | def handle_contact(request): 50 | email = json.loads(request["body"])["email"] 51 | return HTTPStatus.OK, json.dumps( 52 | { 53 | "message": f"Thank you! We\'ve received your message from {email} and we will respond shortly." 54 | } 55 | ) 56 | 57 | 58 | def handle_predict(request): 59 | df = pd.json_normalize(json.loads(request["body"])) 60 | logger.info(f"Received Request Body: {df}") 61 | s_pre = df["sex"][0] 62 | s_post = handle_encoding(s_pre) 63 | x = df.drop(columns=["sex"], axis=1) 64 | x_pre = x.to_numpy() 65 | x_post = normalize(x_pre).tolist()[0] 66 | payload = ",".join(map(str, x_post+s_post)) 67 | logger.info(f"SageMaker Request Payload: {payload}") 68 | try: 69 | if ("inference-id" in request["headers"]): 70 | inference_id = request["headers"]["inference-id"] 71 | logger.info(f"Invoking SageMaker Endpoint with Ground Truth Inference ID: {inference_id}") 72 | response = sm.invoke_endpoint( 73 | EndpointName=os.environ["sagemakerEndpoint"], 74 | ContentType="text/csv", 75 | Body=payload, 76 | InferenceId=inference_id 77 | ) 78 | else: 79 | logger.info("Invoking SageMaker Endpoint with no Ground Truth Inference ID") 80 | response = sm.invoke_endpoint( 81 | EndpointName=os.environ["sagemakerEndpoint"], 82 | ContentType="text/csv", 83 | Body=payload 84 | ) 85 | logger.debug(f"SageMaker Response: {response}") 86 | prediction = response["Body"].read().decode("utf-8").split(".")[0] 87 | logger.info(f"SageMaker Endpoint Prediction: {prediction}") 88 | logger.debug(type(prediction)) 89 | rings = round(int(prediction)) 90 | age = rings + 1.5 91 | return HTTPStatus.OK, json.dumps( 92 | { 93 | "message": f"We\'ve calculated that the Abalone has {rings} rings, and is therefore approximately {age} years old." 94 | } 95 | ) 96 | 97 | except ClientError as e: 98 | error_message = e.response["Error"]["Message"] 99 | logger.error(error_message) 100 | return HTTPStatus.OK, json.dumps( 101 | { 102 | "message": "Age Calculator Unavailable! Please try again later." 103 | } 104 | ) 105 | 106 | 107 | def handle_encoding(sex): 108 | if sex == "M" or sex == "m": 109 | return [0., 0., 1.0] 110 | elif sex == "F" or sex == "f": 111 | return [1.0, 0., 0.] 112 | elif sex == "I" or sex == "i": 113 | return [0., 1.0, 0.]
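# ---------------------------------------------------------------------------
# Editor's note: the block below is an illustrative local sketch, not part of
# the original handler. It mirrors the payload construction performed by
# handle_predict() so the "sex" one-hot encoding (implied order [F, I, M] in
# handle_encoding()) and the feature normalization can be inspected without
# invoking the SageMaker endpoint. The sample record is the same one used in
# Chapter11/Files/tests/system_tests.py, and the __main__ guard keeps the
# sketch from running inside Lambda. Note that importing this module still
# creates a boto3 client, so an AWS region must be configured locally.
if __name__ == "__main__":
    sample_record = {
        "length": "0.455",
        "diameter": "0.365",
        "height": "0.095",
        "whole_weight": "0.514",
        "shucked_weight": "0.2245",
        "viscera_weight": "0.101",
        "shell_weight": "0.15",
        "sex": "M"
    }
    sample_df = pd.json_normalize(sample_record)
    # One-hot encode the categorical feature exactly as handle_predict() does
    encoded_sex = handle_encoding(sample_df["sex"][0])
    # Cast the remaining string values to floats before L2 normalization
    numeric = sample_df.drop(columns=["sex"]).astype(float).to_numpy()
    features = normalize(numeric).tolist()[0]
    print("CSV payload:", ",".join(map(str, features + encoded_sex)))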
114 | -------------------------------------------------------------------------------- /Chapter11/Files/lambda/formHandler/requirements.txt: -------------------------------------------------------------------------------- 1 | scikit-learn==0.23.2 2 | pandas==1.1.4 3 | numpy==1.20.2 4 | boto3==1.17.58 -------------------------------------------------------------------------------- /Chapter11/Files/scripts/invoke.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import os 3 | import json 4 | import time 5 | import sys 6 | import logging 7 | 8 | sfn = boto3.client("stepfunctions") 9 | logger = logging.getLogger() 10 | log_format = "%(levelname)s: [%(filename)s:%(lineno)s] %(message)s" 11 | logging.basicConfig(format=log_format, level=os.environ.get("LOGLEVEL", "INFO").upper()) 12 | logger.info(f'Invoking ML Workflow: {os.environ["STATEMACHINE_ARN"]}') 13 | execution_arn = sfn.start_execution( 14 | stateMachineArn=os.environ['STATEMACHINE_ARN'], 15 | input=json.dumps( 16 | { 17 | "input": { 18 | "model_name": os.environ["MODEL_NAME"], 19 | "pipeline_name": os.environ["PIPELINE_NAME"], 20 | "stage_name": os.environ["STAGE_NAME"], 21 | "action_name": os.environ["ACTION_NAME"], 22 | "data_bucket": os.environ["DATA_BUCKET"] 23 | } 24 | } 25 | ) 26 | )["executionArn"] 27 | status = sfn.describe_execution(executionArn=execution_arn)["status"] 28 | while status == "RUNNING": 29 | time.sleep(60) 30 | logger.info("ML Workflow Status: {}".format(status)) 31 | status = sfn.describe_execution(executionArn=execution_arn)["status"] 32 | if status == "SUCCEEDED": 33 | logger.info("ML Workflow Execution: {}".format(status)) 34 | sys.exit(0) 35 | else: 36 | error_message = "ML Workflow execution: {}".format(status) 37 | logger.error(error_message) 38 | sys.exit(255) 39 | -------------------------------------------------------------------------------- /Chapter11/Files/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Automated-Machine-Learning-on-AWS/fa106b966823211a4588286656b6cf01cc6c14b2/Chapter11/Files/tests/__init__.py -------------------------------------------------------------------------------- /Chapter11/Files/tests/requirements.txt: -------------------------------------------------------------------------------- 1 | requests 2 | pytest -------------------------------------------------------------------------------- /Chapter11/Files/tests/system_tests.py: -------------------------------------------------------------------------------- 1 | import os 2 | import requests 3 | import json 4 | 5 | test_data = { 6 | 'length': '0.455', 7 | 'diameter': '0.365', 8 | 'height': '0.095', 9 | 'whole_weight': '0.514', 10 | 'shucked_weight': '0.2245', 11 | 'viscera_weight': '0.101', 12 | 'shell_weight': '0.15', 13 | 'sex': 'M' 14 | } 15 | 16 | 17 | def test_website(): 18 | with requests.get(os.environ["WEBSITE_URL"]) as response: 19 | assert response.status_code == 200 20 | assert response.headers["Content-Type"] == "text/html" 21 | 22 | def test_prediction(): 23 | with requests.post(os.environ["API_URL"]+"api/predict", json=test_data) as response: 24 | assert response.status_code == 200 25 | assert response.headers["Content-Type"] == "application/json" 26 | assert "We've calculated that the Abalone has" in json.loads(response.content)["message"] 27 | 28 | def test_errors(): 29 | with requests.get(os.environ["API_URL"]+"api/predict") as response: 30 | assert
response.status_code == 404 31 | assert json.loads(response.content)["message"] == "Not Found" 32 | with requests.post(os.environ["API_URL"]+"api/predict") as response: 33 | assert response.status_code == 500 34 | -------------------------------------------------------------------------------- /Chapter11/Notebook/Simulating New Abalone Survey Data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Creating new `abalone` data using `CTGAN`\n", 8 | ">__NOTE:__ Recommend using the _Python 3 (Data Science)_ kernel on an _ml.m5.4xlarge (16vCPU + 64GB)_ Instance Type. However, this will incur additional AWS usage costs." 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "## Install `ctgan`" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "%%capture\n", 25 | "!pip install ctgan \"s3fs<=0.4\"" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "## Load the Required Libraries" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "import io\n", 42 | "import boto3\n", 43 | "import warnings\n", 44 | "import pandas as pd\n", 45 | "from time import gmtime, strftime\n", 46 | "\n", 47 | "warnings.filterwarnings(\"ignore\")\n", 48 | "s3 = boto3.client(\"s3\")\n", 49 | "model_name = \"abalone\"\n", 50 | "column_names = [\n", 51 | " \"sex\",\n", 52 | " \"length\",\n", 53 | " \"diameter\",\n", 54 | " \"height\",\n", 55 | " \"whole_weight\",\n", 56 | " \"shucked_weight\",\n", 57 | " \"viscera_weight\",\n", 58 | " \"shell_weight\",\n", 59 | " \"rings\"\n", 60 | "]" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "## Load the \"raw\" data" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "column_names = [\"sex\", \"length\", \"diameter\", \"height\", \"whole_weight\", \"shucked_weight\", \"viscera_weight\", \"shell_weight\", \"rings\"]\n", 77 | "abalone_data = pd.read_csv(\"http://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data\", names=column_names)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "## Fit the CTGAN Model on the `sex` target label\n", 85 | "\n", 86 | ">__NOTE:__ Fitting the `ctgan` model can take up to 5 minutes, depending on the Kernel compute resources."
87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "from ctgan import CTGAN\n", 96 | "\n", 97 | "ctgan = CTGAN()\n", 98 | "ctgan.fit(abalone_data, [\"sex\"])" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "## Generate `100` samples of \"new\" data\n", 106 | ">__NOTE:__ `100` new samples are used to realistially simulate the potential amount of new daily survey data" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [ 115 | "samples = ctgan.sample(100)" 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": [ 122 | "## Compare Datasets\n", 123 | "### `raw` dataset" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "abalone_data.describe()" 133 | ] 134 | }, 135 | { 136 | "cell_type": "markdown", 137 | "metadata": {}, 138 | "source": [ 139 | "### `new` dataset" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "samples.describe()" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "## Upload the new data to test the Airflow DAG" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [ 164 | "region_name = boto3.session.Session().region_name\n", 165 | "data_bucket = f\"\"\"{boto3.client(\"ssm\", region_name=region_name).get_parameter(Name=\"DataBucket\")[\"Parameter\"][\"Value\"]}\"\"\"\n", 166 | "new_data_key = f\"{model_name}_data/abalone.new\"\n", 167 | "samples.to_csv(f\"s3://{data_bucket}/{new_data_key}\", header=False, index=False)" 168 | ] 169 | } 170 | ], 171 | "metadata": { 172 | "instance_type": "ml.m5.4xlarge", 173 | "kernelspec": { 174 | "display_name": "Python 3 (Data Science)", 175 | "language": "python", 176 | "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-west-2:236514542706:image/datascience-1.0" 177 | }, 178 | "language_info": { 179 | "codemirror_mode": { 180 | "name": "ipython", 181 | "version": 3 182 | }, 183 | "file_extension": ".py", 184 | "mimetype": "text/x-python", 185 | "name": "python", 186 | "nbconvert_exporter": "python", 187 | "pygments_lexer": "ipython3", 188 | "version": "3.7.10" 189 | } 190 | }, 191 | "nbformat": 4, 192 | "nbformat_minor": 4 193 | } 194 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Packt 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | ### [Packt Conference : Put Generative AI to work on Oct 11-13 (Virtual)](https://packt.link/JGIEY) 3 | 4 |

[![Packt Conference](https://hub.packtpub.com/wp-content/uploads/2023/08/put-generative-ai-to-work-packt.png)](https://packt.link/JGIEY)

5 | 3 Days, 20+ AI Experts, 25+ Workshops and Power Talks 6 | 7 | Code: USD75OFF 8 | 9 | 10 | 11 | 12 | # Automated Machine Learning on AWS 13 | 14 | Automated Machine Learning on AWS 15 | 16 | This is the code repository for [Automated Machine Learning on AWS](https://www.packtpub.com/product/automated-machine-learning-on-aws/9781801811828?utm_source=github&utm_medium=repository&utm_campaign=9781801811828), published by Packt. 17 | 18 | **Fast-track the development of your production-ready machine learning applications the AWS way** 19 | 20 | ## What is this book about? 21 | AWS provides a wide range of solutions to help automate a machine learning workflow with just a few lines of code. With this practical book, you'll learn how to automate a machine learning pipeline using the various AWS services. 22 | Automated Machine Learning on AWS begins with a quick overview of what the machine learning pipeline/process looks like and highlights the typical challenges that you may face when building a pipeline. 23 | 24 | This book covers the following exciting features: 25 | * Employ SageMaker Autopilot and Amazon SageMaker SDK to automate the machine learning process 26 | * Understand how to use AutoGluon to automate complicated model building tasks 27 | * Use the AWS CDK to codify the machine learning process 28 | * Create, deploy, and rebuild a CI/CD pipeline on AWS 29 | * Build an ML workflow using AWS Step Functions and the Data Science SDK 30 | * Leverage the Amazon SageMaker Feature Store to automate the machine learning software development life cycle (MLSDLC) 31 | * Discover how to use Amazon MWAA for a data-centric ML process 32 | 33 | For supplemental content that covers Generative AI on AWS, as well as updates to AWS capabilities, such as SageMaker Pipelines, and advanced features for production ML model monitoring, take a look at [www.automatedmlonaws.com](https://www.automatedmlonaws.com/). 34 | 35 | If you feel this book is for you, get your [copy](https://www.amazon.com/dp/1801811822) today! 36 | 37 | https://www.packtpub.com/ 39 | 40 | 41 | ## Instructions and Navigations 42 | All of the code is organized into folders. 43 | 44 | The code will look like the following: 45 | ``` 46 | import boto3 47 | import sagemaker 48 | aws_region = sagemaker.Session().boto_session.region_name 49 | !sm-docker build --build-arg REGION={aws_region} 50 | ``` 51 | 52 | **Following is what you need for this book:** 53 | This book is for novice as well as experienced machine learning practitioners looking to automate the process of building, training, and deploying machine learning-based solutions into production, using both purpose-built and other AWS services. 54 | A basic understanding of the end-to-end machine learning process and concepts, Python programming, and AWS is necessary to make the most out of this book. 55 | 56 | With the following software and hardware list you can run all code files present in the book (Chapters 1-11).
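
A quick way to confirm the Python-side setup before you start is a sanity check along the following lines (a suggested sketch, not part of the book's code), which prints the installed SDK versions and the active AWS Region:

```
import sys
import boto3
import sagemaker

print(f"Python: {sys.version.split()[0]}")
print(f"boto3: {boto3.__version__}")
print(f"sagemaker: {sagemaker.__version__}")
print(f"Region: {sagemaker.Session().boto_session.region_name}")
```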
57 | 58 | ### Software and Hardware List 59 | 60 | 61 | | Chapter | Software required | OS required | 62 | | -------- | ------------------------------------ | -----------------------------------| 63 | | 1-11 | Python 3.7.10 (and above) | Windows, Mac OS X, and Linux (Any) | 64 | | 1-11 | AWS CLI 1.19.112 (and above) | Windows, Mac OS X, and Linux (Any) | 65 | | 1-11 | AWS CDK 2.3.0 (build beaa5b2) | Windows, Mac OS X, and Linux (Any) | 66 | 67 | It is recommended that you use an AWS Cloud9 integrated development environment as 68 | it meets the software/hardware and operating system requirements 69 | 70 | We also provide a PDF file that has color images of the screenshots/diagrams used in this book. [Click here to download it](https://static.packt-cdn.com/downloads/9781801811828_ColorImages.pdf). 71 | 72 | 73 | ### Related products 74 | * Automated Machine Learning with Microsoft Azure [[Packt]](https://www.packtpub.com/product/automated-machine-learning-with-microsoft-azure/9781800565319?utm_source=github&utm_medium=repository&utm_campaign=9781800565319) [[Amazon]](https://www.amazon.com/dp/B08VJKVS4B) 75 | 76 | * Learn Amazon SageMaker - Second Edition [[Packt]](https://www.packtpub.com/product/learn-amazon-sagemaker-second-edition/9781801817950?utm_source=github&utm_medium=repository&utm_campaign=9781801817950) [[Amazon]](https://www.amazon.com/dp/B09CQ6MSRY) 77 | 78 | ## Get to Know the Author 79 | **Trenton Potgieter** 80 | is a senior AI/ML specialist at AWS and has been working in the field 81 | of ML since 2011. At AWS, he assists multiple AWS customers to create ML solutions 82 | and has contributed to various use cases, broadly spanning computer vision, knowledge 83 | graphs, and ML automation using MLOps methodologies. Trenton plays a key role in 84 | evangelizing the AWS ML services and shares best practices through forums such as 85 | AWS blogs, whitepapers, reference architectures, and public-speaking events. He has 86 | also actively been involved in leading, developing, and supporting an internal AWS 87 | community of MLOps-related subject matter experts. 88 | ### Download a free PDF 89 | 90 | If you have already purchased a print or Kindle version of this book, you can get a DRM-free PDF version at no cost.
Simply click on the link to claim your free PDF.
91 |

https://packt.link/free-ebook/9781801811828

92 | --------------------------------------------------------------------------------