├── DRAFT
│   ├── assets
│   │   └── stackoverflow.jpg
│   ├── minimum_viable_custom_model.ipynb
│   ├── custom_estimator_mnist.ipynb
│   └── keras-bow-model-multi-label-hypertune.ipynb
└── housing_prices
    ├── assets
    │   └── TFHierarchy.png
    ├── cloud-ml-housing-prices-hp-tuning.ipynb
    └── cloud-ml-housing-prices.ipynb
/DRAFT/assets/stackoverflow.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vijaykyr/tensorflow_teaching_examples/HEAD/DRAFT/assets/stackoverflow.jpg -------------------------------------------------------------------------------- /housing_prices/assets/TFHierarchy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vijaykyr/tensorflow_teaching_examples/HEAD/housing_prices/assets/TFHierarchy.png -------------------------------------------------------------------------------- /housing_prices/cloud-ml-housing-prices-hp-tuning.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Automatic Hyperparameter Tuning\n", 8 | "\n", 9 | "This notebook will show you how to extend the code in the cloud-ml-housing-prices notebook to take advantage of Cloud ML Engine's [automatic hyperparameter tuning](https://cloud.google.com/ml-engine/docs/using-hyperparameter-tuning).\n", 10 | "\n", 11 | "We will use it to determine the ideal number of hidden units to use in our neural network.\n", 12 | "\n", 13 | "Cloud ML Engine uses Bayesian optimization to find the hyperparameter settings for you. You can read the details of how it works [here.](https://cloud.google.com/blog/big-data/2017/08/hyperparameter-tuning-in-cloud-machine-learning-engine-using-bayesian-optimization)\n", 14 | "\n" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "### 1) Modify Tensorflow Code\n", 22 | "\n", 23 | "We need to make code changes to:\n", 24 | "1. Expose any hyperparameter we wish to tune as a command line argument (this is how CMLE passes new values)\n", 25 | "2. Modify the output_dir so each hyperparameter 'trial' gets written to a unique directory\n", 26 | "\n", 27 | "These changes are illustrated below. 
Any change from the original code has a **#NEW** comment next to it for easy reference" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 11, 33 | "metadata": { 34 | "collapsed": false 35 | }, 36 | "outputs": [ 37 | { 38 | "name": "stderr", 39 | "output_type": "stream", 40 | "text": [ 41 | "mkdir: cannot create directory ‘trainer’: File exists\n" 42 | ] 43 | } 44 | ], 45 | "source": [ 46 | "%%bash\n", 47 | "mkdir trainer\n", 48 | "touch trainer/__init__.py" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 43, 54 | "metadata": { 55 | "collapsed": false 56 | }, 57 | "outputs": [ 58 | { 59 | "name": "stdout", 60 | "output_type": "stream", 61 | "text": [ 62 | "Overwriting trainer/task.py\n" 63 | ] 64 | } 65 | ], 66 | "source": [ 67 | "%%writefile trainer/task.py\n", 68 | "\n", 69 | "import argparse\n", 70 | "import pandas as pd\n", 71 | "import tensorflow as tf\n", 72 | "import os #NEW\n", 73 | "import json #NEW\n", 74 | "from tensorflow.contrib.learn.python.learn import learn_runner\n", 75 | "from tensorflow.contrib.learn.python.learn.utils import saved_model_export_utils\n", 76 | "\n", 77 | "print(tf.__version__)\n", 78 | "tf.logging.set_verbosity(tf.logging.ERROR)\n", 79 | "\n", 80 | "data_train = pd.read_csv(\n", 81 | " filepath_or_buffer='https://storage.googleapis.com/vijay-public/boston_housing/housing_train.csv',\n", 82 | " names=[\"CRIM\",\"ZN\",\"INDUS\",\"CHAS\",\"NOX\",\"RM\",\"AGE\",\"DIS\",\"RAD\",\"TAX\",\"PTRATIO\",\"MEDV\"])\n", 83 | "\n", 84 | "data_test = pd.read_csv(\n", 85 | " filepath_or_buffer='https://storage.googleapis.com/vijay-public/boston_housing/housing_test.csv',\n", 86 | " names=[\"CRIM\",\"ZN\",\"INDUS\",\"CHAS\",\"NOX\",\"RM\",\"AGE\",\"DIS\",\"RAD\",\"TAX\",\"PTRATIO\",\"MEDV\"])\n", 87 | "\n", 88 | "FEATURES = [\"CRIM\", \"ZN\", \"INDUS\", \"NOX\", \"RM\",\n", 89 | " \"AGE\", \"DIS\", \"TAX\", \"PTRATIO\"]\n", 90 | "LABEL = \"MEDV\"\n", 91 | "\n", 92 | "feature_cols = [tf.feature_column.numeric_column(k)\n", 93 | " for k in FEATURES] #list of Feature Columns\n", 94 | "\n", 95 | "def generate_estimator(output_dir):\n", 96 | " return tf.estimator.DNNRegressor(feature_columns=feature_cols, \n", 97 | " hidden_units=[args.hidden_units_1, args.hidden_units_2], #NEW (use command line parameters for hidden units)\n", 98 | " model_dir=output_dir)\n", 99 | "\n", 100 | "def generate_input_fn(data_set):\n", 101 | " def input_fn():\n", 102 | " features = {k: tf.constant(data_set[k].values) for k in FEATURES}\n", 103 | " labels = tf.constant(data_set[LABEL].values)\n", 104 | " return features, labels\n", 105 | " return input_fn\n", 106 | "\n", 107 | "def serving_input_fn():\n", 108 | " #feature_placeholders are what the caller of the predict() method will have to provide\n", 109 | " feature_placeholders = {\n", 110 | " column.name: tf.placeholder(column.dtype, [None])\n", 111 | " for column in feature_cols\n", 112 | " }\n", 113 | " \n", 114 | " #features are what we actually pass to the estimator\n", 115 | " features = {\n", 116 | " # Inputs are rank 1 so that we can provide scalars to the server\n", 117 | " # but Estimator expects rank 2, so we expand dimension\n", 118 | " key: tf.expand_dims(tensor, -1)\n", 119 | " for key, tensor in feature_placeholders.items()\n", 120 | " }\n", 121 | " return tf.estimator.export.ServingInputReceiver(\n", 122 | " features, feature_placeholders\n", 123 | " )\n", 124 | "\n", 125 | "train_spec = tf.estimator.TrainSpec(\n", 126 | " input_fn=generate_input_fn(data_train),\n", 127 | " 
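#the input_fn below returns the entire in-memory training set on each step, so max_steps caps the number of passes over the data\n",
    "    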
max_steps=3000)\n", 128 | "\n", 129 | "exporter = tf.estimator.LatestExporter('Servo', serving_input_fn)\n", 130 | "\n", 131 | "eval_spec=tf.estimator.EvalSpec(\n", 132 | " input_fn=generate_input_fn(data_test),\n", 133 | " steps=1,\n", 134 | " exporters=exporter)\n", 135 | "\n", 136 | "######START CLOUD ML ENGINE BOILERPLATE######\n", 137 | "if __name__ == '__main__':\n", 138 | " parser = argparse.ArgumentParser()\n", 139 | " # Input Arguments\n", 140 | " parser.add_argument(\n", 141 | " '--output_dir',\n", 142 | " help='GCS location to write checkpoints and export models',\n", 143 | " required=True\n", 144 | " )\n", 145 | " parser.add_argument(\n", 146 | " '--job-dir',\n", 147 | " help='this model ignores this field, but it is required by gcloud',\n", 148 | " default='junk'\n", 149 | " )\n", 150 | " parser.add_argument(\n", 151 | " '--hidden_units_1', #NEW (expose hyperparameter to command line)\n", 152 | " help='number of neurons in first hidden layer',\n", 153 | " type = int,\n", 154 | " default=10\n", 155 | " )\n", 156 | " parser.add_argument(\n", 157 | " '--hidden_units_2', #NEW (expose hyperparameter to command line)\n", 158 | " help='number of neurons in second hidden layer',\n", 159 | " type = int,\n", 160 | " default=10\n", 161 | " )\n", 162 | " args = parser.parse_args()\n", 163 | " arguments = args.__dict__\n", 164 | " output_dir = arguments.pop('output_dir')\n", 165 | " output_dir = os.path.join(#NEW (give each trial its own output_dir)\n", 166 | " output_dir,\n", 167 | " json.loads(\n", 168 | " os.environ.get('TF_CONFIG', '{}')\n", 169 | " ).get('task', {}).get('trial', '')\n", 170 | " )\n", 171 | "######END CLOUD ML ENGINE BOILERPLATE######\n", 172 | "\n", 173 | " #initiate training job\n", 174 | " tf.estimator.train_and_evaluate(generate_estimator(output_dir), train_spec, eval_spec)\n" 175 | ] 176 | }, 177 | { 178 | "cell_type": "markdown", 179 | "metadata": {}, 180 | "source": [ 181 | "### 2) Define Hyperparameter Configuration File\n", 182 | "\n", 183 | "Here you specify:\n", 184 | "\n", 185 | "1. Which hyperparameters to tune\n", 186 | "2. The min and max range to search between\n", 187 | "3. The metric to optimize\n", 188 | "4. 
The number of trials to run" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 40, 194 | "metadata": { 195 | "collapsed": false 196 | }, 197 | "outputs": [ 198 | { 199 | "name": "stdout", 200 | "output_type": "stream", 201 | "text": [ 202 | "Overwriting config.yaml\n" 203 | ] 204 | } 205 | ], 206 | "source": [ 207 | "%%writefile config.yaml\n", 208 | "trainingInput:\n", 209 | " hyperparameters:\n", 210 | " goal: MINIMIZE\n", 211 | " hyperparameterMetricTag: average_loss\n", 212 | " maxTrials: 5\n", 213 | " maxParallelTrials: 1\n", 214 | " params:\n", 215 | " - parameterName: hidden_units_1\n", 216 | " type: INTEGER\n", 217 | " minValue: 1\n", 218 | " maxValue: 100\n", 219 | " scaleType: UNIT_LOG_SCALE\n", 220 | " - parameterName: hidden_units_2\n", 221 | " type: INTEGER\n", 222 | " minValue: 1\n", 223 | " maxValue: 100\n", 224 | " scaleType: UNIT_LOG_SCALE" 225 | ] 226 | }, 227 | { 228 | "cell_type": "markdown", 229 | "metadata": { 230 | "collapsed": true 231 | }, 232 | "source": [ 233 | "### 3) Train" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 33, 239 | "metadata": { 240 | "collapsed": true 241 | }, 242 | "outputs": [], 243 | "source": [ 244 | "GCS_BUCKET = 'gs://vijays-sandbox-ml' #CHANGE THIS TO YOUR BUCKET\n", 245 | "PROJECT = 'vijays-sandbox' #CHANGE THIS TO YOUR PROJECT ID\n", 246 | "REGION = 'us-central1' #OPTIONALLY CHANGE THIS" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": 34, 252 | "metadata": { 253 | "collapsed": true 254 | }, 255 | "outputs": [], 256 | "source": [ 257 | "import os\n", 258 | "os.environ['GCS_BUCKET'] = GCS_BUCKET\n", 259 | "os.environ['PROJECT'] = PROJECT\n", 260 | "os.environ['REGION'] = REGION" 261 | ] 262 | }, 263 | { 264 | "cell_type": "markdown", 265 | "metadata": {}, 266 | "source": [ 267 | "#### Run local\n", 268 | "It's a best practice to first run locally to check for errors. Note you can ignore the warnings in this case, as long as there are no errors." 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": 44, 274 | "metadata": { 275 | "collapsed": false 276 | }, 277 | "outputs": [ 278 | { 279 | "name": "stdout", 280 | "output_type": "stream", 281 | "text": [ 282 | "1.5.0\n" 283 | ] 284 | }, 285 | { 286 | "name": "stderr", 287 | "output_type": "stream", 288 | "text": [ 289 | "/usr/local/lib/python2.7/dist-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. 
In future, it will be treated as `np.float64 == np.dtype(float).type`.\n", 290 | " from ._conv import register_converters as _register_converters\n", 291 | "2018-03-13 23:10:57.249216: I tensorflow/core/platform/cpu_feature_guard.cc:137] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2 FMA\n" 292 | ] 293 | } 294 | ], 295 | "source": [ 296 | "%%bash\n", 297 | "gcloud ml-engine local train \\\n", 298 | " --module-name=trainer.task \\\n", 299 | " --package-path=trainer \\\n", 300 | " -- \\\n", 301 | " --output_dir='./output'" 302 | ] 303 | }, 304 | { 305 | "cell_type": "markdown", 306 | "metadata": {}, 307 | "source": [ 308 | "#### Run on cloud (1 cloud ML unit)" 309 | ] 310 | }, 311 | { 312 | "cell_type": "code", 313 | "execution_count": 41, 314 | "metadata": { 315 | "collapsed": false 316 | }, 317 | "outputs": [ 318 | { 319 | "name": "stderr", 320 | "output_type": "stream", 321 | "text": [ 322 | "Updated property [core/project].\n" 323 | ] 324 | } 325 | ], 326 | "source": [ 327 | "%%bash\n", 328 | "gcloud config set project $PROJECT" 329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": 47, 334 | "metadata": { 335 | "collapsed": false 336 | }, 337 | "outputs": [ 338 | { 339 | "name": "stdout", 340 | "output_type": "stream", 341 | "text": [ 342 | "jobId: housing_180313_232321\n", 343 | "state: QUEUED\n" 344 | ] 345 | }, 346 | { 347 | "name": "stderr", 348 | "output_type": "stream", 349 | "text": [ 350 | "Job [housing_180313_232321] submitted successfully.\n", 351 | "Your job is still active. You may view the status of your job with the command\n", 352 | "\n", 353 | " $ gcloud ml-engine jobs describe housing_180313_232321\n", 354 | "\n", 355 | "or continue streaming the logs with the command\n", 356 | "\n", 357 | " $ gcloud ml-engine jobs stream-logs housing_180313_232321\n" 358 | ] 359 | } 360 | ], 361 | "source": [ 362 | "%%bash\n", 363 | "JOBNAME=housing_$(date -u +%y%m%d_%H%M%S)\n", 364 | "\n", 365 | "gcloud ml-engine jobs submit training $JOBNAME \\\n", 366 | " --region=$REGION \\\n", 367 | " --module-name=trainer.task \\\n", 368 | " --package-path=./trainer \\\n", 369 | " --job-dir=$GCS_BUCKET/$JOBNAME/ \\\n", 370 | " --runtime-version 1.4 \\\n", 371 | " --config config.yaml \\\n", 372 | " -- \\\n", 373 | " --output_dir=$GCS_BUCKET/$JOBNAME/output\n" 374 | ] 375 | }, 376 | { 377 | "cell_type": "markdown", 378 | "metadata": {}, 379 | "source": [ 380 | "### 4) Inspect Results\n", 381 | "\n", 382 | "In the cloud console (https://console.cloud.google.com/mlengine/jobs) you will see the output of each trial, which hyperparameters were chosen, and what the resulting loss was. Trials will be shown in order of performance, with the best trial on top."
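, "\n", "\n", "You can also retrieve the same results from the command line, using the job name printed when the job was submitted above:\n", "\n", "    gcloud ml-engine jobs describe housing_180313_232321\n", "\n", "For hyperparameter tuning jobs, the returned job description includes a `trainingOutput` section listing each trial's hyperparameter values and final objective value."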
383 | ] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "execution_count": null, 388 | "metadata": { 389 | "collapsed": true 390 | }, 391 | "outputs": [], 392 | "source": [] 393 | } 394 | ], 395 | "metadata": { 396 | "anaconda-cloud": {}, 397 | "kernelspec": { 398 | "display_name": "Python 2", 399 | "language": "python", 400 | "name": "python2" 401 | }, 402 | "language_info": { 403 | "codemirror_mode": { 404 | "name": "ipython", 405 | "version": 2 406 | }, 407 | "file_extension": ".py", 408 | "mimetype": "text/x-python", 409 | "name": "python", 410 | "nbconvert_exporter": "python", 411 | "pygments_lexer": "ipython2", 412 | "version": "2.7.12" 413 | } 414 | }, 415 | "nbformat": 4, 416 | "nbformat_minor": 2 417 | } 418 | -------------------------------------------------------------------------------- /DRAFT/minimum_viable_custom_model.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "colab_type": "text", 7 | "deletable": true, 8 | "editable": true, 9 | "id": "4f3CKqFUqL2-", 10 | "slideshow": { 11 | "slide_type": "slide" 12 | } 13 | }, 14 | "source": [ 15 | "# Custom Estimator" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 27, 21 | "metadata": { 22 | "collapsed": false, 23 | "deletable": true, 24 | "editable": true 25 | }, 26 | "outputs": [ 27 | { 28 | "name": "stdout", 29 | "output_type": "stream", 30 | "text": [ 31 | "1.8.0\n" 32 | ] 33 | } 34 | ], 35 | "source": [ 36 | "import shutil\n", 37 | "import tensorflow as tf\n", 38 | "print(tf.__version__)" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": { 44 | "deletable": true, 45 | "editable": true 46 | }, 47 | "source": [ 48 | "#### Generate Toy Dataset\n", 49 | "X1+X2 = Y" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 28, 55 | "metadata": { 56 | "collapsed": false, 57 | "deletable": true, 58 | "editable": true 59 | }, 60 | "outputs": [ 61 | { 62 | "name": "stdout", 63 | "output_type": "stream", 64 | "text": [ 65 | "Overwriting data_train.csv\n" 66 | ] 67 | } 68 | ], 69 | "source": [ 70 | "%%writefile data_train.csv\n", 71 | "X1,X2,Y\n", 72 | "2,3,5\n", 73 | "1,3,4\n", 74 | "3,-1,2\n", 75 | "4,0,4\n", 76 | "-2,2,0\n", 77 | "2,2,4" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 29, 83 | "metadata": { 84 | "collapsed": false, 85 | "deletable": true, 86 | "editable": true 87 | }, 88 | "outputs": [ 89 | { 90 | "name": "stdout", 91 | "output_type": "stream", 92 | "text": [ 93 | "Overwriting data_eval.csv\n" 94 | ] 95 | } 96 | ], 97 | "source": [ 98 | "%%writefile data_eval.csv\n", 99 | "X1,X2,Y\n", 100 | "3,2,5\n", 101 | "3,1,4\n", 102 | "-2,-1,-2" 103 | ] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "metadata": { 108 | "deletable": true, 109 | "editable": true 110 | }, 111 | "source": [ 112 | "#### Input Fn" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 30, 118 | "metadata": { 119 | "collapsed": true, 120 | "deletable": true, 121 | "editable": true 122 | }, 123 | "outputs": [], 124 | "source": [ 125 | "def csv_input_fn(csv_path, batch_size,mode):\n", 126 | " def parse_csv(line):\n", 127 | " CSV_TYPES = [[0.0], [0.0],[0.0]]\n", 128 | " \n", 129 | " fields = tf.decode_csv(line, record_defaults=CSV_TYPES,field_delim=',')\n", 130 | " \n", 131 | " label = fields.pop(-1) #last value is label\n", 132 | " label = tf.expand_dims(label,-1) #to be consistent shape with predictions\n", 133 | " \n", 134 | " 
#combine features into single tensor\n", 135 | " features = tf.stack(fields,0)\n", 136 | " \n", 137 | " return features, label\n", 138 | " \n", 139 | " # Create a dataset containing the text lines.\n", 140 | " dataset = tf.data.TextLineDataset(csv_path).skip(1) #skip header\n", 141 | "\n", 142 | " # Parse each line.\n", 143 | " dataset = dataset.map(parse_csv)\n", 144 | "\n", 145 | " # Shuffle, repeat, and batch the examples.\n", 146 | " if(mode == tf.estimator.ModeKeys.TRAIN):\n", 147 | " dataset = dataset.shuffle(batch_size*10)\n", 148 | " dataset = dataset.repeat()\n", 149 | " \n", 150 | " dataset = dataset.batch(batch_size)\n", 151 | "\n", 152 | " return dataset" 153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": {}, 158 | "source": [ 159 | "#### Custom Estimator" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": 31, 165 | "metadata": { 166 | "collapsed": false, 167 | "deletable": true, 168 | "editable": true 169 | }, 170 | "outputs": [], 171 | "source": [ 172 | "# Create the custom estimator\n", 173 | "def custom_estimator(features, labels, mode, params):\n", 174 | " print('custom_estimator: features: {}'.format(features))\n", 175 | " print('custom_estimator: labels:{}'.format(labels))\n", 176 | " \n", 177 | " predictions = tf.layers.dense(features,1,activation=None)\n", 178 | " print('custom_estimator: predictions: {}'.format(predictions))\n", 179 | " \n", 180 | " # 2. Loss function, training/eval ops\n", 181 | " if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL:\n", 182 | " loss = tf.losses.mean_squared_error(labels, predictions)\n", 183 | " optimizer = tf.train.FtrlOptimizer(learning_rate=0.1)\n", 184 | " train_op = tf.contrib.layers.optimize_loss(\n", 185 | " loss = loss,\n", 186 | " global_step = tf.train.get_global_step(),\n", 187 | " learning_rate = 0.01,\n", 188 | " optimizer = optimizer)\n", 189 | " \n", 190 | " eval_metric_ops = {\n", 191 | " \"rmse\": tf.metrics.root_mean_squared_error(labels, predictions)\n", 192 | " }\n", 193 | " else:\n", 194 | " loss = None\n", 195 | " train_op = None\n", 196 | " eval_metric_ops = None\n", 197 | " \n", 198 | " predictions_dict = {'predictions':predictions,'features':features}\n", 199 | " \n", 200 | " return tf.estimator.EstimatorSpec(\n", 201 | " mode = mode,\n", 202 | " predictions = predictions_dict,\n", 203 | " loss = loss,\n", 204 | " train_op = train_op,\n", 205 | " eval_metric_ops = eval_metric_ops,\n", 206 | " )" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 32, 212 | "metadata": { 213 | "collapsed": false, 214 | "deletable": true, 215 | "editable": true 216 | }, 217 | "outputs": [], 218 | "source": [ 219 | "# Create custom estimator's train and evaluate function\n", 220 | "def train_and_evaluate(output_dir,args):\n", 221 | " estimator = tf.estimator.Estimator(model_fn=custom_estimator, \n", 222 | " model_dir=output_dir)\n", 223 | " train_spec = tf.estimator.TrainSpec(input_fn= lambda:csv_input_fn(\n", 224 | " args['train_path'],\n", 225 | " args['batch_size'],\n", 226 | " tf.estimator.ModeKeys.TRAIN),\n", 227 | " max_steps = args['train_steps'])\n", 228 | " eval_spec = tf.estimator.EvalSpec(input_fn = lambda:csv_input_fn(\n", 229 | " args['eval_path'], \n", 230 | " args['batch_size'],\n", 231 | " tf.estimator.ModeKeys.EVAL),\n", 232 | " steps = None)\n", 233 | " tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 33, 239 | 
"metadata": { 240 | "collapsed": false, 241 | "deletable": true, 242 | "editable": true 243 | }, 244 | "outputs": [ 245 | { 246 | "name": "stdout", 247 | "output_type": "stream", 248 | "text": [ 249 | "INFO:tensorflow:Using default config.\n", 250 | "INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_task_type': 'worker', '_train_distribute': None, '_is_chief': True, '_cluster_spec': , '_evaluation_master': '', '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_service': None, '_num_ps_replicas': 0, '_tf_random_seed': None, '_master': '', '_num_worker_replicas': 1, '_task_id': 0, '_log_step_count_steps': 100, '_model_dir': 'trained', '_global_id_in_cluster': 0, '_save_summary_steps': 100}\n", 251 | "WARNING:tensorflow:Estimator's model_fn () includes params argument, but params are not passed to Estimator.\n", 252 | "INFO:tensorflow:Running training and evaluation locally (non-distributed).\n", 253 | "INFO:tensorflow:Start train and evaluate loop. The evaluate will happen after 600 secs (eval_spec.throttle_secs) or training is finished.\n", 254 | "INFO:tensorflow:Calling model_fn.\n", 255 | "custom_estimator: features: Tensor(\"IteratorGetNext:0\", shape=(?, 2), dtype=float32)\n", 256 | "custom_estimator: labels:Tensor(\"IteratorGetNext:1\", shape=(?, 1), dtype=float32)\n", 257 | "custom_estimator: predictions: Tensor(\"dense/BiasAdd:0\", shape=(?, 1), dtype=float32)\n", 258 | "INFO:tensorflow:Done calling model_fn.\n", 259 | "INFO:tensorflow:Create CheckpointSaverHook.\n", 260 | "INFO:tensorflow:Graph was finalized.\n", 261 | "INFO:tensorflow:Running local_init_op.\n", 262 | "INFO:tensorflow:Done running local_init_op.\n", 263 | "INFO:tensorflow:Saving checkpoints for 1 into trained/model.ckpt.\n", 264 | "INFO:tensorflow:loss = 14.381937, step = 1\n", 265 | "INFO:tensorflow:Saving checkpoints for 100 into trained/model.ckpt.\n", 266 | "INFO:tensorflow:Loss for final step: 0.16213264.\n", 267 | "INFO:tensorflow:Calling model_fn.\n", 268 | "custom_estimator: features: Tensor(\"IteratorGetNext:0\", shape=(?, 2), dtype=float32)\n", 269 | "custom_estimator: labels:Tensor(\"IteratorGetNext:1\", shape=(?, 1), dtype=float32)\n", 270 | "custom_estimator: predictions: Tensor(\"dense/BiasAdd:0\", shape=(?, 1), dtype=float32)\n", 271 | "INFO:tensorflow:Done calling model_fn.\n", 272 | "INFO:tensorflow:Starting evaluation at 2018-06-29-04:35:59\n", 273 | "INFO:tensorflow:Graph was finalized.\n", 274 | "INFO:tensorflow:Restoring parameters from trained/model.ckpt-100\n", 275 | "INFO:tensorflow:Running local_init_op.\n", 276 | "INFO:tensorflow:Done running local_init_op.\n", 277 | "INFO:tensorflow:Finished evaluation at 2018-06-29-04:36:00\n", 278 | "INFO:tensorflow:Saving dict for global step 100: global_step = 100, loss = 0.04452223, rmse = 0.21100292\n" 279 | ] 280 | } 281 | ], 282 | "source": [ 283 | "args = {\n", 284 | " 'train_path': 'data_train.csv',\n", 285 | " 'eval_path': 'data_eval.csv',\n", 286 | " 'batch_size': 4,\n", 287 | " 'train_steps': 100,\n", 288 | "}\n", 289 | "OUTDIR = 'trained'\n", 290 | "shutil.rmtree(OUTDIR, ignore_errors = True) # start fresh each time\n", 291 | "train_and_evaluate(OUTDIR,args)" 292 | ] 293 | }, 294 | { 295 | "cell_type": "markdown", 296 | "metadata": { 297 | "deletable": true, 298 | "editable": true 299 | }, 300 | "source": [ 301 | "#### Inspect Weights\n", 302 | "\n", 303 | "The tensors named dense/kernel and dense/bias are the weights and bias for the model" 304 | ] 305 
| }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": 34, 309 | "metadata": { 310 | "collapsed": false, 311 | "deletable": true, 312 | "editable": true 313 | }, 314 | "outputs": [ 315 | { 316 | "name": "stdout", 317 | "output_type": "stream", 318 | "text": [ 319 | "tensor_name: dense/kernel\n", 320 | "[[0.37332565]\n", 321 | " [1.1088971 ]]\n", 322 | "tensor_name: dense/bias\n", 323 | "[0.94920313]\n" 324 | ] 325 | } 326 | ], 327 | "source": [ 328 | "from tensorflow.python.tools import inspect_checkpoint\n", 329 | "inspect_checkpoint.print_tensors_in_checkpoint_file(\"trained/model.ckpt-100\", tensor_name='dense/kernel', all_tensors=False)\n", 330 | "inspect_checkpoint.print_tensors_in_checkpoint_file(\"trained/model.ckpt-100\", tensor_name='dense/bias', all_tensors=False)" 331 | ] 332 | }, 333 | { 334 | "cell_type": "markdown", 335 | "metadata": { 336 | "deletable": true, 337 | "editable": true 338 | }, 339 | "source": [ 340 | "#### Get predictions" 341 | ] 342 | }, 343 | { 344 | "cell_type": "code", 345 | "execution_count": 35, 346 | "metadata": { 347 | "collapsed": false, 348 | "deletable": true, 349 | "editable": true 350 | }, 351 | "outputs": [ 352 | { 353 | "name": "stdout", 354 | "output_type": "stream", 355 | "text": [ 356 | "INFO:tensorflow:Using default config.\n", 357 | "INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_task_type': 'worker', '_train_distribute': None, '_is_chief': True, '_cluster_spec': , '_evaluation_master': '', '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_service': None, '_num_ps_replicas': 0, '_tf_random_seed': None, '_master': '', '_num_worker_replicas': 1, '_task_id': 0, '_log_step_count_steps': 100, '_model_dir': 'trained', '_global_id_in_cluster': 0, '_save_summary_steps': 100}\n", 358 | "WARNING:tensorflow:Estimator's model_fn () includes params argument, but params are not passed to Estimator.\n", 359 | "INFO:tensorflow:Calling model_fn.\n", 360 | "custom_estimator: features: Tensor(\"IteratorGetNext:0\", shape=(?, 2), dtype=float32)\n", 361 | "custom_estimator: labels:None\n", 362 | "custom_estimator: predictions: Tensor(\"dense/BiasAdd:0\", shape=(?, 1), dtype=float32)\n", 363 | "INFO:tensorflow:Done calling model_fn.\n", 364 | "INFO:tensorflow:Graph was finalized.\n", 365 | "INFO:tensorflow:Restoring parameters from trained/model.ckpt-100\n", 366 | "INFO:tensorflow:Running local_init_op.\n", 367 | "INFO:tensorflow:Done running local_init_op.\n", 368 | "{'predictions': array([[ 4.748355 ],\n", 369 | " [ 3.998592 ],\n", 370 | " [-1.7349727]], dtype=float32), 'features': array([[ 3., 2.],\n", 371 | " [ 3., 1.],\n", 372 | " [-2., -1.]], dtype=float32)}\n" 373 | ] 374 | } 375 | ], 376 | "source": [ 377 | "#load checkpoint\n", 378 | "estimator = tf.estimator.Estimator(model_fn=custom_estimator, \n", 379 | " model_dir=OUTDIR) \n", 380 | "\n", 381 | "predictions = estimator.predict(\n", 382 | " input_fn = lambda:csv_input_fn(\n", 383 | " args['eval_path'], \n", 384 | " args['batch_size'],\n", 385 | " tf.estimator.ModeKeys.EVAL),\n", 386 | " yield_single_examples=False\n", 387 | " )\n", 388 | "print(predictions.next())" 389 | ] 390 | } 391 | ], 392 | "metadata": { 393 | "colab": { 394 | "default_view": {}, 395 | "name": "first_steps_with_tensor_flow.ipynb", 396 | "provenance": [], 397 | "version": "0.3.2", 398 | "views": {} 399 | }, 400 | "kernelspec": { 401 | "display_name": "Python 2", 402 | "language": 
"python", 403 | "name": "python2" 404 | }, 405 | "language_info": { 406 | "codemirror_mode": { 407 | "name": "ipython", 408 | "version": 2 409 | }, 410 | "file_extension": ".py", 411 | "mimetype": "text/x-python", 412 | "name": "python", 413 | "nbconvert_exporter": "python", 414 | "pygments_lexer": "ipython2", 415 | "version": "2.7.14" 416 | } 417 | }, 418 | "nbformat": 4, 419 | "nbformat_minor": 0 420 | } 421 | -------------------------------------------------------------------------------- /DRAFT/custom_estimator_mnist.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 133, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "1.6.0\n" 13 | ] 14 | } 15 | ], 16 | "source": [ 17 | "from matplotlib import pyplot as plt\n", 18 | "import numpy as np\n", 19 | "import shutil\n", 20 | "import tensorflow as tf\n", 21 | "print(tf.__version__)" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "### Download and explore MNIST data" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 2, 34 | "metadata": {}, 35 | "outputs": [ 36 | { 37 | "name": "stdout", 38 | "output_type": "stream", 39 | "text": [ 40 | "Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz\n", 41 | "11493376/11490434 [==============================]11493376/11490434 [==============================] - 3s 0us/step\n", 42 | "\n" 43 | ] 44 | } 45 | ], 46 | "source": [ 47 | "data = tf.keras.datasets.mnist.load_data(path='mnist.npz') \n", 48 | "# Tuple of Numpy arrays: ((x_train, y_train), (x_test, y_test))" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 60, 54 | "metadata": { 55 | "collapsed": true 56 | }, 57 | "outputs": [], 58 | "source": [ 59 | "x = data[0][0] #60K 28x28 images\n", 60 | "y = data[0][1]\n", 61 | "x_test = data[1][0] #10K 28x28 images\n", 62 | "y_test = data[1][1]" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "Show amount of data" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 61, 75 | "metadata": {}, 76 | "outputs": [ 77 | { 78 | "name": "stdout", 79 | "output_type": "stream", 80 | "text": [ 81 | "x:(60000, 28, 28)\n", 82 | "y:(60000,)\n", 83 | "x_test:(10000, 28, 28)\n", 84 | "y_test:(10000,)\n" 85 | ] 86 | } 87 | ], 88 | "source": [ 89 | "print('x:{}'.format(x.shape))\n", 90 | "print('y:{}'.format(y.shape))\n", 91 | "print('x_test:{}'.format(x_test.shape))\n", 92 | "print('y_test:{}'.format(y_test.shape))" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "Split into x and y into 80% train and 20% eval.\n", 100 | "\n", 101 | "We'll save x_test and y_test as our hold out data" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 157, 107 | "metadata": {}, 108 | "outputs": [ 109 | { 110 | "name": "stdout", 111 | "output_type": "stream", 112 | "text": [ 113 | "x_train:(48009, 28, 28)\n", 114 | "y_train:(48009,)\n", 115 | "x_eval:(11991, 28, 28)\n", 116 | "y_eval:(11991,)\n", 117 | "x_test:(10000, 28, 28)\n", 118 | "y_test:(10000,)\n" 119 | ] 120 | } 121 | ], 122 | "source": [ 123 | "# Split into train and eval\n", 124 | "msk = np.random.rand(len(x)) < 0.8 #numpy vector of booleans\n", 125 | "x_train = x[msk] #can use an numpy vector to filter a matrix\n", 126 | "y_train = y[msk]\n", 127 | "x_eval = 
x[~msk]\n", 128 | "y_eval = y[~msk]\n", 129 | "print('x_train:{}'.format(x_train.shape))\n", 130 | "print('y_train:{}'.format(y_train.shape))\n", 131 | "print('x_eval:{}'.format(x_eval.shape))\n", 132 | "print('y_eval:{}'.format(y_eval.shape))\n", 133 | "print('x_test:{}'.format(x_test.shape))\n", 134 | "print('y_test:{}'.format(y_test.shape))" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "Display one training example" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 159, 147 | "metadata": {}, 148 | "outputs": [ 149 | { 150 | "name": "stdout", 151 | "output_type": "stream", 152 | "text": [ 153 | "Label:4\n" 154 | ] 155 | }, 156 | { 157 | "data": { 158 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAP8AAAD8CAYAAAC4nHJkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAADWBJREFUeJzt3W+MHPV9x/HPx8fZjp2gcCa+XsDBkEAkhNRDupg2/CmV\nCSKIyqBEVpBKHAnhPMg/pDyAuq1KlQclUROKmgjpAm5MlUBaJQg/IGnwKQpCRcYHcTBgUggxwY7x\nOTGRTTD+++2DG6IDbmfXu7M7e/6+X5J1u/Ob2flo5M/N7s7e/hwRApDPvLoDAKgH5QeSovxAUpQf\nSIryA0lRfiApyg8kRfmBpCg/kNQpvdzZfC+IhVrcy10CqbyhP+pwHHIr63ZUfttXSbpT0oCkuyPi\n9rL1F2qxLvLKTnYJoMTmmGh53baf9tsekPQtSR+XdL6k622f3+7jAeitTl7zr5D0QkS8GBGHJd0v\naVU1sQB0WyflP0PSyzPu7yyWvYXttbYnbU8e0aEOdgegSl1/tz8ixiNiLCLGBrWg27sD0KJOyr9L\n0rIZ988slgGYAzop/xZJ59o+2/Z8SZ+StLGaWAC6re1LfRFx1PbnJf2Ppi/1rY+IZypLBqCrOrrO\nHxEPSXqooiwAeoiP9wJJUX4gKcoPJEX5gaQoP5AU5QeSovxAUpQfSIryA0lRfiApyg8kRfmBpCg/\nkBTlB5Ki/EBSlB9IivIDSVF+ICnKDyRF+YGkKD+QFOUHkqL8QFKUH0iK8gNJUX4gKcoPJEX5gaQo\nP5BUR7P02t4h6YCkY5KORsRYFaGAKvzxkxc1HPvq1+4q3fYrqz9dOh6TT7eVqZ90VP7CX0fE7yp4\nHAA9xNN+IKlOyx+SNtl+wvbaKgIB6I1On/ZfEhG7bC+V9LDt5yLikZkrFL8U1krSQi3qcHcAqtLR\nmT8idhU/pyQ9IGnFLOuMR8RYRIwNakEnuwNQobbLb3ux7fe8eVvSlZLm/lugQBKdPO0flvSA7Tcf\n53sR8eNKUgHourbLHxEvSvrzCrN01cFV73hF8tbxJQOl40PrH6syDnpgaqzxE9uv7PibHibpT1zq\nA5Ki/EBSlB9IivIDSVF+ICnKDyRVxV/1zQm/vaz899yiD/6h/AHWVxgG1ZhXfnk2PnCw4djKpc+V\nbjvhj7YVaS7hzA8kRfmBpCg/kBTlB5Ki/EBSlB9IivIDSaW5zv/P1/x36fhXt1/ZoySoysAHzyod\nf+6vGn84Y/Txvy3d9v1btrWVaS7hzA8kRfmBpCg/kBTlB5Ki/EBSlB9IivIDSaW5zj/oo3VHQMVO\nufv1trc9+KtTK0wyN3HmB5Ki/EBSlB9IivIDSVF+ICnKDyRF+YGkml7nt71e0jWSpiLigmLZkKTv\nS1ouaYek1RHxavdiNnf8ktHS8UsXPtqjJOiV5Yt/3/a2yzYdqzDJ3NTKmf87kq5627JbJU1ExLmS\nJor7AOaQpuWPiEck7Xvb4lWSNhS3N0i6tuJcALqs3df8wxGxu7j9iqThivIA6JGO3/CLiJAUjcZt\nr7U9aXvyiA51ujsAFWm3/Htsj0hS8XOq0YoRMR4RYxExNqgFbe4OQNXaLf9GSWuK22skPVhNHAC9\n0rT8tu+T9JikD9veaftGSbdL+pjt5yVdUdwHMIc0vc4fEdc3GFpZcZaOvHTNu0rHlw4s6lESVOWU\n5R8oHf/k0Ma2H/tdvy7/WEqGTwHwCT8gKcoPJEX5gaQoP5AU5QeSovxAUifNV3ef8qEDHW3/xnPv\nrSgJqvLyvy0uHb94wfHS8Xv2n9l48A/724l0UuHMDyRF+YGkKD+QFOUHkqL8QFKUH0iK8gNJnTTX\n+Tu1dLL8mjFmN3D6ktLxPZ84r+HY0Oqdpdv+7Lx7mux9YenoXd9q/L2yS/f8b5PHPvlx5geSovxA\nUpQfSIryA0lRfiApyg8kRfmBpLjOXzg4VP57sPwvyztz/NILS8djwKXjL1/ReCakw+8/UrrtvPnl\nX1L9k0v/vXR8sDyaXjnWONs/vnhd6bb7jpd/9mLRvPLsw5sbf8dDw/nlEuHMDyRF+YGkKD+QFOUH\nkqL8QFKUH0iK8gNJNb3Ob3u9pGskTUXEBcWy2yTdJGlvsdq6iHioWyFbceiNwdLx402u7P7HujtK\nxzd+fvSEM7XqliV3l47PU/nF9INxuOHYb4+VXwv/5t7LS8ev2HRz6fh7fz6/dHzkJ3sajvml8r/n\n37u9fNr14YHyzzDElm2l49m1cub/jqSrZll+R0SMFv9qLT6AE9e0/BHxiKR9PcgCoIc6ec3/BdtP\n2V5v+7TKEgHoiXbLf5ekcySNStot6euNVrS91vak7ckjOtTm7gBUra3yR8SeiDgWEcclfVvSipJ1\nxyNiLCLGBtX4jzwA9FZb5bc9MuPudZKeriYOgF5p5VLffZIul3S67Z2S/knS5bZHNf2XkTskfbaL\nGQF0gSN695fNp3ooLvLKnu1vpl//y1+Wji/7yK4eJTlxe39UMs+8pCXPNL7ePf/HW6qOU5ldt3y0\ndPwXX/xm6fj9r72vdPzeDy874Uxz3eaY0P7Y1+RbFqbxCT8gKcoPJEX5gaQoP5AU5QeSovxAUmm+\nuvvsv3us7ghtG9Fv6o7QFYsu29t8pRL/8NNPlI6fp8c7evyTHWd+ICnKDyRF+YGkKD+QFOUHkqL8\nQFKUH0gqzXV+nHzOepCJtjvBmR9IivIDSVF+ICnKDyRF+YGkKD+QFOUHkqL8QFKU
H0iK8gNJUX4g\nKcoPJEX5gaQoP5AU5QeSavr3/LaXSbpX0rCkkDQeEXfaHpL0fUnLJe2QtDoiXu1eVGQz4PJz06vn\nDZaO/9mPqkxz8mnlzH9U0pcj4nxJfyHpc7bPl3SrpImIOFfSRHEfwBzRtPwRsTsinixuH5C0XdIZ\nklZJ2lCstkHStd0KCaB6J/Sa3/ZySRdK2ixpOCJ2F0OvaPplAYA5ouXy2363pB9Iujki9s8ci4jQ\n9PsBs2231vak7ckjOtRRWADVaan8tgc1XfzvRsQPi8V7bI8U4yOSpmbbNiLGI2IsIsYGtaCKzAAq\n0LT8ti3pHknbI+IbM4Y2SlpT3F4j6cHq4wHolla+uvtiSTdI2mZ7a7FsnaTbJf2X7RslvSRpdXci\nIqtjcbx8BT6l0pGm5Y+IRyW5wfDKauMA6BV+dwJJUX4gKcoPJEX5gaQoP5AU5QeSYopuzFmvf+T1\nuiPMaZz5gaQoP5AU5QeSovxAUpQfSIryA0lRfiAprvOjbzX76m50hqMLJEX5gaQoP5AU5QeSovxA\nUpQfSIryA0lxnR+1ObTpfaXjx0abfG8/OsKZH0iK8gNJUX4gKcoPJEX5gaQoP5AU5QeSckSUr2Av\nk3SvpGFJIWk8Iu60fZukmyTtLVZdFxEPlT3WqR6Ki8ys3kC3bI4J7Y99bmXdVj7kc1TSlyPiSdvv\nkfSE7YeLsTsi4l/bDQqgPk3LHxG7Je0ubh+wvV3SGd0OBqC7Tug1v+3lki6UtLlY9AXbT9leb/u0\nBtustT1pe/KIDnUUFkB1Wi6/7XdL+oGkmyNiv6S7JJ0jaVTTzwy+Ptt2ETEeEWMRMTaoBRVEBlCF\nlspve1DTxf9uRPxQkiJiT0Qci4jjkr4taUX3YgKoWtPy27akeyRtj4hvzFg+MmO16yQ9XX08AN3S\nyrv9F0u6QdI221uLZeskXW97VNOX/3ZI+mxXEgLoilbe7X9U0mzXDUuv6QPob3zCD0iK8gNJUX4g\nKcoPJEX5gaQoP5AU5QeSovxAUpQfSIryA0lRfiApyg8kRfmBpCg/kFTTr+6udGf2XkkvzVh0uqTf\n9SzAienXbP2aSyJbu6rMdlZElM99Xuhp+d+xc3syIsZqC1CiX7P1ay6JbO2qKxtP+4GkKD+QVN3l\nH695/2X6NVu/5pLI1q5astX6mh9Afeo+8wOoSS3lt32V7V/afsH2rXVkaMT2DtvbbG+1PVlzlvW2\np2w/PWPZkO2HbT9f/Jx1mrSast1me1dx7LbavrqmbMts/9T2s7afsf2lYnmtx64kVy3HredP+20P\nSPo/SR+TtFPSFknXR8SzPQ3SgO0dksYiovZrwrYvk/SapHsj4oJi2dck7YuI24tfnKdFxC19ku02\nSa/VPXNzMaHMyMyZpSVdK+kzqvHYleRarRqOWx1n/hWSXoiIFyPisKT7Ja2qIUffi4hHJO172+JV\nkjYUtzdo+j9PzzXI1hciYndEPFncPiDpzZmlaz12JblqUUf5z5D08oz7O9VfU36HpE22n7C9tu4w\nsxgupk2XpFckDdcZZhZNZ27upbfNLN03x66dGa+rxht+73RJRIxK+rikzxVPb/tSTL9m66fLNS3N\n3Nwrs8ws/Sd1Hrt2Z7yuWh3l3yVp2Yz7ZxbL+kJE7Cp+Tkl6QP03+/CeNydJLX5O1ZznT/pp5ubZ\nZpZWHxy7fprxuo7yb5F0ru2zbc+X9ClJG2vI8Q62FxdvxMj2YklXqv9mH94oaU1xe42kB2vM8hb9\nMnNzo5mlVfOx67sZryOi5/8kXa3pd/x/Jenv68jQINc5kn5R/Hum7myS7tP008Ajmn5v5EZJSyRN\nSHpe0iZJQ32U7T8lbZP0lKaLNlJTtks0/ZT+KUlbi39X133sSnLVctz4hB+QFG/4AUlRfiApyg8k\nRfmBpCg/kBTlB5Ki/EBSlB9I6v8BZOIGXzoUqLUAAAAASUVORK5CYII=\n", 159 | "text/plain": [ 160 | "" 161 | ] 162 | }, 163 | "metadata": {}, 164 | "output_type": "display_data" 165 | } 166 | ], 167 | "source": [ 168 | "example = 1 #try changing this to see new data\n", 169 | "print('Label:{}'.format(y_train[example]))\n", 170 | "plt.imshow(x_train[example])\n", 171 | "plt.show()" 172 | ] 173 | }, 174 | { 175 | "cell_type": "markdown", 176 | "metadata": {}, 177 | "source": [ 178 | "### Define input functions" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": 180, 184 | "metadata": {}, 185 | "outputs": [], 186 | "source": [ 187 | "train_input_fn = tf.estimator.inputs.numpy_input_fn(\n", 188 | " x={\"x\": x_train.astype('float32')}, #TODO: figure out why this needs to be float32. 
float16 spins forever, int gives error\n", 189 | " y=y_train.astype('int32'),\n", 190 | " batch_size=100,\n", 191 | " num_epochs=None, #can i change this to 1 and specify epochs at train time?\n", 192 | " shuffle=True)" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": 181, 198 | "metadata": {}, 199 | "outputs": [], 200 | "source": [ 201 | "eval_input_fn = tf.estimator.inputs.numpy_input_fn(\n", 202 | " x={\"x\": x_eval.astype('float32')},\n", 203 | " y=y_eval.astype('int32'),\n", 204 | " num_epochs=1,\n", 205 | " shuffle=False)" 206 | ] 207 | }, 208 | { 209 | "cell_type": "markdown", 210 | "metadata": {}, 211 | "source": [ 212 | "### Define model function" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": 183, 218 | "metadata": { 219 | "collapsed": true 220 | }, 221 | "outputs": [], 222 | "source": [ 223 | "#Implementation from: https://www.tensorflow.org/tutorials/layers#building_the_cnn_mnist_classifier\n", 224 | "def cnn_model_fn(features, labels, mode):\n", 225 | " \"\"\"Model function for CNN.\"\"\"\n", 226 | " # Input Layer\n", 227 | " input_layer = tf.expand_dims(features[\"x\"], -1)\n", 228 | "\n", 229 | " # Convolutional Layer #1\n", 230 | " conv1 = tf.layers.conv2d(\n", 231 | " inputs=input_layer,\n", 232 | " filters=32,\n", 233 | " kernel_size=[5, 5],\n", 234 | " padding=\"same\",\n", 235 | " activation=tf.nn.relu)\n", 236 | "\n", 237 | " # Pooling Layer #1\n", 238 | " pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)\n", 239 | "\n", 240 | " # Convolutional Layer #2 and Pooling Layer #2\n", 241 | " conv2 = tf.layers.conv2d(\n", 242 | " inputs=pool1,\n", 243 | " filters=64,\n", 244 | " kernel_size=[5, 5],\n", 245 | " padding=\"same\",\n", 246 | " activation=tf.nn.relu)\n", 247 | " pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)\n", 248 | "\n", 249 | " # Dense Layer\n", 250 | " pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])\n", 251 | " dense = tf.layers.dense(inputs=pool2_flat, units=1024, activation=tf.nn.relu)\n", 252 | " dropout = tf.layers.dropout(\n", 253 | " inputs=dense, rate=0.4, training= (mode == tf.estimator.ModeKeys.TRAIN))\n", 254 | "\n", 255 | " # Logits Layer\n", 256 | " logits = tf.layers.dense(inputs=dropout, units=10)\n", 257 | "\n", 258 | " predictions = {\n", 259 | " # Generate predictions (for PREDICT and EVAL mode)\n", 260 | " \"classes\": tf.argmax(input=logits, axis=1),\n", 261 | " # Add `softmax_tensor` to the graph. 
It is used for PREDICT and by the\n", 262 | " # `logging_hook`.\n", 263 | " \"probabilities\": tf.nn.softmax(logits, name=\"softmax_tensor\")\n", 264 | " }\n", 265 | "\n", 266 | " if mode == tf.estimator.ModeKeys.PREDICT:\n", 267 | " return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)\n", 268 | "\n", 269 | " # Calculate Loss (for both TRAIN and EVAL modes)\n", 270 | " loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)\n", 271 | "\n", 272 | " # Configure the Training Op (for TRAIN mode)\n", 273 | " if mode == tf.estimator.ModeKeys.TRAIN:\n", 274 | " optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)\n", 275 | " train_op = optimizer.minimize(\n", 276 | " loss=loss,\n", 277 | " global_step=tf.train.get_global_step())\n", 278 | " return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)\n", 279 | "\n", 280 | " # Add evaluation metrics (for EVAL mode)\n", 281 | " if mode == tf.estimator.ModeKeys.EVAL:\n", 282 | " eval_metric_ops = {\n", 283 | " \"accuracy\": tf.metrics.accuracy(\n", 284 | " labels=labels, predictions=predictions[\"classes\"])}\n", 285 | " return tf.estimator.EstimatorSpec(\n", 286 | " mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)" 287 | ] 288 | }, 289 | { 290 | "cell_type": "markdown", 291 | "metadata": {}, 292 | "source": [ 293 | "### Instantiate Estimator" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": 184, 299 | "metadata": {}, 300 | "outputs": [ 301 | { 302 | "name": "stdout", 303 | "output_type": "stream", 304 | "text": [ 305 | "INFO:tensorflow:Using default config.\n", 306 | "INFO:tensorflow:Using config: {'_model_dir': 'trained_custom', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': , '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}\n" 307 | ] 308 | } 309 | ], 310 | "source": [ 311 | "OUTDIR = \"trained_custom\"\n", 312 | "mnist_classifier = tf.estimator.Estimator(\n", 313 | " model_fn=cnn_model_fn, model_dir=OUTDIR)" 314 | ] 315 | }, 316 | { 317 | "cell_type": "markdown", 318 | "metadata": {}, 319 | "source": [ 320 | "### Train and Eval" 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": 185, 326 | "metadata": {}, 327 | "outputs": [ 328 | { 329 | "name": "stdout", 330 | "output_type": "stream", 331 | "text": [ 332 | "INFO:tensorflow:Calling model_fn.\n", 333 | "INFO:tensorflow:Done calling model_fn.\n", 334 | "INFO:tensorflow:Create CheckpointSaverHook.\n", 335 | "INFO:tensorflow:Graph was finalized.\n", 336 | "INFO:tensorflow:Running local_init_op.\n", 337 | "INFO:tensorflow:Done running local_init_op.\n", 338 | "INFO:tensorflow:Saving checkpoints for 1 into trained_custom/model.ckpt.\n", 339 | "INFO:tensorflow:loss = 43.540283, step = 1\n", 340 | "INFO:tensorflow:Saving checkpoints for 50 into trained_custom/model.ckpt.\n", 341 | "INFO:tensorflow:Loss for final step: 1.2389966.\n", 342 | "INFO:tensorflow:Calling model_fn.\n", 343 | "INFO:tensorflow:Done calling model_fn.\n", 344 | "INFO:tensorflow:Starting evaluation at 2018-03-16-02:57:42\n", 345 | "INFO:tensorflow:Graph was finalized.\n", 346 | "INFO:tensorflow:Restoring parameters from trained_custom/model.ckpt-50\n", 347 | 
"INFO:tensorflow:Running local_init_op.\n", 348 | "INFO:tensorflow:Done running local_init_op.\n", 349 | "INFO:tensorflow:Finished evaluation at 2018-03-16-02:57:49\n", 350 | "INFO:tensorflow:Saving dict for global step 50: accuracy = 0.8248687, global_step = 50, loss = 0.5431505\n", 351 | "{'accuracy': 0.8248687, 'loss': 0.5431505, 'global_step': 50}\n" 352 | ] 353 | } 354 | ], 355 | "source": [ 356 | "shutil.rmtree(OUTDIR, ignore_errors = True) # start fresh each time\n", 357 | "mnist_classifier.train(\n", 358 | " input_fn=train_input_fn,\n", 359 | " steps=50) #example uses 20000\n", 360 | "eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn)\n", 361 | "print(eval_results)" 362 | ] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": null, 367 | "metadata": { 368 | "collapsed": true 369 | }, 370 | "outputs": [], 371 | "source": [] 372 | } 373 | ], 374 | "metadata": { 375 | "kernelspec": { 376 | "display_name": "Python 3", 377 | "language": "python", 378 | "name": "python3" 379 | }, 380 | "language_info": { 381 | "codemirror_mode": { 382 | "name": "ipython", 383 | "version": 3 384 | }, 385 | "file_extension": ".py", 386 | "mimetype": "text/x-python", 387 | "name": "python", 388 | "nbconvert_exporter": "python", 389 | "pygments_lexer": "ipython3", 390 | "version": "3.6.2" 391 | } 392 | }, 393 | "nbformat": 4, 394 | "nbformat_minor": 2 395 | } 396 | -------------------------------------------------------------------------------- /housing_prices/cloud-ml-housing-prices.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Predicting Housing Prices using Tensorflow + Cloud ML Engine\n", 8 | "\n", 9 | "This notebook will show you how to create a tensorflow model, train it on the cloud in a distributed fashion across multiple CPUs or GPUs, explore the results using Tensorboard, and finally deploy the model for online prediction. We will demonstrate this by building a model to predict housing prices.\n" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": { 16 | "collapsed": false 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "import pandas as pd\n", 21 | "import tensorflow as tf" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": { 28 | "collapsed": false, 29 | "slideshow": { 30 | "slide_type": "-" 31 | } 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "print(tf.__version__)" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "## Tensorflow APIs\n", 43 | "\n", 44 | "(image: https://www.tensorflow.org/images/tensorflow_programming_environment.png)\n", 45 | "\n", 46 | "Tensorflow is a heirarchical framework. The further down the heirarchy you go, the more flexibility you have, but that more code you have to write. A best practice is start at the highest level of abstraction. Then if you need additional flexibility for some reason drop down one layer. \n", 47 | "\n", 48 | "For this tutorial we will be operating at the highest level of Tensorflow abstraction, using the Estimator API." 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "## Steps\n", 56 | "\n", 57 | "1. Load raw data\n", 58 | "\n", 59 | "2. Write Tensorflow Code\n", 60 | "\n", 61 | " 1. Define Feature Columns\n", 62 | " \n", 63 | " 2. Define Estimator\n", 64 | "\n", 65 | " 3. 
Define Input Function\n", 66 | " \n", 67 | " 4. Define Serving Function\n", 68 | "\n", 69 | " 5. Define Train and Eval Function\n", 70 | "\n", 71 | "3. Package Code\n", 72 | "\n", 73 | "4. Train\n", 74 | "\n", 75 | "5. Inspect Results\n", 76 | "\n", 77 | "6. Deploy Model\n", 78 | "\n", 79 | "7. Get Predictions" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "### 1) Load Raw Data\n", 87 | "\n", 88 | "This is a publicly available dataset on housing prices in Boston area suburbs circa 1978. It is hosted in a Google Cloud Storage bucket.\n", 89 | "\n", 90 | "For datasets too large to fit in memory you would read the data in batches. Tensorflow provides a queueing mechanism for this which is documented [here](https://www.tensorflow.org/programmers_guide/reading_data).\n", 91 | "\n", 92 | "In our case the dataset is small enough to fit in memory so we will simply read it into a pandas dataframe." 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": { 99 | "collapsed": true 100 | }, 101 | "outputs": [], 102 | "source": [ 103 | "#download data from GCS and store as pandas dataframe \n", 104 | "data_train = pd.read_csv(\n", 105 | " filepath_or_buffer='https://storage.googleapis.com/vijay-public/boston_housing/housing_train.csv',\n", 106 | " names=[\"CRIM\",\"ZN\",\"INDUS\",\"CHAS\",\"NOX\",\"RM\",\"AGE\",\"DIS\",\"RAD\",\"TAX\",\"PTRATIO\",\"MEDV\"])\n", 107 | "\n", 108 | "data_test = pd.read_csv(\n", 109 | " filepath_or_buffer='https://storage.googleapis.com/vijay-public/boston_housing/housing_test.csv',\n", 110 | " names=[\"CRIM\",\"ZN\",\"INDUS\",\"CHAS\",\"NOX\",\"RM\",\"AGE\",\"DIS\",\"RAD\",\"TAX\",\"PTRATIO\",\"MEDV\"])" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": { 117 | "collapsed": false 118 | }, 119 | "outputs": [], 120 | "source": [ 121 | "data_train.head()" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "#### Column Descriptions:\n", 129 | "\n", 130 | "1. CRIM: per capita crime rate by town \n", 131 | "2. ZN: proportion of residential land zoned for lots over 25,000 sq.ft. \n", 132 | "3. INDUS: proportion of non-retail business acres per town \n", 133 | "4. CHAS: Charles River dummy variable (= 1 if tract bounds river; 0 otherwise) \n", 134 | "5. NOX: nitric oxides concentration (parts per 10 million) \n", 135 | "6. RM: average number of rooms per dwelling \n", 136 | "7. AGE: proportion of owner-occupied units built prior to 1940 \n", 137 | "8. DIS: weighted distances to five Boston employment centres \n", 138 | "9. RAD: index of accessibility to radial highways \n", 139 | "10. TAX: full-value property-tax rate per $10,000 \n", 140 | "11. PTRATIO: pupil-teacher ratio by town \n", 141 | "12. MEDV: Median value of owner-occupied homes" 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": {}, 147 | "source": [ 148 | "### 2) Write Tensorflow Code" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "#### 2.A Define Feature Columns\n", 156 | "\n", 157 | "Feature columns are your Estimator's data \"interface.\" They tell the estimator in what format they should expect data and how to interpret it (is it one-hot? sparse? dense? continuous?). See 
https://www.tensorflow.org/api_docs/python/tf/feature_column\n", 158 | "\n", 159 | "\n" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": { 166 | "collapsed": true 167 | }, 168 | "outputs": [], 169 | "source": [ 170 | "FEATURES = [\"CRIM\", \"ZN\", \"INDUS\", \"NOX\", \"RM\",\n", 171 | " \"AGE\", \"DIS\", \"TAX\", \"PTRATIO\"]\n", 172 | "LABEL = \"MEDV\"\n", 173 | "\n", 174 | "feature_cols = [tf.feature_column.numeric_column(k)\n", 175 | " for k in FEATURES] #list of Feature Columns" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": {}, 181 | "source": [ 182 | "#### 2.B Define Estimator\n", 183 | "\n", 184 | "An Estimator is what actually implements your training, eval and prediction loops. Every estimator has the following methods:\n", 185 | "\n", 186 | "- train() for training\n", 187 | "- evaluate() for evaluation\n", 188 | "- predict() for prediction\n", 189 | "- export_savedmodel() for writing model state to disk\n", 190 | "\n", 191 | "Tensorflow has several canned estimators that already implement these methods (DNNClassifier, LinearClassifier, etc.) or you can implement a custom estimator. Instructions on how to implement a custom estimator are [here](https://www.tensorflow.org/extend/estimators) and an example is [here](https://github.com/GoogleCloudPlatform/training-data-analyst/blob/master/blogs/timeseries/rnn_cloudmle.ipynb).\n", 192 | "\n", 193 | "For simplicity we will use a canned estimator. To instantiate an estimator simply pass it what Feature Columns to expect and specify a directory for it to output to.\n", 194 | "\n", 195 | "Notice we wrap the estimator with a function. This is to allow us to specify the 'output_dir' at runtime, instead of having to hardcode it here" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": null, 201 | "metadata": { 202 | "collapsed": true 203 | }, 204 | "outputs": [], 205 | "source": [ 206 | "def generate_estimator(output_dir):\n", 207 | " return tf.estimator.DNNRegressor(feature_columns=feature_cols,\n", 208 | " hidden_units=[10, 10],\n", 209 | " model_dir=output_dir)" 210 | ] 211 | }, 212 | { 213 | "cell_type": "markdown", 214 | "metadata": {}, 215 | "source": [ 216 | "#### 2.C Define Input Function\n", 217 | "\n", 218 | "Now that you have an estimator and it knows what type of data to expect and how to interpret it, you need to actually pass the data to it! This is the job of the input function. \n", 219 | "\n", 220 | "The input function returns a (features, label) tuple\n", 221 | "- features: A python dictionary. Each key is a feature column name and its value is the tensor containing the data for that Feature\n", 222 | "- label: A Tensor containing the label column" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "metadata": { 229 | "collapsed": true 230 | }, 231 | "outputs": [], 232 | "source": [ 233 | "def generate_input_fn(data_set):\n", 234 | " def input_fn():\n", 235 | " features = {k: tf.constant(data_set[k].values) for k in FEATURES}\n", 236 | " labels = tf.constant(data_set[LABEL].values)\n", 237 | " return features, labels\n", 238 | " return input_fn" 239 | ] 240 | }, 241 | { 242 | "cell_type": "markdown", 243 | "metadata": {}, 244 | "source": [ 245 | "#### 2.D Define Serving Input Function\n", 246 | "\n", 247 | "To predict with the model, we need to define a serving input function which will be used to read inputs from a user at prediction time. 
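For example, a client may send each feature as an individual scalar in a JSON request, while the trained graph expects batched, rank-2 tensors.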
\n", 248 | "\n", 249 | "Why do we need a separate serving function? Don't we input the same features during training as in serving?\n", 250 | "\n", 251 | "Yes, but we may be *receiving* data in a different format during serving. The serving input function preforms transormations neccessary to get the data provided at prediction time into the format compatible with the Estimator API.\n", 252 | "\n", 253 | "returns a (features, inputs) tuple\n", 254 | "- features: A dict of features to be passed to the Estimator\n", 255 | "- inputs: A dictionary of inputs the predictions server should expect from the user" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": null, 261 | "metadata": { 262 | "collapsed": true 263 | }, 264 | "outputs": [], 265 | "source": [ 266 | "def serving_input_fn():\n", 267 | " #feature_placeholders are what the caller of the predict() method will have to provide\n", 268 | " feature_placeholders = {\n", 269 | " column.name: tf.placeholder(column.dtype, [None])\n", 270 | " for column in feature_cols\n", 271 | " }\n", 272 | " \n", 273 | " #features are what we actually pass to the estimator\n", 274 | " features = {\n", 275 | " # Inputs are rank 1 so that we can provide scalars to the server\n", 276 | " # but Estimator expects rank 2, so we expand dimension\n", 277 | " key: tf.expand_dims(tensor, -1)\n", 278 | " for key, tensor in feature_placeholders.items()\n", 279 | " }\n", 280 | " return tf.estimator.export.ServingInputReceiver(\n", 281 | " features, feature_placeholders\n", 282 | " )" 283 | ] 284 | }, 285 | { 286 | "cell_type": "markdown", 287 | "metadata": {}, 288 | "source": [ 289 | "#### 2.E Define Train and Eval Function\n", 290 | "\n", 291 | "Finally to train and evaluate we use tf.estimator.train_and_evaluate()\n", 292 | "\n", 293 | "This function is special because it provides consistent behavior across local and distributed environments.\n", 294 | "\n", 295 | "Meaning if you run on multiple CPUs or GPUs, it takes care of parrallelizing the computation graph across these devices for you! 
\n", 296 | "\n", 297 | "The tran_and_evaluate() function requires three arguments:\n", 298 | "- estimator: we already defined this earlier\n", 299 | "- train_spec: specifies the training input function\n", 300 | "- eval_spec: specifies the eval input function, and also an 'exporter' which uses our serving_input_fn for serving the model\n", 301 | "\n", 302 | "**Note running this cell will give an error because we haven't specified an output_dir, we will do that later**" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": null, 308 | "metadata": { 309 | "collapsed": false 310 | }, 311 | "outputs": [], 312 | "source": [ 313 | "train_spec = tf.estimator.TrainSpec(\n", 314 | " input_fn=generate_input_fn(data_train),\n", 315 | " max_steps=3000)\n", 316 | "\n", 317 | "exporter = tf.estimator.LatestExporter('Servo', serving_input_fn)\n", 318 | "\n", 319 | "eval_spec=tf.estimator.EvalSpec(\n", 320 | " input_fn=generate_input_fn(data_test),\n", 321 | " steps=1,\n", 322 | " exporters=exporter)\n", 323 | "\n", 324 | "tf.estimator.train_and_evaluate(generate_estimator(output_dir), train_spec, eval_spec)" 325 | ] 326 | }, 327 | { 328 | "cell_type": "markdown", 329 | "metadata": {}, 330 | "source": [ 331 | "### 3) Package Code\n", 332 | "\n", 333 | "You've now written all the tensoflow code you need!\n", 334 | "\n", 335 | "To make it compatible with Cloud ML Engine we'll combine the above tensorflow code into a single python file with two simple changes\n", 336 | "\n", 337 | "1. Add some boilerplate code to parse the command line arguments required for gcloud.\n", 338 | "2. Use the learn_runner.run() function to run the experiment\n", 339 | "\n", 340 | "We also add an empty \\__init__\\.py file to the folder. This is just the python convention for identifying modules." 
341 | ] 342 | }, 343 | { 344 | "cell_type": "code", 345 | "execution_count": null, 346 | "metadata": { 347 | "collapsed": false 348 | }, 349 | "outputs": [], 350 | "source": [ 351 | "%%bash\n", 352 | "mkdir trainer\n", 353 | "touch trainer/__init__.py" 354 | ] 355 | }, 356 | { 357 | "cell_type": "code", 358 | "execution_count": 84, 359 | "metadata": { 360 | "collapsed": false 361 | }, 362 | "outputs": [ 363 | { 364 | "name": "stdout", 365 | "output_type": "stream", 366 | "text": [ 367 | "Overwriting trainer/task.py\n" 368 | ] 369 | } 370 | ], 371 | "source": [ 372 | "%%writefile trainer/task.py\n", 373 | "\n", 374 | "import argparse\n", 375 | "import pandas as pd\n", 376 | "import tensorflow as tf\n", 377 | "from tensorflow.contrib.learn.python.learn import learn_runner\n", 378 | "from tensorflow.contrib.learn.python.learn.utils import saved_model_export_utils\n", 379 | "\n", 380 | "print(tf.__version__)\n", 381 | "tf.logging.set_verbosity(tf.logging.ERROR)\n", 382 | "\n", 383 | "data_train = pd.read_csv(\n", 384 | " filepath_or_buffer='https://storage.googleapis.com/vijay-public/boston_housing/housing_train.csv',\n", 385 | " names=[\"CRIM\",\"ZN\",\"INDUS\",\"CHAS\",\"NOX\",\"RM\",\"AGE\",\"DIS\",\"RAD\",\"TAX\",\"PTRATIO\",\"MEDV\"])\n", 386 | "\n", 387 | "data_test = pd.read_csv(\n", 388 | " filepath_or_buffer='https://storage.googleapis.com/vijay-public/boston_housing/housing_test.csv',\n", 389 | " names=[\"CRIM\",\"ZN\",\"INDUS\",\"CHAS\",\"NOX\",\"RM\",\"AGE\",\"DIS\",\"RAD\",\"TAX\",\"PTRATIO\",\"MEDV\"])\n", 390 | "\n", 391 | "FEATURES = [\"CRIM\", \"ZN\", \"INDUS\", \"NOX\", \"RM\",\n", 392 | " \"AGE\", \"DIS\", \"TAX\", \"PTRATIO\"]\n", 393 | "LABEL = \"MEDV\"\n", 394 | "\n", 395 | "feature_cols = [tf.feature_column.numeric_column(k)\n", 396 | " for k in FEATURES] #list of Feature Columns\n", 397 | "\n", 398 | "def generate_estimator(output_dir):\n", 399 | " return tf.estimator.DNNRegressor(feature_columns=feature_cols,\n", 400 | " hidden_units=[10, 10],\n", 401 | " model_dir=output_dir)\n", 402 | "\n", 403 | "def generate_input_fn(data_set):\n", 404 | " def input_fn():\n", 405 | " features = {k: tf.constant(data_set[k].values) for k in FEATURES}\n", 406 | " labels = tf.constant(data_set[LABEL].values)\n", 407 | " return features, labels\n", 408 | " return input_fn\n", 409 | "\n", 410 | "def serving_input_fn():\n", 411 | " #feature_placeholders are what the caller of the predict() method will have to provide\n", 412 | " feature_placeholders = {\n", 413 | " column.name: tf.placeholder(column.dtype, [None])\n", 414 | " for column in feature_cols\n", 415 | " }\n", 416 | " \n", 417 | " #features are what we actually pass to the estimator\n", 418 | " features = {\n", 419 | " # Inputs are rank 1 so that we can provide scalars to the server\n", 420 | " # but Estimator expects rank 2, so we expand dimension\n", 421 | " key: tf.expand_dims(tensor, -1)\n", 422 | " for key, tensor in feature_placeholders.items()\n", 423 | " }\n", 424 | " return tf.estimator.export.ServingInputReceiver(\n", 425 | " features, feature_placeholders\n", 426 | " )\n", 427 | "\n", 428 | "train_spec = tf.estimator.TrainSpec(\n", 429 | " input_fn=generate_input_fn(data_train),\n", 430 | " max_steps=3000)\n", 431 | "\n", 432 | "exporter = tf.estimator.LatestExporter('Servo', serving_input_fn)\n", 433 | "\n", 434 | "eval_spec=tf.estimator.EvalSpec(\n", 435 | " input_fn=generate_input_fn(data_test),\n", 436 | " steps=1,\n", 437 | " exporters=exporter)\n", 438 | "\n", 439 | "######START CLOUD ML ENGINE 
BOILERPLATE######\n", 440 | "if __name__ == '__main__':\n", 441 | " parser = argparse.ArgumentParser()\n", 442 | " # Input Arguments\n", 443 | " parser.add_argument(\n", 444 | " '--output_dir',\n", 445 | " help='GCS location to write checkpoints and export models',\n", 446 | " required=True\n", 447 | " )\n", 448 | " parser.add_argument(\n", 449 | " '--job-dir',\n", 450 | " help='this model ignores this field, but it is required by gcloud',\n", 451 | " default='junk'\n", 452 | " )\n", 453 | " args = parser.parse_args()\n", 454 | " arguments = args.__dict__\n", 455 | " output_dir = arguments.pop('output_dir')\n", 456 | "######END CLOUD ML ENGINE BOILERPLATE######\n", 457 | "\n", 458 | " #initiate training job\n", 459 | " tf.estimator.train_and_evaluate(generate_estimator(output_dir), train_spec, eval_spec)\n" 460 | ] 461 | }, 462 | { 463 | "cell_type": "markdown", 464 | "metadata": { 465 | "collapsed": true 466 | }, 467 | "source": [ 468 | "### 4) Train\n", 469 | "Now that our code is packaged we can invoke it using the gcloud command line tool to run the training. \n", 470 | "\n", 471 | "Note: Since our dataset is so small and our model is simple the overhead of provisioning the cluster is longer than the actual training time. Accordingly you'll notice the single VM cloud training takes longer than the local training, and the distributed cloud training takes longer than single VM cloud. For larger datasets and more complex models this will reverse" 472 | ] 473 | }, 474 | { 475 | "cell_type": "markdown", 476 | "metadata": {}, 477 | "source": [ 478 | "#### Set Environment Vars\n", 479 | "We'll create environment variables for our project name GCS Bucket and reference this in future commands.\n", 480 | "\n", 481 | "If you do not have a GCS bucket, you can create one using [these](https://cloud.google.com/storage/docs/creating-buckets) instructions." 482 | ] 483 | }, 484 | { 485 | "cell_type": "code", 486 | "execution_count": 85, 487 | "metadata": { 488 | "collapsed": true 489 | }, 490 | "outputs": [], 491 | "source": [ 492 | "GCS_BUCKET = 'gs://BUCKET_NAME' #CHANGE THIS TO YOUR BUCKET\n", 493 | "PROJECT = 'PROJECT_ID' #CHANGE THIS TO YOUR PROJECT ID\n", 494 | "REGION = 'us-central1' #OPTIONALLY CHANGE THIS" 495 | ] 496 | }, 497 | { 498 | "cell_type": "code", 499 | "execution_count": 86, 500 | "metadata": { 501 | "collapsed": true 502 | }, 503 | "outputs": [], 504 | "source": [ 505 | "import os\n", 506 | "os.environ['GCS_BUCKET'] = GCS_BUCKET\n", 507 | "os.environ['PROJECT'] = PROJECT\n", 508 | "os.environ['REGION'] = REGION" 509 | ] 510 | }, 511 | { 512 | "cell_type": "markdown", 513 | "metadata": {}, 514 | "source": [ 515 | "#### Run local\n", 516 | "It's a best practice to first run locally on a small dataset to check for errors. Note you can ignore the warnings in this case, as long as there are no errors." 517 | ] 518 | }, 519 | { 520 | "cell_type": "code", 521 | "execution_count": 87, 522 | "metadata": { 523 | "collapsed": false 524 | }, 525 | "outputs": [ 526 | { 527 | "name": "stdout", 528 | "output_type": "stream", 529 | "text": [ 530 | "1.5.0\n" 531 | ] 532 | }, 533 | { 534 | "name": "stderr", 535 | "output_type": "stream", 536 | "text": [ 537 | "/usr/local/lib/python2.7/dist-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. 
In future, it will be treated as `np.float64 == np.dtype(float).type`.\n", 538 | " from ._conv import register_converters as _register_converters\n", 539 | "2018-03-05 18:56:25.561527: I tensorflow/core/platform/cpu_feature_guard.cc:137] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2 FMA\n" 540 | ] 541 | } 542 | ], 543 | "source": [ 544 | "%%bash\n", 545 | "gcloud ai-platform local train \\\n", 546 | " --module-name=trainer.task \\\n", 547 | " --package-path=trainer \\\n", 548 | " -- \\\n", 549 | " --output_dir='./output'" 550 | ] 551 | }, 552 | { 553 | "cell_type": "markdown", 554 | "metadata": {}, 555 | "source": [ 556 | "#### Run on cloud (1 cloud ML unit)" 557 | ] 558 | }, 559 | { 560 | "cell_type": "markdown", 561 | "metadata": {}, 562 | "source": [ 563 | "First we specify which GCP project to use." 564 | ] 565 | }, 566 | { 567 | "cell_type": "code", 568 | "execution_count": 88, 569 | "metadata": { 570 | "collapsed": false 571 | }, 572 | "outputs": [ 573 | { 574 | "name": "stderr", 575 | "output_type": "stream", 576 | "text": [ 577 | "Updated property [core/project].\n" 578 | ] 579 | } 580 | ], 581 | "source": [ 582 | "%%bash\n", 583 | "gcloud config set project $PROJECT" 584 | ] 585 | }, 586 | { 587 | "cell_type": "markdown", 588 | "metadata": {}, 589 | "source": [ 590 | "Then we specify which GCS bucket to write to and a job name.\n", 591 | "Job names submitted to ML Engine must be unique within a project, so we append the system date/time. Make sure the GCS_BUCKET variable you set above points to a bucket you own." 592 | ] 593 | }, 594 | { 595 | "cell_type": "code", 596 | "execution_count": 89, 597 | "metadata": { 598 | "collapsed": false 599 | }, 600 | "outputs": [ 601 | { 602 | "name": "stdout", 603 | "output_type": "stream", 604 | "text": [ 605 | "jobId: housing_180305_185634\n", 606 | "state: QUEUED\n" 607 | ] 608 | }, 609 | { 610 | "name": "stderr", 611 | "output_type": "stream", 612 | "text": [ 613 | "Job [housing_180305_185634] submitted successfully.\n", 614 | "Your job is still active. You may view the status of your job with the command\n", 615 | "\n", 616 | " $ gcloud ai-platform jobs describe housing_180305_185634\n", 617 | "\n", 618 | "or continue streaming the logs with the command\n", 619 | "\n", 620 | " $ gcloud ai-platform jobs stream-logs housing_180305_185634\n" 621 | ] 622 | } 623 | ], 624 | "source": [ 625 | "%%bash\n", 626 | "JOBNAME=housing_$(date -u +%y%m%d_%H%M%S)\n", 627 | "\n", 628 | "gcloud ai-platform jobs submit training $JOBNAME \\\n", 629 | " --region=$REGION \\\n", 630 | " --module-name=trainer.task \\\n", 631 | " --package-path=./trainer \\\n", 632 | " --job-dir=$GCS_BUCKET/$JOBNAME/ \\\n", 633 | " --runtime-version 1.4 \\\n", 634 | " -- \\\n", 635 | " --output_dir=$GCS_BUCKET/$JOBNAME/output\n" 636 | ] 637 | }, 638 | { 639 | "cell_type": "markdown", 640 | "metadata": {}, 641 | "source": [ 642 | "#### Run on cloud (10 cloud ML units)\n", 643 | "Because we are using the TF Estimators interface, distributed computing just works! The only change we need to make to run in a distributed fashion is to add the [--scale-tier](https://cloud.google.com/ml/pricing#ml_training_units_by_scale_tier) argument. 
Cloud ML Engine then takes care of distributing the training across devices for you!\n" 644 | ] 645 | }, 646 | { 647 | "cell_type": "code", 648 | "execution_count": 90, 649 | "metadata": { 650 | "collapsed": false 651 | }, 652 | "outputs": [ 653 | { 654 | "name": "stdout", 655 | "output_type": "stream", 656 | "text": [ 657 | "jobId: housing_180305_185638\n", 658 | "state: QUEUED\n" 659 | ] 660 | }, 661 | { 662 | "name": "stderr", 663 | "output_type": "stream", 664 | "text": [ 665 | "Job [housing_180305_185638] submitted successfully.\n", 666 | "Your job is still active. You may view the status of your job with the command\n", 667 | "\n", 668 | " $ gcloud ai-platform jobs describe housing_180305_185638\n", 669 | "\n", 670 | "or continue streaming the logs with the command\n", 671 | "\n", 672 | " $ gcloud ai-platform jobs stream-logs housing_180305_185638\n" 673 | ] 674 | } 675 | ], 676 | "source": [ 677 | "%%bash\n", 678 | "JOBNAME=housing_$(date -u +%y%m%d_%H%M%S)\n", 679 | "\n", 680 | "gcloud ai-platform jobs submit training $JOBNAME \\\n", 681 | " --region=$REGION \\\n", 682 | " --module-name=trainer.task \\\n", 683 | " --package-path=./trainer \\\n", 684 | " --job-dir=$GCS_BUCKET/$JOBNAME \\\n", 685 | " --runtime-version 1.4 \\\n", 686 | " --scale-tier=STANDARD_1 \\\n", 687 | " -- \\\n", 688 | " --output_dir=$GCS_BUCKET/$JOBNAME/output" 689 | ] 690 | }, 691 | { 692 | "cell_type": "markdown", 693 | "metadata": {}, 694 | "source": [ 695 | "#### Run on cloud GPU (3 cloud ML units)" 696 | ] 697 | }, 698 | { 699 | "cell_type": "markdown", 700 | "metadata": {}, 701 | "source": [ 702 | "It also works with GPUs!\n", 703 | "\n", 704 | "\"BASIC_GPU\" corresponds to one Tesla K80 at the time of this writing; hardware is subject to change. 1 GPU is charged as 3 cloud ML units." 705 | ] 706 | }, 707 | { 708 | "cell_type": "code", 709 | "execution_count": 78, 710 | "metadata": { 711 | "collapsed": false 712 | }, 713 | "outputs": [ 714 | { 715 | "name": "stdout", 716 | "output_type": "stream", 717 | "text": [ 718 | "jobId: housing_180305_183840\n", 719 | "state: QUEUED\n" 720 | ] 721 | }, 722 | { 723 | "name": "stderr", 724 | "output_type": "stream", 725 | "text": [ 726 | "Job [housing_180305_183840] submitted successfully.\n", 727 | "Your job is still active. 
You may view the status of your job with the command\n", 728 | "\n", 729 | " $ gcloud ai-platform jobs describe housing_180305_183840\n", 730 | "\n", 731 | "or continue streaming the logs with the command\n", 732 | "\n", 733 | " $ gcloud ai-platform jobs stream-logs housing_180305_183840\n" 734 | ] 735 | } 736 | ], 737 | "source": [ 738 | "%%bash\n", 739 | "JOBNAME=housing_$(date -u +%y%m%d_%H%M%S)\n", 740 | "\n", 741 | "gcloud ai-platform jobs submit training $JOBNAME \\\n", 742 | " --region=$REGION \\\n", 743 | " --module-name=trainer.task \\\n", 744 | " --package-path=./trainer \\\n", 745 | " --job-dir=$GCS_BUCKET/$JOBNAME \\\n", 746 | " --runtime-version 1.4 \\\n", 747 | " --scale-tier=BASIC_GPU \\\n", 748 | " -- \\\n", 749 | " --output_dir=$GCS_BUCKET/$JOBNAME/output" 750 | ] 751 | }, 752 | { 753 | "cell_type": "markdown", 754 | "metadata": {}, 755 | "source": [ 756 | "#### Run on 8 cloud GPUs (24 cloud ML units)\n", 757 | "To train across multiple GPUs you use a [custom scale tier](https://cloud.google.com/ml/docs/concepts/training-overview#job_configuration_parameters).\n", 758 | "\n", 759 | "You specify the number and types of machines you want to run on in a config.yaml, then reference it via the --config command line argument.\n", 760 | "\n", 761 | "Here I am specifying a master node with machine type complex_model_m_gpu and one worker node of the same type. Each complex_model_m_gpu has 4 GPUs, so this job will run on 2x4=8 GPUs total. \n", 762 | "\n", 763 | "WARNING: The default project quota is 10 cloud ML units, so unless you have requested a quota increase you will get a quota exceeded error. This command is just for illustrative purposes." 764 | ] 765 | }, 766 | { 767 | "cell_type": "code", 768 | "execution_count": 79, 769 | "metadata": { 770 | "collapsed": false 771 | }, 772 | "outputs": [ 773 | { 774 | "name": "stdout", 775 | "output_type": "stream", 776 | "text": [ 777 | "Overwriting config.yaml\n" 778 | ] 779 | } 780 | ], 781 | "source": [ 782 | "%%writefile config.yaml\n", 783 | "trainingInput:\n", 784 | " scaleTier: CUSTOM\n", 785 | " masterType: complex_model_m_gpu\n", 786 | " workerType: complex_model_m_gpu\n", 787 | " workerCount: 1" 788 | ] 789 | }, 790 | { 791 | "cell_type": "code", 792 | "execution_count": 80, 793 | "metadata": { 794 | "collapsed": false 795 | }, 796 | "outputs": [ 797 | { 798 | "name": "stdout", 799 | "output_type": "stream", 800 | "text": [ 801 | "jobId: housing_180305_183843\n", 802 | "state: QUEUED\n" 803 | ] 804 | }, 805 | { 806 | "name": "stderr", 807 | "output_type": "stream", 808 | "text": [ 809 | "Job [housing_180305_183843] submitted successfully.\n", 810 | "Your job is still active. 
You may view the status of your job with the command\n", 811 | "\n", 812 | " $ gcloud ai-platform jobs describe housing_180305_183843\n", 813 | "\n", 814 | "or continue streaming the logs with the command\n", 815 | "\n", 816 | " $ gcloud ai-platform jobs stream-logs housing_180305_183843\n" 817 | ] 818 | } 819 | ], 820 | "source": [ 821 | "%%bash\n", 822 | "JOBNAME=housing_$(date -u +%y%m%d_%H%M%S)\n", 823 | "\n", 824 | "gcloud ai-platform jobs submit training $JOBNAME \\\n", 825 | " --region=$REGION \\\n", 826 | " --module-name=trainer.task \\\n", 827 | " --package-path=./trainer \\\n", 828 | " --job-dir=$GCS_BUCKET/$JOBNAME \\\n", 829 | " --runtime-version 1.4 \\\n", 830 | " --config config.yaml \\\n", 831 | " -- \\\n", 832 | " --output_dir=$GCS_BUCKET/$JOBNAME/output" 833 | ] 834 | }, 835 | { 836 | "cell_type": "markdown", 837 | "metadata": {}, 838 | "source": [ 839 | "### 5) Inspect Results Using TensorBoard\n", 840 | "\n", 841 | "TensorBoard is a utility that allows you to visualize your results.\n", 842 | "\n", 843 | "Expand the 'loss' graph. What is your evaluation loss? This is squared error, so take the square root of it to get the average error in dollars. Does this seem like a reasonable margin of error for predicting a housing price?\n", 844 | "\n", 845 | "To activate TensorBoard within the JupyterLab UI navigate to **File** - **New Launcher**. Then double-click the 'Tensorboard' icon on the bottom row.\n", 846 | "\n", 847 | "TensorBoard will appear in the new tab. Navigate through its tabs to see the active TensorBoard. The 'Graphs' and 'Projector' tabs offer additional detail, such as a visualization of the model's computation graph.\n", 848 | "\n", 849 | "You may close the TensorBoard tab when you are finished exploring." 850 | ] 851 | }, 852 | { 853 | "cell_type": "markdown", 854 | "metadata": {}, 855 | "source": [ 856 | "### 6) Deploy Model For Predictions\n", 857 | "\n", 858 | "Cloud ML Engine has a prediction service that will wrap our TensorFlow model with a REST API and allow remote clients to get predictions.\n", 859 | "\n", 860 | "You can deploy the model from the Google Cloud Console GUI, or you can use the gcloud command line tool. We will use the latter method. Note this will take up to 5 minutes."
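, "\n", "For context on the MODEL_LOCATION line in the next cell: the LatestExporter writes each export under a timestamped subdirectory of output/export/Servo, so the newest model is simply the largest timestamp. A minimal Python sketch of the same lookup the bash below performs with ls | tail (assuming you ran the 'Run local' step above, so ./output exists):\n", "\n", "    import os\n", "\n", "    export_base = 'output/export/Servo'\n", "    # Export dirs are named by Unix timestamp, so the lexicographic max is the newest.\n", "    latest = max(os.listdir(export_base))\n", "    print(os.path.join(export_base, latest))"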
861 | ] 862 | }, 863 | { 864 | "cell_type": "code", 865 | "execution_count": 96, 866 | "metadata": { 867 | "collapsed": false 868 | }, 869 | "outputs": [ 870 | { 871 | "name": "stderr", 872 | "output_type": "stream", 873 | "text": [ 874 | "Creating version (this might take a few minutes)......\n", 875 | "............................................................................................done.\n" 876 | ] 877 | } 878 | ], 879 | "source": [ 880 | "%%bash\n", 881 | "MODEL_NAME=\"housing_prices\"\n", 882 | "MODEL_VERSION=\"v1\"\n", 883 | "MODEL_LOCATION=output/export/Servo/$(ls output/export/Servo | tail -1) \n", 884 | "\n", 885 | "#gcloud ai-platform versions delete ${MODEL_VERSION} --model ${MODEL_NAME} #Uncomment to overwrite existing version\n", 886 | "#gcloud ai-platform models delete ${MODEL_NAME} #Uncomment to overwrite existing model\n", 887 | "gcloud ai-platform models create ${MODEL_NAME} --regions $REGION\n", 888 | "gcloud ai-platform versions create ${MODEL_VERSION} --model ${MODEL_NAME} --origin ${MODEL_LOCATION} --staging-bucket=$GCS_BUCKET" 889 | ] 890 | }, 891 | { 892 | "cell_type": "markdown", 893 | "metadata": {}, 894 | "source": [ 895 | "### 7) Get Predictions\n", 896 | "\n", 897 | "There are two flavors of the ML Engine Prediction Service: batch and online.\n", 898 | "\n", 899 | "Online prediction is more appropriate for latency-sensitive requests, as results are returned quickly and synchronously. \n", 900 | "\n", 901 | "Batch prediction is more appropriate for large prediction requests that you only need to run a few times a day.\n", 902 | "\n", 903 | "The prediction service expects requests in standard JSON format, so first we will create a JSON file with a couple of housing records.\n" 904 | ] 905 | }, 906 | { 907 | "cell_type": "code", 908 | "execution_count": 68, 909 | "metadata": { 910 | "collapsed": false 911 | }, 912 | "outputs": [ 913 | { 914 | "name": "stdout", 915 | "output_type": "stream", 916 | "text": [ 917 | "Writing records.json\n" 918 | ] 919 | } 920 | ], 921 | "source": [ 922 | "%%writefile records.json\n", 923 | "{\"CRIM\": 0.00632,\"ZN\": 18.0,\"INDUS\": 2.31,\"NOX\": 0.538, \"RM\": 6.575, \"AGE\": 65.2, \"DIS\": 4.0900, \"TAX\": 296.0, \"PTRATIO\": 15.3}\n", 924 | "{\"CRIM\": 0.00332,\"ZN\": 0.0,\"INDUS\": 2.31,\"NOX\": 0.437, \"RM\": 7.7, \"AGE\": 40.0, \"DIS\": 5.0900, \"TAX\": 250.0, \"PTRATIO\": 17.3}" 925 | ] 926 | }, 927 | { 928 | "cell_type": "markdown", 929 | "metadata": {}, 930 | "source": [ 931 | "Now we will pass this file to the prediction service using the gcloud command line tool. Results are returned immediately!" 932 | ] 933 | }, 934 | { 935 | "cell_type": "code", 936 | "execution_count": 69, 937 | "metadata": { 938 | "collapsed": false 939 | }, 940 | "outputs": [ 941 | { 942 | "name": "stdout", 943 | "output_type": "stream", 944 | "text": [ 945 | "PREDICTIONS\r\n", 946 | "[26098.3671875]\r\n", 947 | "[20871.384765625]\r\n", 948 | "\r\n", 949 | "\r\n", 950 | "Updates are available for some Cloud SDK components. To install them,\r\n", 951 | "please run:\r\n", 952 | " $ gcloud components update\r\n", 953 | "\r\n" 954 | ] 955 | } 956 | ], 957 | "source": [ 958 | "!gcloud ai-platform predict --model housing_prices --json-instances records.json" 959 | ] 960 | }, 961 | { 962 | "cell_type": "markdown", 963 | "metadata": {}, 964 | "source": [ 965 | "### Conclusion\n", 966 | "\n", 967 | "#### What we covered\n", 968 | "1. How to use TensorFlow's high-level Estimator API\n", 969 | "2. 
How to deploy TensorFlow code for distributed training in the cloud\n", 970 | "3. How to evaluate results using TensorBoard\n", 971 | "4. How to deploy the resulting model to the cloud for online prediction\n", 972 | "\n", 973 | "#### What we didn't cover\n", 974 | "1. How to leverage larger-than-memory datasets using TensorFlow's queueing system\n", 975 | "2. How to create synthetic features from our raw data to aid learning (Feature Engineering)\n", 976 | "3. How to improve model performance by finding the ideal hyperparameters using Cloud ML Engine's [HyperTune](https://cloud.google.com/ml-engine/docs/how-tos/using-hyperparameter-tuning) feature\n", 977 | "\n", 978 | "This lab is a great start, but adding in the above concepts is critical in getting your models to production-ready quality. These concepts are covered in Google's 1-week on-demand TensorFlow + Cloud ML course: https://www.coursera.org/learn/serverless-machine-learning-gcp" 979 | ] 980 | } 981 | ], 982 | "metadata": { 983 | "anaconda-cloud": {}, 984 | "kernelspec": { 985 | "display_name": "Python 2", 986 | "language": "python", 987 | "name": "python2" 988 | }, 989 | "language_info": { 990 | "codemirror_mode": { 991 | "name": "ipython", 992 | "version": 2 993 | }, 994 | "file_extension": ".py", 995 | "mimetype": "text/x-python", 996 | "name": "python", 997 | "nbconvert_exporter": "python", 998 | "pygments_lexer": "ipython2", 999 | "version": "2.7.12" 1000 | } 1001 | }, 1002 | "nbformat": 4, 1003 | "nbformat_minor": 2 1004 | } 1005 | -------------------------------------------------------------------------------- /DRAFT/keras-bow-model-multi-label-hypertune.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "colab": { 7 | "autoexec": { 8 | "startup": false, 9 | "wait_interval": 0 10 | } 11 | }, 12 | "colab_type": "code", 13 | "collapsed": true, 14 | "id": "frTMl3sShA3P" 15 | }, 16 | "source": [ 17 | "# Multi-Label Text Classification Using Keras + Cloud ML Engine \n", 18 | "\n", 19 | "\n", 20 | "\n", 21 | "#### This notebook will demonstrate the following steps:\n", 22 | "\n", 23 | "1. Load Raw Data\n", 24 | "2. Explore Data\n", 25 | "3. Preprocess Data\n", 26 | "4. Construct a model that learns to tag Stack Overflow posts\n", 27 | "5. Use Cloud ML Engine's automatic hyperparameter tuning feature to refine the model\n", 28 | "6. Inspect the results using TensorBoard\n", 29 | "7. Deploy the final model to production using Cloud ML Engine's online prediction service\n", 30 | "\n", 31 | "This notebook is intended to be run on Google Cloud Datalab: https://cloud.google.com/datalab/docs/quickstarts\n", 32 | "Datalab will have the required libraries installed by default for this code to work. If you choose to run this code outside of Datalab you may run into version and dependency issues which you will need to resolve."
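, "\n", "If you are outside Datalab, a quick way to check whether your environment matches what this notebook was tested with (a minimal sketch; the pinned install follows in the next cell):\n", "\n", "    import sys\n", "    import tensorflow as tf\n", "\n", "    print(sys.version_info)  # this notebook was authored against a Python 2.7 kernel\n", "    print(tf.__version__)    # tested with TensorFlow 1.4 (see the pip install below)"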
33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 111, 38 | "metadata": { 39 | "collapsed": false 40 | }, 41 | "outputs": [ 42 | { 43 | "data": { 44 | "text/html": [ 45 | "\n", 46 | " \n", 47 | " \n", 54 | " " 55 | ], 56 | "text/plain": [ 57 | "" 58 | ] 59 | }, 60 | "metadata": {}, 61 | "output_type": "display_data" 62 | }, 63 | { 64 | "name": "stdout", 65 | "output_type": "stream", 66 | "text": [ 67 | "Requirement already up-to-date: tensorflow==1.4 in /usr/local/lib/python2.7/dist-packages\n", 68 | "Requirement already up-to-date: mock>=2.0.0 in /usr/local/lib/python2.7/dist-packages (from tensorflow==1.4)\n", 69 | "Requirement already up-to-date: tensorflow-tensorboard<0.5.0,>=0.4.0rc1 in /usr/local/lib/python2.7/dist-packages (from tensorflow==1.4)\n", 70 | "Requirement already up-to-date: numpy>=1.12.1 in /usr/local/lib/python2.7/dist-packages (from tensorflow==1.4)\n", 71 | "Requirement already up-to-date: backports.weakref>=1.0rc1 in /usr/local/lib/python2.7/dist-packages (from tensorflow==1.4)\n", 72 | "Requirement already up-to-date: wheel in /usr/local/lib/python2.7/dist-packages (from tensorflow==1.4)\n", 73 | "Requirement already up-to-date: six>=1.10.0 in /usr/local/lib/python2.7/dist-packages (from tensorflow==1.4)\n", 74 | "Requirement already up-to-date: protobuf>=3.3.0 in /usr/local/lib/python2.7/dist-packages (from tensorflow==1.4)\n", 75 | "Requirement already up-to-date: enum34>=1.1.6 in /usr/local/lib/python2.7/dist-packages (from tensorflow==1.4)\n", 76 | "Requirement already up-to-date: funcsigs>=1; python_version < \"3.3\" in /usr/local/lib/python2.7/dist-packages (from mock>=2.0.0->tensorflow==1.4)\n", 77 | "Requirement already up-to-date: pbr>=0.11 in /usr/local/lib/python2.7/dist-packages (from mock>=2.0.0->tensorflow==1.4)\n", 78 | "Requirement already up-to-date: bleach==1.5.0 in /usr/local/lib/python2.7/dist-packages (from tensorflow-tensorboard<0.5.0,>=0.4.0rc1->tensorflow==1.4)\n", 79 | "Requirement already up-to-date: markdown>=2.6.8 in /usr/local/lib/python2.7/dist-packages (from tensorflow-tensorboard<0.5.0,>=0.4.0rc1->tensorflow==1.4)\n", 80 | "Requirement already up-to-date: futures>=3.1.1; python_version < \"3.2\" in /usr/local/lib/python2.7/dist-packages (from tensorflow-tensorboard<0.5.0,>=0.4.0rc1->tensorflow==1.4)\n", 81 | "Requirement already up-to-date: html5lib==0.9999999 in /usr/local/lib/python2.7/dist-packages (from tensorflow-tensorboard<0.5.0,>=0.4.0rc1->tensorflow==1.4)\n", 82 | "Requirement already up-to-date: werkzeug>=0.11.10 in /usr/local/lib/python2.7/dist-packages (from tensorflow-tensorboard<0.5.0,>=0.4.0rc1->tensorflow==1.4)\n", 83 | "Collecting setuptools (from protobuf>=3.3.0->tensorflow==1.4)\n", 84 | " Downloading setuptools-36.7.2-py2.py3-none-any.whl (482kB)\n", 85 | "\u001b[K 100% |████████████████████████████████| 491kB 2.0MB/s \n", 86 | "\u001b[?25hInstalling collected packages: setuptools\n", 87 | " Found existing installation: setuptools 36.7.1\n", 88 | " Uninstalling setuptools-36.7.1:\n", 89 | " Successfully uninstalled setuptools-36.7.1\n", 90 | "Successfully installed setuptools-36.7.2\n" 91 | ] 92 | } 93 | ], 94 | "source": [ 95 | "# This code was tested with TensorFlow v1.4\n", 96 | "# The import statements will not work with earlier versions, because Keras is in tf.contrib in those versions\n", 97 | "!pip install --upgrade tensorflow==1.4" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 112, 103 | "metadata": { 104 | "colab": { 105 | "autoexec": { 106 | "startup": 
false, 107 | "wait_interval": 0 108 | }, 109 | "height": 321, 110 | "output_extras": [ 111 | { 112 | "item_id": 1 113 | } 114 | ] 115 | }, 116 | "colab_type": "code", 117 | "collapsed": false, 118 | "executionInfo": { 119 | "elapsed": 2880, 120 | "status": "error", 121 | "timestamp": 1505781339378, 122 | "user": { 123 | "displayName": "Sara Robinson", 124 | "photoUrl": "//lh4.googleusercontent.com/-RR9n0dvbwgI/AAAAAAAAAAI/AAAAAAAAMYM/SOr5ZExpvXE/s50-c-k-no/photo.jpg", 125 | "userId": "112510032804989247452" 126 | }, 127 | "user_tz": 240 128 | }, 129 | "id": "783h64rGhA3T", 130 | "outputId": "d447b2ab-e321-4ee5-abd4-de2c0116302f" 131 | }, 132 | "outputs": [ 133 | { 134 | "data": { 135 | "text/html": [ 136 | "\n", 137 | " \n", 138 | " \n", 145 | " " 146 | ], 147 | "text/plain": [ 148 | "" 149 | ] 150 | }, 151 | "metadata": {}, 152 | "output_type": "display_data" 153 | }, 154 | { 155 | "name": "stdout", 156 | "output_type": "stream", 157 | "text": [ 158 | "You have TensorFlow version 1.4.0\n" 159 | ] 160 | } 161 | ], 162 | "source": [ 163 | "from __future__ import absolute_import\n", 164 | "from __future__ import division\n", 165 | "from __future__ import print_function\n", 166 | "\n", 167 | "import itertools\n", 168 | "import os\n", 169 | "\n", 170 | "%matplotlib inline\n", 171 | "import matplotlib.pyplot as plt\n", 172 | "import numpy as np\n", 173 | "import pandas as pd\n", 174 | "import tensorflow as tf\n", 175 | "import googleapiclient.discovery\n", 176 | "\n", 177 | "from sklearn.preprocessing import LabelBinarizer, LabelEncoder\n", 178 | "from sklearn.metrics import confusion_matrix\n", 179 | "\n", 180 | "from tensorflow.python.keras.models import Sequential\n", 181 | "from tensorflow.python.keras.layers import Dense, Activation, Dropout\n", 182 | "from tensorflow.python.keras.preprocessing import text, sequence\n", 183 | "from tensorflow.python.keras import utils\n", 184 | "from tensorflow.contrib.saved_model.python.saved_model.utils import simple_save\n", 185 | "\n", 186 | "from collections import Counter\n", 187 | "\n", 188 | "print(\"You have TensorFlow version\", tf.__version__)" 189 | ] 190 | }, 191 | { 192 | "cell_type": "markdown", 193 | "metadata": {}, 194 | "source": [ 195 | "### 1) Load Raw Data\n", 196 | "\n", 197 | "We will use a publicly available dataset of Stack Overflow posts. It is hosted for free on Google's BigQuery platform [here](https://bigquery.cloud.google.com/table/bigquery-public-data:stackoverflow.posts_questions?pli=1&tab=details).\n", 198 | "\n", 199 | "We will extract a subset of this data using [this](https://bigquery.cloud.google.com/savedquery/38969729279:919b6f9f680b4cc6ace82632eeb357fd) query.\n", 200 | "\n", 201 | "Datalab has a built-in library to easily load data from BigQuery, which we will use below." 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 114, 207 | "metadata": { 208 | "collapsed": false 209 | }, 210 | "outputs": [ 211 | { 212 | "data": { 213 | "text/html": [ 214 | "\n", 215 | " \n", 216 | " \n", 223 | " " 224 | ], 225 | "text/plain": [ 226 | "" 227 | ] 228 | }, 229 | "metadata": {}, 230 | "output_type": "display_data" 231 | }, 232 | { 233 | "name": "stdout", 234 | "output_type": "stream", 235 | "text": [ 236 | "Loaded 1000 rows\n" 237 | ] 238 | }, 239 | { 240 | "data": { 241 | "text/html": [ 242 | "
\n", 243 | "\n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | "
tagspost
0c#how to speed up port scanner c# i created ve...
1c#program does not contain a static ‘main’ metho...
2c#c# default value before generic i have one ol...
3c#unable to cast object of type htmlagilitypack...
4phphow to match non-keyboard characters using php...
5phpphp script for inserting images i ve got thes...
6phpphp list structure let s say i have an array ...
7phpreverse a string in php without using any stri...
8phpwhen would empty() return false and $var == ...
9phpproper way of using output of one class in ano...
\n", 304 | "
" 305 | ], 306 | "text/plain": [ 307 | " tags post\n", 308 | "0 c# how to speed up port scanner c# i created ve...\n", 309 | "1 c# program does not contain a static ‘main’ metho...\n", 310 | "2 c# c# default value before generic i have one ol...\n", 311 | "3 c# unable to cast object of type htmlagilitypack...\n", 312 | "4 php how to match non-keyboard characters using php...\n", 313 | "5 php php script for inserting images i ve got thes...\n", 314 | "6 php php list structure let s say i have an array ...\n", 315 | "7 php reverse a string in php without using any stri...\n", 316 | "8 php when would empty() return false and $var == ...\n", 317 | "9 php proper way of using output of one class in ano..." 318 | ] 319 | }, 320 | "execution_count": 114, 321 | "metadata": {}, 322 | "output_type": "execute_result" 323 | } 324 | ], 325 | "source": [ 326 | "import google.datalab.bigquery as bq\n", 327 | "\n", 328 | "query = \"\"\"\n", 329 | "SELECT tags, TRIM(LOWER(REGEXP_REPLACE(CONCAT(title, \\' \\', body), r\\'[\\\"\\\\n\\\\\\'?,]|

|

\\',\\\" \\\"))) as post \n", 330 | "FROM `bigquery-public-data.stackoverflow.posts_questions`\n", 331 | "WHERE REGEXP_CONTAINS(tags, r\\\"javascript|java|c#|php|android|jquery|python\\\") \n", 332 | "LIMIT 1000\n", 333 | "\"\"\"\n", 334 | "\n", 335 | "data = bq.Query(query).execute(output_options=bq.QueryOutput.dataframe()).result()\n", 336 | "NUM_ROWS = data.shape[0]\n", 337 | "print(\"Loaded {} rows\".format(NUM_ROWS))\n", 338 | "data.head(10)" 339 | ] 340 | }, 341 | { 342 | "cell_type": "markdown", 343 | "metadata": {}, 344 | "source": [ 345 | "If you're running from a jupyter notebook on your laptop/workstation (as opposed to datalab) I've hosted the data as a static .CSV file in a publically accessible URL for convencience." 346 | ] 347 | }, 348 | { 349 | "cell_type": "code", 350 | "execution_count": 115, 351 | "metadata": { 352 | "colab": { 353 | "autoexec": { 354 | "startup": false, 355 | "wait_interval": 0 356 | } 357 | }, 358 | "colab_type": "code", 359 | "collapsed": false, 360 | "id": "c7te21f7hA3V" 361 | }, 362 | "outputs": [ 363 | { 364 | "data": { 365 | "text/html": [ 366 | "\n", 367 | " \n", 368 | " \n", 375 | " " 376 | ], 377 | "text/plain": [ 378 | "" 379 | ] 380 | }, 381 | "metadata": {}, 382 | "output_type": "display_data" 383 | } 384 | ], 385 | "source": [ 386 | "# alternative way to download data for non-datalab users\n", 387 | "# uncomment below lines to run\n", 388 | "\n", 389 | "#data = pd.read_csv(\"https://storage.googleapis.com/vijay-public/text_classification/results-1000.csv\")\n", 390 | "#NUM_ROWS = data.shape[0]\n", 391 | "#print(\"Loaded {} rows\".format(NUM_ROWS))\n", 392 | "#data.head()" 393 | ] 394 | }, 395 | { 396 | "cell_type": "markdown", 397 | "metadata": {}, 398 | "source": [ 399 | "### 2) Data Exploration \n", 400 | "\n", 401 | "For multi-label exploration it's useful to get an idea of the distribution of our labels. Here we will count the number of occurences of each of most common labels. " 402 | ] 403 | }, 404 | { 405 | "cell_type": "code", 406 | "execution_count": 116, 407 | "metadata": { 408 | "collapsed": false 409 | }, 410 | "outputs": [ 411 | { 412 | "data": { 413 | "text/html": [ 414 | "\n", 415 | " \n", 416 | " \n", 423 | " " 424 | ], 425 | "text/plain": [ 426 | "" 427 | ] 428 | }, 429 | "metadata": {}, 430 | "output_type": "display_data" 431 | }, 432 | { 433 | "name": "stdout", 434 | "output_type": "stream", 435 | "text": [ 436 | "5 most common classes:\n" 437 | ] 438 | }, 439 | { 440 | "data": { 441 | "text/plain": [ 442 | "[(u'javascript', 214),\n", 443 | " (u'php', 176),\n", 444 | " (u'java', 166),\n", 445 | " (u'c#', 163),\n", 446 | " (u'android', 148)]" 447 | ] 448 | }, 449 | "execution_count": 116, 450 | "metadata": {}, 451 | "output_type": "execute_result" 452 | } 453 | ], 454 | "source": [ 455 | "#Generate list of N most common labels\n", 456 | "NUM_CLASSES = 5\n", 457 | "labels_list = []\n", 458 | "\n", 459 | "counts = Counter('|'.join(data['tags'].tolist()).split('|'))\n", 460 | "classes = counts.most_common(NUM_CLASSES)\n", 461 | "\n", 462 | "for i in range(0,NUM_CLASSES):\n", 463 | " labels_list.append(classes[i][0])\n", 464 | " \n", 465 | "print (\"{} most common classes:\".format(NUM_CLASSES))\n", 466 | "classes" 467 | ] 468 | }, 469 | { 470 | "cell_type": "markdown", 471 | "metadata": {}, 472 | "source": [ 473 | "Since our ML algorithm will expect numbers as labels, not words, we define utility function to switch back and forth between the human-friendly text and machine-friendly vector representation of the labels. 
" 474 | ] 475 | }, 476 | { 477 | "cell_type": "code", 478 | "execution_count": 117, 479 | "metadata": { 480 | "collapsed": false 481 | }, 482 | "outputs": [ 483 | { 484 | "data": { 485 | "text/html": [ 486 | "\n", 487 | " \n", 488 | " \n", 495 | " " 496 | ], 497 | "text/plain": [ 498 | "" 499 | ] 500 | }, 501 | "metadata": {}, 502 | "output_type": "display_data" 503 | }, 504 | { 505 | "name": "stdout", 506 | "output_type": "stream", 507 | "text": [ 508 | "php\n", 509 | "[0 1 0 0 0]\n" 510 | ] 511 | }, 512 | { 513 | "data": { 514 | "text/plain": [ 515 | "[u'php']" 516 | ] 517 | }, 518 | "execution_count": 117, 519 | "metadata": {}, 520 | "output_type": "execute_result" 521 | } 522 | ], 523 | "source": [ 524 | "#labels_list: A list of the valid classes\n", 525 | "#tags: A list of tags for a post\n", 526 | "#returns an ndarray with ones for the active classes\n", 527 | "def labels_to_array(tags,labels_list=labels_list):\n", 528 | " array = np.zeros(len(labels_list),dtype=np.int8)\n", 529 | " tags = tags.split('|') #split tags from pipe separated string into list\n", 530 | " for tag in tags:\n", 531 | " try:\n", 532 | " array[labels_list.index(tag)] = 1\n", 533 | " except ValueError: \n", 534 | " None\n", 535 | " return array\n", 536 | "\n", 537 | "#translate machine readable array back to human labels\n", 538 | "def array_to_labels(array, labels_list=labels_list, threshold = 1):\n", 539 | " labels = []\n", 540 | " i=0\n", 541 | " for flag in array:\n", 542 | " if flag >= threshold: \n", 543 | " labels.append(labels_list[i])\n", 544 | " i=i+1\n", 545 | " return labels\n", 546 | "\n", 547 | "#test utility functions\n", 548 | "print(data['tags'][4])\n", 549 | "array = labels_to_array(data['tags'][4])\n", 550 | "print(array)\n", 551 | "array_to_labels(array)" 552 | ] 553 | }, 554 | { 555 | "cell_type": "code", 556 | "execution_count": 119, 557 | "metadata": { 558 | "collapsed": false 559 | }, 560 | "outputs": [ 561 | { 562 | "data": { 563 | "text/html": [ 564 | "\n", 565 | " \n", 566 | " \n", 573 | " " 574 | ], 575 | "text/plain": [ 576 | "" 577 | ] 578 | }, 579 | "metadata": {}, 580 | "output_type": "display_data" 581 | }, 582 | { 583 | "data": { 584 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAY4AAAEcCAYAAADQqlM0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XmcXFWd9/FPhx0TIpFO0ARBMuGr6CiILOMyA4JK0DE+\njmwuEOAZGQXFwQ2cQWXGUXAXnRlQEAMOgrgRRxwZxEdcUVYXnG/YQhLykAQ7QQKyJT1/3NOh0vZS\nN91VXd35vl+vvLruqXPr/u6tSv3qnHPvuV29vb1EREQ0a9JYBxAREeNLEkdERNSSxBEREbUkcURE\nRC1JHBERUUsSR0RE1JLEEZs1Sb+R9Jdt2taFknok/bwd24tola5cxxFjQdKLgbOBZwOPA78D3mH7\nhjGM6QPAbNvHtOC1XwxcAuxh++EBnj8WuAB4qBStAn4IfMT2bU1u40Jgqe33j07UY7ud6FxpcUTb\nSZoCfBv4DLAjMBM4E3ikBdvaYrRfcxPtBiweKGk0+KntHYCpwCHAH4EbJO3ZhvgimpYWR7SdpH2A\n/7Y9bZDnjwX+FrgROAZYDpxs+5ry/HzgPcAsYCXwUdufL8/9FfBl4LPA3wNXAacCXwJeDKwHfmP7\nr0r9u4ATgK2AhSWER4A7gH8BTrP9gobY3gm8yPZrB4j7qcC5ZTu/L3GdL+l44F+BLamSwSdsnznA\nPp9g+y/7lX8b+KPtI8ryV4GXANsCtwBvsf07SX9btrEeeBT4ge15kt5bjuV0YAnwj7a/VV5rNlUr\nZ6+yzvdtH12eeyZwDrBPOcbvt335YNvpfyxiYkuLI8bCImCdpC9JOlTSkweosz9wO/AU4IPANxrq\nrQAOK7/OjwM+JWmvhnV3Bp4MPB14M/BOYGl5renA+/pvzPb3gA8Dl9newfbeVIlkN0lqqPoG4KJB\n9utSqi/nnYHDgQ9LOsj2F4G/A35WXvvMQdYfyDeoEkWfK4HZZT9upOr+wvYXgP+gSlY7NHyZ306V\n6HagatV9WdKM8tw/A9+z/WSqJPxZAEnbUyXcLwM7AUcD/ybpWUNsJzYjSRzRdrYf4Ilf/58HVkq6\nQlJ3Q7UVts+xvc72VwEDryzrf9f24vL4R1Rfco1fruuAD9h+zPYjwGPAU4FnlNf7SZNxPgpcBrwR\nQNKzgV2B7/SvK2kW8ELgvWW7twDnA29q6qAMbjmwoWVm+0u2H7L9GPBPwPNK199g+/B12yvK48uB\n24D9ytOPAbtKmmn7Uds/LeWvAu6yfZHtXts3A18HXjfCfYkJYsuxDiA2T7YNHA8gaQ+qX7GfpvpF\nD3BPv1XuBp5W6s8F3g/sQfXjZzvgVw11V5Uv1j4fpfq1fZWkXuALts9uMtSLqH7Vn0GVQL7a77X7\nPA3osf1QQ9ndVF09IzET6AGQNImqVfQ6qpZAb/m3E/DAQCtLOoaqy263UvSkUh/g3cCHgF9I6gE+\naftCquR4QCkD6AK2YPCWVmxm0uKIMWd7EdUYxHMaimf2q/Z0YLmkrYGvUSWDbts7At+l+nLrs9HA\nne0Hbb/L9mzgr4FTJR00QCh/MuBn+zrgUUkvAV4PXDzIbiwHpkl6Ur+Y+yfAul4LXFsev4Eq/peW\n7qXdqPa7b983il/S06ladG+1vWM5Vr/tq297pe03255J1ZX2b5J2p+rW+3+2p5V/O5ZuqZMH2k5s\nftLiiLYrYwavpBpPuEfSLlT96D9rqDZd0tuAfwf+D/BMqi6ircu/+2yvL62PlwO/HmJ7rwT+x/Yd\nwFqq038fH6DqCuAQSV22G78cLwY+BzzW0J2zEdvLJP0U+IikdwOiGnR//TCHo1FXiXcSVdJ5J/CX\nwAHl+clUA/erS4L6CBt/ia8Adm9YfhJVd+B95TWPpSE5S3od1bjLPcCaUncd8J9lP95INW7TBTwP\neKC0FPtvJzYzaXHEWHiAavD7OkkPAD+l6mp6V0Od64A5wH1Ug7h/Y3uN7bXA24HLS1fKUcAVw2xv\nDnB12dZPgH8tYyOw8Rfv5VRfkr+XdH1D+cVUX7jDddUcDTyDqvXxdeCMvjPBmnSApD8A9wM/oEoU\n+9q+tTx/EdXg+z3Ab6iOW6MLgGeXiwy/Yft3wCeBnwP3Ul0z8+OG+vtSvQd/AL4FvN323eUYv5zq\n2C4v/84CthloOzX2LyaIlp+OK+nvqX55raf6VXgcVX/wpVTn8N8IvMn246Ub4iKqfuH7gCNtL2lp\ngNFxBjs1daxI2pbqV/bzS6slYrPW0haHpKcBb6P6D/dcqq6xo6muGP6EbVE1kU8oq5xANcA4h2qg\n9KOtjC+iSW8FfpmkEVFpR1fVFsCTJG1JdfbLcuAgqqY8wALgNeXxvLIM1QDowW2IL2JQ5QLBt1GN\nN0QELU4ctpcDn+CJftn7qbqm1theX6ot44kzaGZSndGB7XXAGkkDXl0cE5ftBZ3STWX7GeXfLWMd\nS0SnaHVX1ZOpWhG7Uo1rPAmYO0DVvoGWrn7lXeTUv4iIjtLq03EPAe603XcB0zeprq59sqRJpdUx\ni6r7CqrWxy5U5+tvAexge/VQG+jt7e3t6uqfbyIiYhib/MXZ6sSxhOoUw22pzj8/GPgl1ZxBh1NN\n53AsT5xOubAsX1eeH/ZUxq6uLlatGvCi2dgE3d1TcjxHSY7l6MrxHF3d3YPOVDOsVo9x/IJqkPsm\nqpk8u6iuZD2N6urdRVTz8FxQVrkA2EnSbcA7Sr2IiOggE2Fa9d78Chk9+VU3enIsR1eO5+jq7p6y\nyV1VuXI8IiJqSeKIiIhakjgiIqKWJI6IiKgliSMiImpJ4oiIiFpyI6eIFli3bh2LFi2ip2ftWIfC\nbrvtzhZbbDHWYcQEksQR0QKLF9/JKR9byPZTp49pHA/dv5LPvPvVzJ49Z0zjiIkliSOiRbafOp3J\nO/a/dXrE+JcxjoiIqCWJIyIiakniiIiIWpI4IiKiliSOiIioJYkjIiJqSeKIiIhakjgiIqKWJI6I\niKgliSMiImpp6ZQjkvYALgN6gS5gd+AM4OJSviuwGDjC9v1lnXOAucCDwHzbN7cyxoiIqKelLQ7b\ni2zvbfv5wD5UyeCbwGnA1bYFXAOcDiBpLjDb9hzgRODcVsYXERH1tbOr6hDgDttLgXnAglK+oCxT\n/l4EYPs6YKqkGW2MMSIihtHOxHEkcEl5PMP2CgDb9wJ9c0/PBJY2rHNPKYuIiA7RlsQhaSvg1cDl\npah3kKpdA5QNVjciIsZAu+7HMRe4wfZ9ZXmFpBm2V0jaGVhZypcBuzSsNwtYPtyLd3dPGdVgN3c5\nniO3evXksQ5hg2nTJk+Y93Si7Md4167EcTTwlYblhcB84Ozy94qG8pOAyyQdAKzp69IayqpVD4xm\nrJu17u4pOZ6joBNuGdunp2fthHhP89kcXSNJwi3vqpK0HdXA+Dcais8GXibJwMHAWQC2rwTuknQ7\ncB7w1lbHFxER9bS8xWH7j0B3v7IeqmQyUP2TWx1TRERs
ulw5HhERtSRxRERELUkcERFRSxJHRETU\nksQRERG1JHFEREQtSRwREVFLEkdERNSSxBEREbUkcURERC1JHBERUUsSR0RE1JLEERERtSRxRERE\nLUkcERFRSxJHRETUksQRERG1JHFEREQtSRwREVFLy+85LmkqcD7wHGA9cDywCLgM2BVYDBxh+/5S\n/xxgLvAgMN/2za2OMSIimteOFsdngCttPwt4HvA/wGnA1bYFXAOcDiBpLjDb9hzgRODcNsQXERE1\ntDRxSJoCvMT2hQC2Hy8ti3nAglJtQVmm/L2o1L0OmCppRitjjIiIelrdVbU7cJ+kC6laG9cD7wBm\n2F4BYPteSdNL/ZnA0ob17yllK1ocZ0RENKnViWNL4PnASbavl/Qpqm6q3kHqdw1QNljdDbq7p2x6\nhPEncjxHbvXqyWMdwgbTpk2eMO/pRNmP8a7ViWMZsNT29WX561SJY4WkGbZXSNoZWNlQf5eG9WcB\ny4fbyKpVD4xiyJu37u4pOZ6joKdn7ViHsEFPz9oJ8Z7mszm6RpKEWzrGUbqjlkraoxQdDPwWWAjM\nL2XzgSvK44XAMQCSDgDW9HVpRUREZ2j56bjA24H/kLQVcCdwHLAF8FVJxwNLgMMBbF8p6TBJt1Od\njntcG+KLiIgaWp44bN8C7DvAU4cMUv/k1kYUEREjkSvHIyKiliSOiIioJYkjIiJqSeKIiIhakjgi\nIqKWJI6IiKgliSMiImoZNnFIOlLSDuXxP0n6L0n7tD60iIjoRM20OP7R9h8k7Qe8gmra88+2NqyI\niOhUzSSOx8rflwHn274E2LZ1IUVERCdrJnH0SjoSOAq4upRt3bqQIiKikzWTOE4GjqZqbdwlaQ7w\ng9aGFRERnaqZSQ63s/2avgXbt0n6ZgtjioiIDtZMi+PjA5R9bLQDiYiI8WHQFoekPwP2AHaQdFjD\nU1OB7VsdWEREdKahuqpeRHV3vhnAuxvK/wC8q4UxRUREBxs0cdheACyQNN/2l9oXUkREdLJmxjju\nlDQZQNIJks6V9IwWxxURER2qmcTxOeBBSc8G3kl1j/ALWhpVRER0rGZOx33cdq+kucC/2/6spMOb\n3YCkxcD9wHrgMdv7SdoRuAzYFVgMHGH7/lL/HGAu8CAw3/bNNfYnIiJarJkWx5aS9gdeC1xTyrao\nsY31wIG297a9Xyk7Dbjatsprng5QktNs23OAE4Fza2wnIiLaoJnEcQZwHvBz27+VtAdwe41tdA2w\nnXnAgvJ4QVnuK78IwPZ1wFRJM2psKyIiWmzYrirbVwBXNCwvomp9NKsX+J6kXuA82+cDM2yvKK93\nr6Tppe5MYGnDuveUshU1thcRES00bOKQtD1Vq+MQqiRwNfAh2w81uY0XluTQDVwlyeV1BtI1QNlg\ndTfo7p7SZCjRjBzPkVu9evJYh7DBtGmTJ8x7OlH2Y7xrZnD8s6XeO8ry/6U60+r4ZjZg+97yd5Wk\nbwH7ASskzbC9QtLOwMpSfRmwS8Pqs4Dlw21j1aoHmgklmtDdPSXHcxT09Kwd6xA26OlZOyHe03w2\nR9dIknAziWNf28/tW5D0U+CWZl68tFYm2V4r6UnAy4EzgYVUV6WfXf72dYUtBE4CLpN0ALCmr0sr\nIiI6QzOD413lS7/P9gzcpTSQGcCPJd0E/Bz4tu2rqBLGy0q31cHAWQC2rwTuknQ71YD8W5vcTkRE\ntEkzLY4vAz+TdCnVeMNRlDOfhmP7LmCvAcp7qMZMBlrn5GZeOyIixsawLQ7bZwPvAaYBOwHvtZ1p\n1SMiNlNDtjgk/TnV1Oq/sv1f7QkpIiI62aAtDklvB35E1dr4paQj2hZVRER0rKG6qv4OeI7t/YEX\n8sTpuBERsRkbKnE8YnsZgO1bgW3bE1JERHSyocY4+t8ydqPlcupsRERsZoZKHEvY+JaxSxuWe4Ek\njoiIzdBQt449qJ2BRETE+NDMleMREREbJHFEREQtSRwREVHLUBcAfqX8PaV94URERKcbqsXxnPL3\n2HYEEhER48NQp+NeL+l+YDtJKxvKu4Be29MHWS8iIiawQVscto+jmuDwNmDfhn8vKH8jImIzNOTs\nuOXWrvvb7pz7YEZExJhq5kZO20o6n+rGS+uBq4FTbK9qaWQREdGRmjkd9zxgEfA8YG+qrqvzWhlU\nRER0rmZaHLNt/03D8gck3dyqgCIiorM1kzgmSZpueyWApOnUvHBQ0iTgemCZ7VdL2g24FNgRuBF4\nk+3HJW1NdT/zfYD7gCNtL6mzrYiIaK1mEsDHgZskfV7SecANwEdrbucU4NaG5bOBT9gWsAY4oZSf\nAPTYngN8ehO2ExERLTZs4rB9EfBy4FfAb4BX2P5ysxuQNAs4DDi/ofilwNfL4wXAa8rjeWUZ4GvA\nwc1uJyIi2qOZrips/xb47SZu41NU9/GYCiDpKcBq2+vL88uAmeXxTKr7fmB7naQ1kqbZ7tnEbUdE\nxChrKnFsKkmvBFbYvlnSgaW4q/xr1NvwXKOuhucG1d09ZSRhRj85niO3evXksQ5hg2nTJk+Y93Si\n7Md419LEAbwIeHW55ex2wBSqsYupkiaVVscsYHmpvwzYBVguaQtgB9urh9vIqlUPtCT4zVF395Qc\nz1HQ09M518z29KydEO9pPpujayRJeMgxDkmT+t13vBbb77P9dNu7A0cB19h+I/AD4PBS7VjgivJ4\nIU9Mqng4cM2mbjsiIlpjyMRRWgQfasF2TwNOlbQImAZcUMovAHaSdBvwjlIvIiI6SDNdVTdL2s/2\nL0ayIds/BH5YHt8F7D9AnUeAI0aynYiIaK1mEsc+wE9KK2BDx63t/VoWVUREdKxmEsfbWx5FRESM\nG8MmjtLFhKTuzIgbERHDXjkuaX9Jd1PNKYWkF0j6fMsji4iIjtTMXFWfBOZSTTqI7euprs+IiIjN\nUDOJY2vbt/Yre7QVwUREROdrJnE8ImkyZeoPSXsCD7c0qoiI6FjNnFX1L8BVwNMkfQk4FHhjK4OK\niIjO1cxZVd+VZOAVVJMOfsj27S2PLCIiOlKzkxwuBX5E1V21uGXRREREx2vmdNwXA3dS3XjpW8Cd\nkl7Y6sAiIqIzNTM4/q/AG2yr3NL1DcC/tzasiIjoVM0kDmxf2/D4R60LJyIiOl0zieO/Jb2hb0HS\n64HvtS6kiIjoZIMOjktaRTUY3kV174zzy1PbUF1F/p7WhxcREZ1mqLOqXtC2KCIiYtwYNHHYvrud\ngURExPgw7HUckl4EnAXMLvW7gF7b01scW0REdKBmLgC8EPgH4AZgXWvDiYiITtdM4lht+/JNeXFJ\n2wDXAluXbX3N9pmSdgMuBXakus/Hm2w/Lmlr4CKq29XeBxxpe8mmbDsiIlqjmdNxL5H0d5KmSdq+\n718zL277EeAg23sDewFzJe0PnA18wraANcAJZZUTgJ5yoeGngY/W3aGIiGitZhLHSuDjwCrgAWBt\n+dsU2w+Vh9t
QtTp6gYOopjABWAC8pjyeV5YBvgYc3Ox2IiKiPZrpqvoIcCBwo+31dTcgaRLV+Mhs\nqulL7gDWNLzWMmBmeTyTakJFbK+TtEbSNNs9dbcbERGt0UziWF5uF7tJSoLYW9IOwDeBZw1Qrbf8\n7epX3tXw3KC6u6dsangxgBzPkVu9evJYh7DBtGmTJ8x7OlH2Y7xrJnF8X9LZwGU03PlvgNvJDsn2\nHyT9EDgAeLKkSSWpzAKWl2rLgF2A5ZK2AHawvXq41161qumesxhGd/eUHM9R0NOzdqxD2KCnZ+2E\neE/z2RxdI0nCzSSOvrv9HdFQ1gvsPtyKknYCHrN9v6TtgEOorgn5AXA4VTI6FriirLKwLF9Xnr+m\nifgiIqKNmrkD4DNG8PpPBRaUcY5JwGW2r5T0O+BSSf8M3ARcUOpfAFws6Tbg98BRI9h2RES0QDNX\nju85UHkzXVW2fw08f4Dyu4D9Byh/hI1bNhER0WGa6ar6TsPjbYEZwN3ASFoiERExTtXuqpJ0MDC3\nZRFFRERHa+oOgI1sfx94aQtiiYiIcaDuGMckYF+qq8AjImIzVHeM43HgdqpTZiMiYjPU6tNxIyJi\nghnqnuMDnobbp+6V4xERMTEM1eL4zgBlvcAUYBqwRUsiioiIjjbUPcf7n4b7JOBU4CTgky2OKyIi\nOlQzZ1VtCbwFeC9wJbCP7XtaHVhERHSmIROHpGOADwK/BF5qe1E7goqIiM411OD4r4DJVInjemDL\nxgHzDI5HRGyehmpx7EA1GH5m+dt4k6WmplWPiIiJZ6jB8d3aGEdERIwTteeqioiIzVsSR0RE1JLE\nERERtSRxRERELUkcERFRSzPTqm8ySbOAi4CdgXXAF2yfI2lH4DJgV2AxcITt+8s651DdYfBBYL7t\nm1sZY0RE1NPqFsfjwKm29wT+AjhJ0jOB04CrbQu4BjgdQNJcYLbtOcCJwLktji8iImpqaeKwfW9f\ni8H2WuB3wCxgHrCgVFtQlil/Lyr1rwOmSprRyhgjIqKeto1xSNoN2Av4OTDD9gqokgswvVSbCSxt\nWO2eUhYRER2ipWMcfSRNBr4GnGJ7raTeQap2DVA2WN0NurunjCS86CfHc+RWr5481iFsMG3a5Anz\nnk6U/RjvWp44yrTsXwMutn1FKV4haYbtFZJ2BlaW8mXALg2rzwKWD7eNVaseGM2QN2vd3VNyPEdB\nT8/asQ5hg56etRPiPc1nc3SNJAm3o6vqi8Cttj/TULYQmF8ezweuaCg/BkDSAcCavi6tiIjoDK0+\nHfdFwBuAX0u6iarb6X3A2cBXJR0PLAEOB7B9paTDJN1OdTruccNt4/0f/hzr1relx21QO3dP5ajX\nvmpMY4iIaJeWfuPa/gmD35v8kEHWObnONn626GG2fcqcuqGNqvv+cNeYbj8iop1y5XhERNSSxBER\nEbUkcURERC1JHBERUUsSR0RE1JLEERERtSRxRERELUkcERFRSxJHRETUksQRERG1JHFEREQtSRwR\nEVFLEkdERNSSxBEREbUkcURERC1JHBERUUsSR0RE1JLEERERtbT6nuMXAK8CVth+binbEbgM2BVY\nDBxh+/7y3DnAXKr7jc+3fXMr44uIiPpa3eK4EHhFv7LTgKttC7gGOB1A0lxgtu05wInAuS2OLSIi\nNkFLE4ftHwOr+xXPAxaUxwvKcl/5RWW964Cpkma0Mr6IiKhvLMY4ptteAWD7XmB6KZ8JLG2od08p\ni4iIDtJJg+NdA5T1tj2KiIgYUksHxwexQtIM2ysk7QysLOXLgF0a6s0Clrc9uk2w9dZb0d09ZazD\nGDUTaV/GyurVk8c6hA2mTZs8Yd7TibIf4107EkcXG7cmFgLzgbPL3ysayk8CLpN0ALCmr0ur0z36\n6GOsWvXAWIcxKrq7p0yYfRlLPT1rxzqEDXp61k6I9zSfzdE1kiTc6tNxLwEOBJ4iaQnwAeAs4HJJ\nxwNLgMMBbF8p6TBJt1OdjntcK2OLiIhN09LEYfv1gzx1yCD1T25hOBERMQo6aXA8IiLGgSSOiIio\nJYkjIiJqSeKIiIhakjgiIqKWJI6IiKgliSMiImpJ4oiIiFrGYq6qiIjN0rp161i8+M6xDgOA7u7n\nb/K6SRwREW2yePGdnPKxhWw/dfrwlVvooftXct3XkzgiIsaF7adOZ/KO4/tWQxnjiIiIWpI4IiKi\nliSOiIioJYkjIiJqSeKIiIhakjgiIqKWJI6IiKgliSMiImrpuAsAJR0KfJoqqV1g++wxDikiIhp0\nVItD0iTgc8ArgGcDR0t65thGFRERjToqcQD7AbfZvtv2Y8ClwLwxjikiIhp0WuKYCSxtWF5WyiIi\nokN02hhH1wBlvUOt0Lv2btbzcIvCac66Keu4447bxjSG0bJ69WR6etaOdRjj3pIld/PQ/SvHOgwe\nun8lS5bcPdZhjIqJ8NnspM/FSHT19g75vdxWkg4APmj70LJ8GtCbAfKIiM7RaS2OXwJ/JmlX4P8D\nRwFHj21IERHRqKPGOGyvA04GrgJ+C1xq+3djG1VERDTqqK6qiIjofB3V4oiIiM6XxBEREbUkcURE\nRC2ddlbVoIabw0rS1sBFwD7AfcCRtpe0PdBxoIljeSzwMaoLMAE+Z/uL7Y1y/JB0AfAqYIXt5w5S\n5xxgLvAgMN/2zW0McdwY7lhK+ivgCuDOUvQN2x9qY4jjiqRZVN+LOwPrgC/YPmeAerU+n+OixdHk\nHFYnAD2251B9KX60vVGODzXmA7vU9vPLvySNoV1IdTwHJGkuMLt8Nk8Ezm1XYOPQkMeyuLbhs5mk\nMbTHgVNt7wn8BXBS///vm/L5HBeJg+bmsJoHLCiPvwYc3Mb4xpNm5wMb6Cr+GIDtHwOrh6gyj+pX\nH7avA6ZKmtGO2MabJo4l5LPZNNv39rUebK8FfsefTuNU+/M5XhJHM3NYbahTrgdZI2lae8IbV5qd\nD+y1km6W9NXS3I1N1/+Y30PmYBuJAyTdJOk7kvYc62DGC0m7AXsB1/V7qvbnc7wkjmbmsOpfp2uA\nOtHcsVwI7GZ7L+D7PNGSi01Tew62GNQNwK6296bqcv3WGMczLkiaTNUTc0ppeTSq/fkcL4ljGfD0\nhuVZwPJ+dZYCuwBI2gLYwfZwTd7N0bDH0vbq0o0F8AWqEw5i0y2jfDaLgT6/0QTba20/VB5/F9gq\nPQtDk7QlVdK42PYVA1Sp/fkcL4ljwxxW5eypo6h+FTf6NnBseXw4cE0b4xtPhj2WknZuWJwH3NrG\n+MarLgbve18IHAMbJvJcY3tFuwIbhwY9lo1975L2A7ps97QrsHHqi8Cttj8zyPO1P5/jZsqRcgrp\nZ3jiFNKzJJ0J/NL2f0raBrgY2Bv4PXCU7cVjFnAHa+JYfhh4NfAY0AO8xfaisYu4s0m6BDgQeAqw\nAvgAsDXVzM6fL3U+BxxKdbrjcbZvHJtoO9twx1LSScBbqD6bfwT+vgzo
xgAkvQi4Fvg1VfdTL/A+\nYFdG8PkcN4kjIiI6w3jpqoqIiA6RxBEREbUkcURERC1JHBERUUsSR0RE1JLEERERtSRxREeTtFjS\nr/qV3TWacxSViyFXjdbr1djuhZJ+LekrAzz3A0mH1Xy99ZK2r7nOmOx7jG9JHNHpeoHJko5pw3ZG\npExZ32zdGcBrbf+57aNHuu1iU/chF3NFLePmRk6xWfsg8EFJl9h+vPEJSXcBr7R9a//l8vjLVFPs\nPw04HZgOvB7YkeoK2Z+Ul+qS9HHgZWX5pDLFd9/9Cv4B2AZ4lOr+BteVmwqdQzXx3l7APwJX9ovv\nGOBdwHrgDqr7HTxMNSXOdpJuBBYMMR3ERiSdChxJ9X/3YeCttm/p2wfgPZLmAdsC/2D7G2W9/YCz\ngCml7gds9491O6oJLfekujLbto9qJq7YvKTFEZ2uF7ieao6tt2zC+lvbfiHwOqoJGx+xvT9VIjir\nod5TgJtsPw94G/AVSVtJ2h04AzjU9r7A3wJfbVhvT+DcclOh/l/EzwY+AhxSZhr+LdXdFNcCh1HN\nCfT8ZpNGscD2/rb3Ad7Pn95057Eyc+w84POSdpI0tdQ7uuzDXwPnSdqh37qvAKbYfk55jRNrxBWb\nkbQ4otMh+KA3AAACJ0lEQVT1TXZ3BnCNpLp3I7ys/L0R2I4nvvRvAGY31HvE9n8A2L5W0kOAgJcA\nuwPXSuqLZZKk7vL4Ntu/GGTbBwHfsb2yLJ8H3DJI3WbtK+l0YBpVK2ZOv+cvKPuwSNINwAFUtwx9\nBvDdhn1YB/wZ1bxufW4BniXps8APge+MMNaYoJI4YlwoX4RXAqeycZ/842zcct6236oPl/XXS9qw\nTPXFOdTnf1LZThfwX7bn969QXq//vQ0aDXRPmPVD1B+SpK2Ay4EX275F0lN54r7wjdvs07gPt9g+\ncIDX3LXvse27SivpYKoW0YclPcf2o5sac0xM6aqK8eRM4CSe6KcHuB3YF0DSwcBQt7wc6GZffbaR\n9PryOi+hGs8wcBVwaONZXJJe0GS83wcOkzS9LL8ZuHqIeIazLbAFTySLkwaoc1yJcQ7wPKq7vf0U\nmCPpwL5K/fahq5TNBNbbXkiVoHeiatlEbCQtjuh0G36x275H0sVUX2p9zgAWSHob1YDz3QOt28Ty\nfcBekt5blo8qA/G3S3ojcIGkbamm+P4J1bjLkMoA/enA1ZLWA3ey8bjBcGczfUnSwzzRcjmMalzj\nekn3Ud2cp//+bFkG3LcD3mz7PgBJrwY+LulTVEnxDqqxjsY4/hw4q7SkJgEftn3vcPsZm59Mqx4R\nEbWkqyoiImpJ4oiIiFqSOCIiopYkjoiIqCWJIyIiakniiIiIWpI4IiKiliSOiIio5X8BHDQ2n1Hc\nZm4AAAAASUVORK5CYII=\n", 585 | "text/plain": [ 586 | "" 587 | ] 588 | }, 589 | "metadata": {}, 590 | "output_type": "display_data" 591 | } 592 | ], 593 | "source": [ 594 | "#Find label density\n", 595 | "label_counts = np.zeros(NUM_ROWS)\n", 596 | "for i in range(len(label_counts)):\n", 597 | " label_counts[i] = labels_to_array(data['tags'][i]).sum()\n", 598 | " \n", 599 | "plt.xlabel('Number of Labels')\n", 600 | "plt.ylabel('Number of Posts')\n", 601 | "plt.title('Sparsity of Dataset')\n", 602 | "plt.hist(label_counts)\n", 603 | "plt.show()" 604 | ] 605 | }, 606 | { 607 | "cell_type": "markdown", 608 | "metadata": {}, 609 | "source": [ 610 | "### 3) Data Preprocessing\n", 611 | "\n", 612 | "We will pre-process the data in the following ways\n", 613 | "\n", 614 | "1. Split it into train and test sets\n", 615 | "2. Generate a bag of words embedding from the 1000 most common words in the corpus\n", 616 | "3. 
Store the features and labels in their machine-friendly format" 617 | ] 618 | }, 619 | { 620 | "cell_type": "code", 621 | "execution_count": 120, 622 | "metadata": { 623 | "colab": { 624 | "autoexec": { 625 | "startup": false, 626 | "wait_interval": 0 627 | }, 628 | "output_extras": [ 629 | {} 630 | ] 631 | }, 632 | "colab_type": "code", 633 | "collapsed": false, 634 | "id": "h_SDal0khA3n", 635 | "outputId": "e6c311e5-c674-4cf2-f2dc-d6ceabfa6f83" 636 | }, 637 | "outputs": [ 638 | { 639 | "data": { 640 | "text/html": [ 641 | "\n", 642 | " \n", 643 | " \n", 650 | " " 651 | ], 652 | "text/plain": [ 653 | "" 654 | ] 655 | }, 656 | "metadata": {}, 657 | "output_type": "display_data" 658 | }, 659 | { 660 | "name": "stdout", 661 | "output_type": "stream", 662 | "text": [ 663 | "Train size: 800\n", 664 | "Test size: 200\n" 665 | ] 666 | } 667 | ], 668 | "source": [ 669 | "# Split data into train and test\n", 670 | "train_size = int(len(data) * .8)\n", 671 | "test_size = len(data)-train_size\n", 672 | "print (\"Train size: %d\" % train_size)\n", 673 | "print (\"Test size: %d\" % test_size)" 674 | ] 675 | }, 676 | { 677 | "cell_type": "code", 678 | "execution_count": 121, 679 | "metadata": { 680 | "colab": { 681 | "autoexec": { 682 | "startup": false, 683 | "wait_interval": 0 684 | } 685 | }, 686 | "colab_type": "code", 687 | "collapsed": false, 688 | "id": "anD38iilhA3r" 689 | }, 690 | "outputs": [ 691 | { 692 | "data": { 693 | "text/html": [ 694 | "\n", 695 | " \n", 696 | " \n", 703 | " " 704 | ], 705 | "text/plain": [ 706 | "" 707 | ] 708 | }, 709 | "metadata": {}, 710 | "output_type": "display_data" 711 | } 712 | ], 713 | "source": [ 714 | "train_posts = data['post'][:train_size]\n", 715 | "train_tags = data['tags'][:train_size]\n", 716 | "\n", 717 | "test_posts = data['post'][train_size:]\n", 718 | "test_tags = data['tags'][train_size:]" 719 | ] 720 | }, 721 | { 722 | "cell_type": "code", 723 | "execution_count": 122, 724 | "metadata": { 725 | "colab": { 726 | "autoexec": { 727 | "startup": false, 728 | "wait_interval": 0 729 | } 730 | }, 731 | "colab_type": "code", 732 | "collapsed": false, 733 | "id": "z4GblctFhA3u" 734 | }, 735 | "outputs": [ 736 | { 737 | "data": { 738 | "text/html": [ 739 | "\n", 740 | " \n", 741 | " \n", 748 | " " 749 | ], 750 | "text/plain": [ 751 | "" 752 | ] 753 | }, 754 | "metadata": {}, 755 | "output_type": "display_data" 756 | } 757 | ], 758 | "source": [ 759 | "max_words = 1000\n", 760 | "tokenize = text.Tokenizer(num_words=max_words, char_level=False)" 761 | ] 762 | }, 763 | { 764 | "cell_type": "code", 765 | "execution_count": 123, 766 | "metadata": { 767 | "colab": { 768 | "autoexec": { 769 | "startup": false, 770 | "wait_interval": 0 771 | } 772 | }, 773 | "colab_type": "code", 774 | "collapsed": false, 775 | "id": "YatMLCKXhA3x" 776 | }, 777 | "outputs": [ 778 | { 779 | "data": { 780 | "text/html": [ 781 | "\n", 782 | " \n", 783 | " \n", 790 | " " 791 | ], 792 | "text/plain": [ 793 | "" 794 | ] 795 | }, 796 | "metadata": {}, 797 | "output_type": "display_data" 798 | } 799 | ], 800 | "source": [ 801 | "tokenize.fit_on_texts(train_posts) # only fit on train\n", 802 | "x_train = tokenize.texts_to_matrix(train_posts)\n", 803 | "x_test = tokenize.texts_to_matrix(test_posts)" 804 | ] 805 | }, 806 | { 807 | "cell_type": "code", 808 | "execution_count": 124, 809 | "metadata": { 810 | "colab": { 811 | "autoexec": { 812 | "startup": false, 813 | "wait_interval": 0 814 | } 815 | }, 816 | "colab_type": "code", 817 | "collapsed": false, 818 | "id": "8quTsErLhA3z" 819 | }, 
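{ "cell_type": "markdown", "metadata": {}, "source": [ "To make the bag-of-words encoding concrete, here is a tiny self-contained illustration of what texts_to_matrix produces (a toy corpus, not the Stack Overflow data):\n", "\n", "    from tensorflow.python.keras.preprocessing import text\n", "\n", "    toy = ['php array of arrays', 'java string']\n", "    tok = text.Tokenizer(num_words=10)\n", "    tok.fit_on_texts(toy)\n", "    # Each row is a fixed-length vector with a 1 in the column of every word\n", "    # (indexed by frequency rank) that appears in the post; word order is discarded.\n", "    print(tok.texts_to_matrix(toy))" ] },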
820 | "outputs": [ 821 | { 822 | "data": { 823 | "text/html": [ 824 | "\n", 825 | " \n", 826 | " \n", 833 | " " 834 | ], 835 | "text/plain": [ 836 | "" 837 | ] 838 | }, 839 | "metadata": {}, 840 | "output_type": "display_data" 841 | }, 842 | { 843 | "data": { 844 | "text/plain": [ 845 | "array([ 0., 0., 0., 0., 1.])" 846 | ] 847 | }, 848 | "execution_count": 124, 849 | "metadata": {}, 850 | "output_type": "execute_result" 851 | } 852 | ], 853 | "source": [ 854 | "# generate multi-label arrays\n", 855 | "y_train = np.zeros([train_size,NUM_CLASSES])\n", 856 | "for i in range(0,train_size):\n", 857 | " y_train[i] = labels_to_array(data['tags'][i])\n", 858 | "\n", 859 | "y_test = np.zeros([test_size,NUM_CLASSES])\n", 860 | "for i in range(0,test_size):\n", 861 | " y_test[i] = labels_to_array(data['tags'][i+train_size-1])\n", 862 | "y_test[0]" 863 | ] 864 | }, 865 | { 866 | "cell_type": "code", 867 | "execution_count": 125, 868 | "metadata": { 869 | "colab": { 870 | "autoexec": { 871 | "startup": false, 872 | "wait_interval": 0 873 | }, 874 | "output_extras": [ 875 | {} 876 | ] 877 | }, 878 | "colab_type": "code", 879 | "collapsed": false, 880 | "id": "XZFsdLYVhA33", 881 | "outputId": "882923f3-6705-46b5-be88-3d4fec2965f2" 882 | }, 883 | "outputs": [ 884 | { 885 | "data": { 886 | "text/html": [ 887 | "\n", 888 | " \n", 889 | " \n", 896 | " " 897 | ], 898 | "text/plain": [ 899 | "" 900 | ] 901 | }, 902 | "metadata": {}, 903 | "output_type": "display_data" 904 | }, 905 | { 906 | "name": "stdout", 907 | "output_type": "stream", 908 | "text": [ 909 | "x_train shape: (800, 1000)\n", 910 | "x_test shape: (200, 1000)\n", 911 | "y_train shape: (800, 5)\n", 912 | "y_test shape: (200, 5)\n" 913 | ] 914 | } 915 | ], 916 | "source": [ 917 | "# Inspect the dimenstions of our training and test data (this is helpful to debug)\n", 918 | "print('x_train shape:', x_train.shape)\n", 919 | "print('x_test shape:', x_test.shape)\n", 920 | "print('y_train shape:', y_train.shape)\n", 921 | "print('y_test shape:', y_test.shape)" 922 | ] 923 | }, 924 | { 925 | "cell_type": "markdown", 926 | "metadata": {}, 927 | "source": [ 928 | "### 4) Model Code" 929 | ] 930 | }, 931 | { 932 | "cell_type": "code", 933 | "execution_count": 126, 934 | "metadata": { 935 | "colab": { 936 | "autoexec": { 937 | "startup": false, 938 | "wait_interval": 0 939 | } 940 | }, 941 | "colab_type": "code", 942 | "collapsed": false, 943 | "id": "cBIkzTOZhA36" 944 | }, 945 | "outputs": [ 946 | { 947 | "data": { 948 | "text/html": [ 949 | "\n", 950 | " \n", 951 | " \n", 958 | " " 959 | ], 960 | "text/plain": [ 961 | "" 962 | ] 963 | }, 964 | "metadata": {}, 965 | "output_type": "display_data" 966 | } 967 | ], 968 | "source": [ 969 | "# This model trains very quickly and 2 epochs are already more than enough\n", 970 | "# Training for more epochs will likely lead to overfitting on this dataset\n", 971 | "# You can try tweaking these hyperparamaters when using this model with your own data\n", 972 | "batch_size = 32\n", 973 | "epochs = 2" 974 | ] 975 | }, 976 | { 977 | "cell_type": "code", 978 | "execution_count": 127, 979 | "metadata": { 980 | "colab": { 981 | "autoexec": { 982 | "startup": false, 983 | "wait_interval": 0 984 | }, 985 | "output_extras": [ 986 | {} 987 | ] 988 | }, 989 | "colab_type": "code", 990 | "collapsed": false, 991 | "id": "XdrFuwx4hA39", 992 | "outputId": "4b002559-2f06-4681-8f02-2e76e62d7a57" 993 | }, 994 | "outputs": [ 995 | { 996 | "data": { 997 | "text/html": [ 998 | "\n", 999 | " \n", 1000 | " \n", 1007 | " " 1008 | ], 1009 
| "text/plain": [ 1010 | "" 1011 | ] 1012 | }, 1013 | "metadata": {}, 1014 | "output_type": "display_data" 1015 | } 1016 | ], 1017 | "source": [ 1018 | "# Build the model\n", 1019 | "model = Sequential()\n", 1020 | "model.add(Dense(512, input_shape=(max_words,)))\n", 1021 | "model.add(Activation('relu'))\n", 1022 | "model.add(Dropout(0.5))\n", 1023 | "model.add(Dense(NUM_CLASSES))\n", 1024 | "model.add(Activation('sigmoid')) #changed from softmax\n", 1025 | "\n", 1026 | "model.compile(loss='binary_crossentropy',\n", 1027 | " optimizer='adam',\n", 1028 | " metrics=['accuracy']) #changed from categorical_crossentropy" 1029 | ] 1030 | }, 1031 | { 1032 | "cell_type": "code", 1033 | "execution_count": 128, 1034 | "metadata": { 1035 | "colab": { 1036 | "autoexec": { 1037 | "startup": false, 1038 | "wait_interval": 0 1039 | }, 1040 | "output_extras": [ 1041 | {} 1042 | ] 1043 | }, 1044 | "colab_type": "code", 1045 | "collapsed": false, 1046 | "id": "rzi-9GaBhA4A", 1047 | "outputId": "9a56a130-8804-4ce0-ad47-38c4f40c81fa" 1048 | }, 1049 | "outputs": [ 1050 | { 1051 | "data": { 1052 | "text/html": [ 1053 | "\n", 1054 | " \n", 1055 | " \n", 1062 | " " 1063 | ], 1064 | "text/plain": [ 1065 | "" 1066 | ] 1067 | }, 1068 | "metadata": {}, 1069 | "output_type": "display_data" 1070 | }, 1071 | { 1072 | "name": "stdout", 1073 | "output_type": "stream", 1074 | "text": [ 1075 | "Train on 720 samples, validate on 80 samples\n", 1076 | "Epoch 1/2\n", 1077 | "720/720 [==============================] - 1s - loss: 0.4711 - acc: 0.8122 - val_loss: 0.4311 - val_acc: 0.8125\n", 1078 | "Epoch 2/2\n", 1079 | "720/720 [==============================] - 0s - loss: 0.3171 - acc: 0.8653 - val_loss: 0.3765 - val_acc: 0.8300\n" 1080 | ] 1081 | } 1082 | ], 1083 | "source": [ 1084 | "# model.fit trains the model\n", 1085 | "# The validation_split param tells Keras what % of our training data should be used in the validation set\n", 1086 | "# You can see the validation loss decreasing slowly when you run this\n", 1087 | "# Because val_loss is no longer decreasing we stop training to prevent overfitting\n", 1088 | "history = model.fit(x_train, y_train,\n", 1089 | " batch_size=batch_size,\n", 1090 | " epochs=epochs,\n", 1091 | " verbose=1,\n", 1092 | " validation_split=0.1)" 1093 | ] 1094 | }, 1095 | { 1096 | "cell_type": "code", 1097 | "execution_count": 129, 1098 | "metadata": { 1099 | "colab": { 1100 | "autoexec": { 1101 | "startup": false, 1102 | "wait_interval": 0 1103 | }, 1104 | "output_extras": [ 1105 | {} 1106 | ] 1107 | }, 1108 | "colab_type": "code", 1109 | "collapsed": false, 1110 | "id": "zjwBD8qFhA4D", 1111 | "outputId": "0dda5da5-44c4-4fbc-f2ad-01d642ca1914" 1112 | }, 1113 | "outputs": [ 1114 | { 1115 | "data": { 1116 | "text/html": [ 1117 | "\n", 1118 | " \n", 1119 | " \n", 1126 | " " 1127 | ], 1128 | "text/plain": [ 1129 | "" 1130 | ] 1131 | }, 1132 | "metadata": {}, 1133 | "output_type": "display_data" 1134 | }, 1135 | { 1136 | "name": "stdout", 1137 | "output_type": "stream", 1138 | "text": [ 1139 | " 32/200 [===>..........................] 
- ETA: 0sTest score: 0.530333137512\n", 1140 | "Test accuracy: 0.807000041008\n" 1141 | ] 1142 | } 1143 | ], 1144 | "source": [ 1145 | "# Evaluate the accuracy of our trained model\n", 1146 | "score = model.evaluate(x_test, y_test,\n", 1147 | " batch_size=batch_size, verbose=1)\n", 1148 | "print('Test score:', score[0])\n", 1149 | "print('Test accuracy:', score[1])" 1150 | ] 1151 | }, 1152 | { 1153 | "cell_type": "code", 1154 | "execution_count": 130, 1155 | "metadata": { 1156 | "colab": { 1157 | "autoexec": { 1158 | "startup": false, 1159 | "wait_interval": 0 1160 | }, 1161 | "output_extras": [ 1162 | {} 1163 | ] 1164 | }, 1165 | "colab_type": "code", 1166 | "collapsed": false, 1167 | "id": "f000lYoxhA4F", 1168 | "outputId": "21cd198f-1979-4b40-a2fd-891a1c0248db" 1169 | }, 1170 | "outputs": [ 1171 | { 1172 | "data": { 1173 | "text/html": [ 1174 | "\n", 1175 | " \n", 1176 | " \n", 1183 | " " 1184 | ], 1185 | "text/plain": [ 1186 | "" 1187 | ] 1188 | }, 1189 | "metadata": {}, 1190 | "output_type": "display_data" 1191 | }, 1192 | { 1193 | "name": "stdout", 1194 | "output_type": "stream", 1195 | "text": [ 1196 | "cannot cast from fragment to supportmapfragment
public class outdoorfragment extends fragment  { private googlemap googlemap; double latitude = 1.31039; double longitude = 103.7784;  public ...\n",
1197 |       "Actual label:android\n",
1198 |       "Predicted label: android\n",
1199 |       "\n",
1200 |       "bullet list (or similar to) in a linear view between textviews  i try to build a bullet list with listview and use it in my scrollable linearlayout but it doesn t expand the list (i have to scroll the ...\n",
1201 |       "Actual label:android\n",
1202 |       "Predicted label: android\n",
1203 |       "\n",
1204 |       "android slide up view panel like sms  i m new to android and i want to create a chat application in android. so far i have designed bottom of the edit text with submit button.  like blow link    \n",
1257 |        "          \n",
1264 |        "          "
1265 |       ],
1266 |       "text/plain": [
1267 |        ""
1268 |       ]
1269 |      },
1270 |      "metadata": {},
1271 |      "output_type": "display_data"
1272 |     },
1273 |     {
1274 |      "name": "stderr",
1275 |      "output_type": "stream",
1276 |      "text": [
1277 |       "mkdir: cannot create directory ‘trainer’: File exists\n"
1278 |      ]
1279 |     }
1280 |    ],
1281 |    "source": [
1282 |     "%%bash\n",
1283 |     "mkdir trainer\n",
1284 |     "touch trainer/__init__.py"
1285 |    ]
1286 |   },
1287 |   {
1288 |    "cell_type": "code",
1289 |    "execution_count": 144,
1290 |    "metadata": {
1291 |     "collapsed": false
1292 |    },
1293 |    "outputs": [
1315 |     {
1316 |      "name": "stdout",
1317 |      "output_type": "stream",
1318 |      "text": [
1319 |       "Overwriting trainer/task.py\n"
1320 |      ]
1321 |     }
1322 |    ],
1323 |    "source": [
1324 |     "%%writefile trainer/task.py\n",
1325 |     "\n",
1326 |     "from __future__ import absolute_import\n",
1327 |     "from __future__ import division\n",
1328 |     "from __future__ import print_function\n",
1329 |     "\n",
1330 |     "import itertools\n",
1331 |     "import argparse\n",
1332 |     "import json\n",
1333 |     "import time\n",
1334 |     "import os\n",
1335 |     "\n",
1336 |     "import numpy as np\n",
1337 |     "import pandas as pd\n",
1338 |     "import tensorflow as tf\n",
1339 |     "import google.datalab.bigquery as bq\n",
1340 |     "\n",
1341 |     "from sklearn.preprocessing import LabelBinarizer, LabelEncoder\n",
1342 |     "from sklearn.metrics import confusion_matrix\n",
1343 |     "\n",
1344 |     "from tensorflow.python.keras.models import Sequential\n",
1345 |     "from tensorflow.python.keras.layers import Dense, Activation, Dropout\n",
1346 |     "from tensorflow.python.keras.preprocessing import text, sequence\n",
1347 |     "from tensorflow.python.keras import utils\n",
1348 |     "\n",
1349 |     "from collections import Counter\n",
1350 |     "\n",
1351 |     "print(\"You have TensorFlow version\", tf.__version__)\n",
1352 |     "\n",
1353 |     "if __name__ == '__main__':\n",
1354 |     "### COMMAND LINE ARGUMENTS ###\n",
1355 |     "  parser = argparse.ArgumentParser()\n",
1356 |     "  \n",
1357 |     "  parser.add_argument(\n",
1358 |     "    '--train_batch_size', #hyperparameter\n",
1359 |     "    help='Batch size for training steps',\n",
1360 |     "    type=int,\n",
1361 |     "    default=32\n",
1362 |     "  )\n",
1363 |     "  parser.add_argument(\n",
1364 |     "    '--epochs', #hyperparamter\n",
1365 |     "    help='Number of epochs to train for',\n",
1366 |     "    type=int,\n",
1367 |     "    default=2\n",
1368 |     "  )\n",
1369 |     "  parser.add_argument(\n",
1370 |     "    '--neurons', #hyperparamter\n",
1371 |     "    help='Number of neurons in hidden layer',\n",
1372 |     "    type=int,\n",
1373 |     "    default=512\n",
1374 |     "  )\n",
1375 |     "  parser.add_argument(\n",
1376 |     "        '--output_dir',\n",
1377 |     "        help='GCS location to write checkpoints and export models',\n",
1378 |     "        required=True\n",
1379 |     "  )\n",
1380 |     "  parser.add_argument(\n",
1381 |     "          '--job-dir',\n",
1382 |     "          help='this model ignores this field, but it is required by gcloud',\n",
1383 |     "          default='junk'\n",
1384 |     "  )\n",
1385 |     "  args = parser.parse_args()\n",
1386 |     "  \n",
1387 |     "### DOWNLOAD DATA ###\n",
1388 |     "  query = \"\"\"\n",
1389 |     "  SELECT tags, TRIM(LOWER(REGEXP_REPLACE(CONCAT(title, \\' \\', body), r\\'[\\\"\\\\n\\\\\\'?,]|

|

\\',\\\" \\\"))) as post \n", 1390 | " FROM `bigquery-public-data.stackoverflow.posts_questions`\n", 1391 | " WHERE REGEXP_CONTAINS(tags, r\\\"javascript|java|c#|php|android|jquery|python\\\") \n", 1392 | " LIMIT 1000\n", 1393 | " \"\"\"\n", 1394 | "\n", 1395 | " #data = bq.Query(query).execute(output_options=bq.QueryOutput.dataframe()).result() #issues with ML Engine service account authentication\n", 1396 | " data = pd.read_csv(\"https://storage.googleapis.com/vijay-public/text_classification/results-1000.csv\")\n", 1397 | " NUM_ROWS = data.shape[0]\n", 1398 | " print(\"Loaded {} rows\".format(NUM_ROWS))\n", 1399 | " \n", 1400 | "### DATA PREPROCESSING ###\n", 1401 | " #Generate list of N most common labels\n", 1402 | " NUM_CLASSES = 5\n", 1403 | " labels_list = []\n", 1404 | "\n", 1405 | " counts = Counter('|'.join(data['tags'].tolist()).split('|'))\n", 1406 | " classes = counts.most_common(NUM_CLASSES)\n", 1407 | "\n", 1408 | " for i in range(0,NUM_CLASSES):\n", 1409 | " labels_list.append(classes[i][0])\n", 1410 | "\n", 1411 | " print (\"{} most common classes:\".format(NUM_CLASSES))\n", 1412 | " print(classes)\n", 1413 | "\n", 1414 | " #utility functions to extract classes and translate between \n", 1415 | " #human friendly (string) labels and machine friendly (array) labels\n", 1416 | "\n", 1417 | " #labels_list: A list of the valid classes\n", 1418 | " #tags: A list of tags for a post\n", 1419 | " #returns an ndarray with ones for the active classes\n", 1420 | " def labels_to_array(tags,labels_list=labels_list):\n", 1421 | " array = np.zeros(len(labels_list),dtype=np.int8)\n", 1422 | " tags = tags.split('|') #split tags from pipe separated string into list\n", 1423 | " for tag in tags:\n", 1424 | " try:\n", 1425 | " array[labels_list.index(tag)] = 1\n", 1426 | " except ValueError: \n", 1427 | " None\n", 1428 | " return array\n", 1429 | "\n", 1430 | " #translate machine readable array back to human labels\n", 1431 | " def array_to_labels(array, labels_list=labels_list, threshold = 1):\n", 1432 | " labels = []\n", 1433 | " i=0\n", 1434 | " for flag in array:\n", 1435 | " if flag >= threshold: \n", 1436 | " labels.append(labels_list[i])\n", 1437 | " i=i+1\n", 1438 | " return labels\n", 1439 | "\n", 1440 | " #split into training/test set \n", 1441 | " train_size = int(len(data) * .8)\n", 1442 | " test_size = len(data)-train_size\n", 1443 | " print (\"Train size: %d\" % train_size)\n", 1444 | " print (\"Test size: %d\" % test_size)\n", 1445 | "\n", 1446 | " train_posts = data['post'][:train_size]\n", 1447 | " train_tags = data['tags'][:train_size]\n", 1448 | "\n", 1449 | " test_posts = data['post'][train_size:]\n", 1450 | " test_tags = data['tags'][train_size:]\n", 1451 | "\n", 1452 | " #generate bag of words embedding\n", 1453 | " max_words = 1000\n", 1454 | " tokenize = text.Tokenizer(num_words=max_words, char_level=False)\n", 1455 | "\n", 1456 | " tokenize.fit_on_texts(train_posts) # only fit on train\n", 1457 | " x_train = tokenize.texts_to_matrix(train_posts)\n", 1458 | " x_test = tokenize.texts_to_matrix(test_posts)\n", 1459 | "\n", 1460 | " # generate multi-label arrays\n", 1461 | " y_train = np.zeros([train_size,NUM_CLASSES])\n", 1462 | " for i in range(0,train_size):\n", 1463 | " y_train[i] = labels_to_array(data['tags'][i])\n", 1464 | "\n", 1465 | " y_test = np.zeros([test_size,NUM_CLASSES])\n", 1466 | " for i in range(0,test_size):\n", 1467 | " y_test[i] = labels_to_array(data['tags'][i+train_size-1])\n", 1468 | " y_test[0]\n", 1469 | "\n", 1470 | " # Inspect the dimenstions 
of our training and test data (this is helpful to debug)\n", 1471 | " print('x_train shape:', x_train.shape)\n", 1472 | " print('x_test shape:', x_test.shape)\n", 1473 | " print('y_train shape:', y_train.shape)\n", 1474 | " print('y_test shape:', y_test.shape)\n", 1475 | "\n", 1476 | "### BUILD MODEL ###\n", 1477 | " #Set hyperparameters\n", 1478 | " batch_size = args.train_batch_size\n", 1479 | " epochs = args.epochs\n", 1480 | "\n", 1481 | "\n", 1482 | " model = Sequential()\n", 1483 | " model.add(Dense(args.neurons, input_shape=(max_words,)))\n", 1484 | " model.add(Activation('relu'))\n", 1485 | " #model.add(Dropout(0.5)) #this breaks SavedModel prediction\n", 1486 | " model.add(Dense(NUM_CLASSES))\n", 1487 | " model.add(Activation('sigmoid')) #changed from softmax\n", 1488 | "\n", 1489 | " model.compile(loss='binary_crossentropy',\n", 1490 | " optimizer='adam',\n", 1491 | " metrics=['accuracy']) #changed from categorical_crossentropy\n", 1492 | "\n", 1493 | " # model.fit trains the model\n", 1494 | " # The validation_split param tells Keras what % of our training data should be used in the validation set\n", 1495 | " # You can see the validation loss decreasing slowly when you run this\n", 1496 | " # Because val_loss is no longer decreasing we stop training to prevent overfitting\n", 1497 | "\n", 1498 | "\n", 1499 | " #Enable Tensorboard logging\n", 1500 | " Tensorboard = tf.keras.callbacks.TensorBoard(log_dir=args.output_dir + \"/tensorboard\")\n", 1501 | "\n", 1502 | " history = model.fit(x_train, y_train,\n", 1503 | " batch_size=batch_size,\n", 1504 | " epochs=epochs,\n", 1505 | " verbose=1,\n", 1506 | " validation_split=0.1,\n", 1507 | " callbacks=[Tensorboard]) #callback for Tensorboard\n", 1508 | "\n", 1509 | " # Evaluate the accuracy of our trained model\n", 1510 | " score = model.evaluate(x_test, y_test,\n", 1511 | " batch_size=batch_size, verbose=1)\n", 1512 | " print('Test score:', score[0])\n", 1513 | "\n", 1514 | "\n", 1515 | " # Here's how to generate a prediction on individual examples\n", 1516 | " for i in range(5):\n", 1517 | " prediction = model.predict(np.array([x_test[i]]))\n", 1518 | " #print(prediction[0])\n", 1519 | " #print(y_test[i])\n", 1520 | " predicted_label = array_to_labels(prediction[0],threshold=.5)\n", 1521 | " print(test_posts.iloc[i][:200], \"...\")\n", 1522 | " print('Actual label:' + '|'.join(array_to_labels(y_test[i])))\n", 1523 | " print(\"Predicted label: \" + '|'.join(predicted_label) + '\\n') \n", 1524 | "\n", 1525 | "### EXPORT MODEL ### \n", 1526 | " model_builder = tf.saved_model.builder.SavedModelBuilder(args.output_dir+\"/export/\"+time.strftime(\"%Y%m%d-%H%M%S\"))\n", 1527 | "\n", 1528 | " inputs = {'input': tf.saved_model.utils.build_tensor_info(model.input)}\n", 1529 | " outputs = {'output': tf.saved_model.utils.build_tensor_info(model.output)}\n", 1530 | "\n", 1531 | " signature_def = tf.saved_model.signature_def_utils.build_signature_def(\n", 1532 | " inputs=inputs,\n", 1533 | " outputs=outputs,\n", 1534 | " method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME\n", 1535 | " )\n", 1536 | "\n", 1537 | " model_builder.add_meta_graph_and_variables(\n", 1538 | " tf.keras.backend.get_session(),\n", 1539 | " tags=[tf.saved_model.tag_constants.SERVING],\n", 1540 | " signature_def_map={\n", 1541 | " tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signature_def\n", 1542 | " }\n", 1543 | " )\n", 1544 | "\n", 1545 | " model_builder.save()\n" 1546 | ] 1547 | }, 1548 | { 1549 | "cell_type": "code", 1550 | 
"execution_count": 145, 1551 | "metadata": { 1552 | "collapsed": false, 1553 | "scrolled": false 1554 | }, 1555 | "outputs": [ 1556 | { 1557 | "data": { 1558 | "text/html": [ 1559 | "\n", 1560 | " \n", 1561 | " \n", 1568 | " " 1569 | ], 1570 | "text/plain": [ 1571 | "" 1572 | ] 1573 | }, 1574 | "metadata": {}, 1575 | "output_type": "display_data" 1576 | }, 1577 | { 1578 | "name": "stdout", 1579 | "output_type": "stream", 1580 | "text": [ 1581 | "You have TensorFlow version 1.4.0\n", 1582 | "Loaded 1000 rows\n", 1583 | "5 most common classes:\n", 1584 | "[('javascript', 376), ('java', 334), ('jquery', 133), ('c#', 100), ('html', 82)]\n", 1585 | "Train size: 800\n", 1586 | "Test size: 200\n", 1587 | "x_train shape: (800, 1000)\n", 1588 | "x_test shape: (200, 1000)\n", 1589 | "y_train shape: (800, 5)\n", 1590 | "y_test shape: (200, 5)\n", 1591 | "Train on 720 samples, validate on 80 samples\n", 1592 | "Epoch 1/2\n", 1593 | "\r", 1594 | " 32/720 [>.............................] - ETA: 0s - loss: 0.6860 - acc: 0.5625\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\r", 1595 | "192/720 [=======>......................] - ETA: 0s - loss: 0.5539 - acc: 0.7594\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\r", 1596 | "352/720 [=============>................] - ETA: 0s - loss: 0.4947 - acc: 0.7915\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\r", 1597 | "512/720 [====================>.........] - ETA: 0s - loss: 0.4687 - acc: 0.8105\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\r", 1598 | "672/720 [===========================>..] - ETA: 0s - loss: 0.4466 - acc: 0.8280\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\r", 1599 | "720/720 [==============================] - 0s - loss: 0.4436 - acc: 0.8303 - val_loss: 0.3341 - val_acc: 0.8700\n", 1600 | "Epoch 2/2\n", 1601 | "\r", 1602 | " 32/720 [>.............................] - ETA: 0s - loss: 0.2374 - acc: 0.9313\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\r", 1603 | "192/720 [=======>......................] - ETA: 0s - loss: 0.2978 - acc: 0.8906\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\r", 1604 | "352/720 [=============>................] - ETA: 0s - loss: 0.2764 - acc: 0.9034\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\r", 1605 | "512/720 [====================>.........] - ETA: 0s - loss: 0.2657 - acc: 0.9047\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\r", 1606 | "672/720 [===========================>..] 
- ETA: 0s - loss: 0.2544 - acc: 0.9086\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\r", 1607 | "720/720 [==============================] - 0s - loss: 0.2514 - acc: 0.9097 - val_loss: 0.2677 - val_acc: 0.9075\n", 1608 | "\r", 1609 | " 32/200 [===>..........................] - ETA: 0sTest score: 0.693889396191\n", 1610 | "update single record with an array of results a query leads to a list of results (@invoice_results - - also the name of the controller action) with two specific columns in the view (formatting remove ...\n", 1611 | "Actual label:javascript|c#\n", 1612 | "Predicted label: javascript\n", 1613 | "\n", 1614 | "jquery and mouseevents i have a question regarding mouse events in the jquery library. i have a simple javascript function as following:
$(function() {     var xpos;     var ypos;     $( ...\n",
1615 |       "Actual label:\n",
1616 |       "Predicted label: javascript\n",
1617 |       "\n",
1618 |       "drag and drop using raphael.js has laggy performance with more than 10 draggable elements  i m making a simple html5 app  that will be wrapped to be used on android  ios and web browsers. in my app i  ...\n",
1619 |       "Actual label:javascript|jquery|html\n",
1620 |       "Predicted label: javascript|jquery\n",
1621 |       "\n",
1622 |       "event logging in asp.net  i am using vs2005 c# .net 2.0 and sql server 2005.    are there any websites that provide step by step instructions to implement event logging for my web application     scrolltop from previous page. eg: i am in list page and scroll down to end of the page and clicked on a record to v ...\n",
1627 |       "Actual label:c#\n",
1628 |       "Predicted label: javascript\n",
1629 |       "\n"
1630 |      ]
1631 |     },
1632 |     {
1633 |      "name": "stderr",
1634 |      "output_type": "stream",
1635 |      "text": [
1636 |       "2017-11-14 19:55:03.220977: I tensorflow/core/platform/cpu_feature_guard.cc:137] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2 FMA\n"
1637 |      ]
1638 |     }
1639 |    ],
1640 |    "source": [
1641 |     "%%bash\n",
1642 |     "gcloud ml-engine local train \\\n",
1643 |     "   --module-name=trainer.task \\\n",
1644 |     "   --package-path=trainer \\\n",
1645 |     "   -- \\\n",
1646 |     "   --output_dir='./output'"
1647 |    ]
1648 |   },
1649 |   {
1650 |    "cell_type": "code",
1651 |    "execution_count": 133,
1652 |    "metadata": {
1653 |     "collapsed": false
1654 |    },
1655 |    "outputs": [
1656 |     {
1657 |      "data": {
1658 |       "text/html": [
1659 |        "\n",
1660 |        "          \n",
1661 |        "          \n",
1668 |        "          "
1669 |       ],
1670 |       "text/plain": [
1671 |        ""
1672 |       ]
1673 |      },
1674 |      "metadata": {},
1675 |      "output_type": "display_data"
1676 |     }
1677 |    ],
1678 |    "source": [
1679 |     "GCS_BUCKET = 'gs://vijays-sandbox-ml' #CHANGE THIS TO YOUR BUCKET\n",
1680 |     "PROJECT = 'vijays-sandbox' #CHANGE THIS TO YOUR PROJECT ID\n",
1681 |     "REGION = 'us-central1' #OPTIONALLY CHANGE THIS"
1682 |    ]
1683 |   },
1684 |   {
1685 |    "cell_type": "code",
1686 |    "execution_count": 134,
1687 |    "metadata": {
1688 |     "collapsed": false
1689 |    },
1690 |    "outputs": [
1691 |     {
1692 |      "data": {
1693 |       "text/html": [
1694 |        "\n",
1695 |        "          \n",
1696 |        "          \n",
1703 |        "          "
1704 |       ],
1705 |       "text/plain": [
1706 |        ""
1707 |       ]
1708 |      },
1709 |      "metadata": {},
1710 |      "output_type": "display_data"
1711 |     }
1712 |    ],
1713 |    "source": [
1714 |     "import os\n",
1715 |     "os.environ['GCS_BUCKET'] = GCS_BUCKET\n",
1716 |     "os.environ['PROJECT'] = PROJECT\n",
1717 |     "os.environ['REGION'] = REGION"
1718 |    ]
1719 |   },
1720 |   {
1721 |    "cell_type": "markdown",
1722 |    "metadata": {},
1723 |    "source": [
1724 |     "#### Configuration file for hyperparameter tuning\n",
1725 |     "\n",
1726 |     "Here I specify\n",
1727 |     "\n",
1728 |     "1. Which hyperparamters i wish to tune\n",
1729 |     "2. What min and max range I want to tune between\n",
1730 |     "3. What success metric i want to evaluate against\n",
1731 |     "\n",
1732 |     "Note that the hyperparameter tuner passes values to tensorflow via the command line, so any hyperparameter I wish to tune must be exposed as a command line argument in my code"
1733 |    ]
1734 |   },
1735 |   {
1736 |    "cell_type": "code",
1737 |    "execution_count": 148,
1738 |    "metadata": {
1739 |     "collapsed": false
1740 |    },
1741 |    "outputs": [
1763 |     {
1764 |      "name": "stdout",
1765 |      "output_type": "stream",
1766 |      "text": [
1767 |       "Overwriting config.yaml\n"
1768 |      ]
1769 |     }
1770 |    ],
1771 |    "source": [
1772 |     "%%writefile config.yaml\n",
1773 |     "trainingInput:\n",
1774 |     "  hyperparameters:\n",
1775 |     "    goal: MAXIMIZE\n",
1776 |     "    hyperparameterMetricTag: val_acc\n",
1777 |     "    maxTrials: 5\n",
1778 |     "    maxParallelTrials: 1\n",
1779 |     "    params:\n",
1780 |     "    - parameterName: neurons\n",
1781 |     "      type: INTEGER\n",
1782 |     "      minValue: 10\n",
1783 |     "      maxValue: 1000\n",
1784 |     "      scaleType: UNIT_LINEAR_SCALE"
1785 |    ]
1786 |   },
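  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Before submitting, it can help to dry-run a single trial locally. The sketch below is not part of the original run: it simulates one tuning trial by passing a hand-picked --neurons value (and a hypothetical local output directory) on the command line, exactly as the tuning service would."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "%%bash\n",
    "# Sketch: simulate one tuning trial locally with a hand-picked --neurons value\n",
    "gcloud ml-engine local train \\\n",
    "   --module-name=trainer.task \\\n",
    "   --package-path=trainer \\\n",
    "   -- \\\n",
    "   --output_dir='./output_trial_test' \\\n",
    "   --neurons=128"
   ]
  },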
1787 |   {
1788 |    "cell_type": "markdown",
1789 |    "metadata": {},
1790 |    "source": [
1791 |     "#### Upgrade ML Engine Tensorflow Version\n",
1792 |     "\n",
1793 |     "Our code requires TF 1.4, however TF 1.4 is not yet a pre-packaged runtime for ML Engine. However we can force the installation of TF 1.4 by specifying it as a PyPi dependency as documented [here](https://cloud.google.com/ml-engine/docs/versioning#specifying_custom_versions_of_tensorflow_for_training)"
1794 |    ]
1795 |   },
1796 |   {
1797 |    "cell_type": "code",
1798 |    "execution_count": 136,
1799 |    "metadata": {
1800 |     "collapsed": false
1801 |    },
1802 |    "outputs": [
1824 |     {
1825 |      "name": "stdout",
1826 |      "output_type": "stream",
1827 |      "text": [
1828 |       "Overwriting setup.py\n"
1829 |      ]
1830 |     }
1831 |    ],
1832 |    "source": [
1833 |     "%%writefile setup.py\n",
1834 |     "from setuptools import find_packages\n",
1835 |     "from setuptools import setup\n",
1836 |     "\n",
1837 |     "REQUIRED_PACKAGES = ['tensorflow==1.4','datalab']\n",
1838 |     "\n",
1839 |     "setup(\n",
1840 |     "    name='trainer',\n",
1841 |     "    version='0.1',\n",
1842 |     "    install_requires=REQUIRED_PACKAGES,\n",
1843 |     "    packages=find_packages(),\n",
1844 |     "    include_package_data=True,\n",
1845 |     "    description='Text Classification Trainer Application')"
1846 |    ]
1847 |   },
1848 |   {
1849 |    "cell_type": "markdown",
1850 |    "metadata": {},
1851 |    "source": [
1852 |     "#### Queue ML Engine Training Job\n",
1853 |     "\n",
1854 |     "We use the gcloud command line tool to do so"
1855 |    ]
1856 |   },
1857 |   {
1858 |    "cell_type": "code",
1859 |    "execution_count": 147,
1860 |    "metadata": {
1861 |     "collapsed": false
1862 |    },
1863 |    "outputs": [
1885 |     {
1886 |      "name": "stdout",
1887 |      "output_type": "stream",
1888 |      "text": [
1889 |       "jobId: text_classification_171114_195631\n",
1890 |       "state: QUEUED\n"
1891 |      ]
1892 |     },
1893 |     {
1894 |      "name": "stderr",
1895 |      "output_type": "stream",
1896 |      "text": [
1897 |       "Job [text_classification_171114_195631] submitted successfully.\n",
1898 |       "Your job is still active. You may view the status of your job with the command\n",
1899 |       "\n",
1900 |       "  $ gcloud ml-engine jobs describe text_classification_171114_195631\n",
1901 |       "\n",
1902 |       "or continue streaming the logs with the command\n",
1903 |       "\n",
1904 |       "  $ gcloud ml-engine jobs stream-logs text_classification_171114_195631\n"
1905 |      ]
1906 |     }
1907 |    ],
1908 |    "source": [
1909 |     "%%bash\n",
1910 |     "JOBNAME=text_classification_$(date -u +%y%m%d_%H%M%S)\n",
1911 |     "\n",
1912 |     "gcloud ml-engine jobs submit training $JOBNAME \\\n",
1913 |     "   --region=$REGION \\\n",
1914 |     "   --runtime-version=1.2 \\\n",
1915 |     "   --module-name=trainer.task \\\n",
1916 |     "   --package-path=trainer \\\n",
1917 |     "   --job-dir=$GCS_BUCKET/$JOBNAME/ \\\n",
1918 |     "   --config config.yaml \\\n",
1919 |     "   -- \\\n",
1920 |     "   --output_dir=$GCS_BUCKET/$JOBNAME/output"
1921 |    ]
1922 |   },
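  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "You can monitor the job with the gcloud commands printed above. As a sketch (substitute the job id from your own submission), 'describe' reports the job state and, once tuning trials complete, the per-trial hyperparameter values and final metric under trainingOutput:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "%%bash\n",
    "# Sketch: replace the job id with the one printed when you submitted\n",
    "gcloud ml-engine jobs describe text_classification_171114_195631"
   ]
  },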
1923 |   {
1924 |    "cell_type": "markdown",
1925 |    "metadata": {
1926 |     "collapsed": true
1927 |    },
1928 |    "source": [
1929 |     "### 7) Inspect Results Using TensorBoard"
1930 |    ]
1931 |   },
1932 |   {
1933 |    "cell_type": "code",
1934 |    "execution_count": 89,
1935 |    "metadata": {
1936 |     "collapsed": false
1937 |    },
1938 |    "outputs": [
1960 |     {
1961 |      "data": {
1962 |       "text/html": [
1963 |        "

TensorBoard was started successfully with pid 7530. Click here to access it.

" 1964 | ] 1965 | }, 1966 | "metadata": {}, 1967 | "output_type": "display_data" 1968 | }, 1969 | { 1970 | "data": { 1971 | "text/plain": [ 1972 | "7530" 1973 | ] 1974 | }, 1975 | "execution_count": 89, 1976 | "metadata": {}, 1977 | "output_type": "execute_result" 1978 | } 1979 | ], 1980 | "source": [ 1981 | "from google.datalab.ml import TensorBoard\n", 1982 | "TensorBoard().start('output')" 1983 | ] 1984 | }, 1985 | { 1986 | "cell_type": "markdown", 1987 | "metadata": {}, 1988 | "source": [ 1989 | "If you're running from a jupyter notebook on your laptop/workstation (as opposed to datalab) you can use the below command instead to launch tensorboard\n", 1990 | "\n", 1991 | "When you're done with tensorboard interrupt the kernel (using the Jupyter menu bar) to quit" 1992 | ] 1993 | }, 1994 | { 1995 | "cell_type": "code", 1996 | "execution_count": null, 1997 | "metadata": { 1998 | "collapsed": false 1999 | }, 2000 | "outputs": [], 2001 | "source": [ 2002 | "#alternative tensorboard command for outside datalab\n", 2003 | "\n", 2004 | "#!tensorboard --logdir=output/" 2005 | ] 2006 | }, 2007 | { 2008 | "cell_type": "markdown", 2009 | "metadata": { 2010 | "collapsed": true 2011 | }, 2012 | "source": [ 2013 | "### 8) Deploy For Prediction\n", 2014 | "\n", 2015 | "Cloud ML Engine has a prediction service that will wrap our tensorflow model with a REST API and allow remote clients to get predictions.\n", 2016 | "\n", 2017 | "You can deploy the model from the Google Cloud Console GUI, or you can use the gcloud command line tool. We will use the latter method." 2018 | ] 2019 | }, 2020 | { 2021 | "cell_type": "code", 2022 | "execution_count": null, 2023 | "metadata": { 2024 | "collapsed": false 2025 | }, 2026 | "outputs": [], 2027 | "source": [ 2028 | "%%bash\n", 2029 | "MODEL_NAME=\"text_classification\"\n", 2030 | "MODEL_VERSION=\"v1_1000_word_embedding\"\n", 2031 | "MODEL_LOCATION=\"output/export/20171110-135219\" #REPLACE this with the location of your model\n", 2032 | "\n", 2033 | "#gcloud ml-engine versions delete ${MODEL_VERSION} --model ${MODEL_NAME} #Uncomment to overwrite existing version\n", 2034 | "#gcloud ml-engine models delete ${MODEL_NAME} #Uncomment to overwrite existing model\n", 2035 | "gcloud ml-engine models create ${MODEL_NAME} --regions $REGION\n", 2036 | "gcloud ml-engine versions create ${MODEL_VERSION} --model ${MODEL_NAME} --origin ${MODEL_LOCATION} --staging-bucket=$GCS_BUCKET" 2037 | ] 2038 | }, 2039 | { 2040 | "cell_type": "markdown", 2041 | "metadata": {}, 2042 | "source": [ 2043 | "### 9) Get Predictions\n", 2044 | "There are two flavors of the ML Engine Prediction Service: Batch and online.\n", 2045 | "\n", 2046 | "Online prediction is more appropriate for latency sensitive requests as results are returned quickly and synchronously.\n", 2047 | "\n", 2048 | "Batch prediction is more appropriate for large prediction requests that you only need to run a few times a day.\n", 2049 | "\n", 2050 | "Below we define a function that takes care of\n", 2051 | "1. Authenticating to the Google Cloud API\n", 2052 | "2. Converting our post text to the vector embedding the model was trained on\n", 2053 | "3. 
Passes this embedding in JSON format, which is what the API expects" 2054 | ] 2055 | }, 2056 | { 2057 | "cell_type": "code", 2058 | "execution_count": null, 2059 | "metadata": { 2060 | "collapsed": true 2061 | }, 2062 | "outputs": [], 2063 | "source": [ 2064 | "def predict_json(project, model, post, version=None):\n", 2065 | " \"\"\"Send json data to a deployed model for prediction.\n", 2066 | "\n", 2067 | " Args:\n", 2068 | " project (str): project where the Cloud ML Engine Model is deployed.\n", 2069 | " model (str): model name.\n", 2070 | " post: str, the text you want to classify.\n", 2071 | " version: str, version of the model to target.\n", 2072 | " Returns:\n", 2073 | " Mapping[str: any]: dictionary of prediction results defined by the\n", 2074 | " model.\n", 2075 | " \"\"\"\n", 2076 | "\n", 2077 | " # Convert post to vector embedding\n", 2078 | " instances = tokenize.texts_to_matrix([post]).tolist()\n", 2079 | " # Authenticate\n", 2080 | " # GOOGLE_APPLICATION_CREDENTIALS=\n", 2081 | " # OR: gcloud auth application-default login\n", 2082 | " service = googleapiclient.discovery.build('ml', 'v1')\n", 2083 | " \n", 2084 | " name = 'projects/{}/models/{}'.format(project, model)\n", 2085 | "\n", 2086 | " if version is not None:\n", 2087 | " name += '/versions/{}'.format(version)\n", 2088 | "\n", 2089 | " response = service.projects().predict(\n", 2090 | " name=name,\n", 2091 | " body={'instances': instances}\n", 2092 | " ).execute()\n", 2093 | "\n", 2094 | " if 'error' in response:\n", 2095 | " raise RuntimeError(response['error'])\n", 2096 | "\n", 2097 | " return response['predictions']" 2098 | ] 2099 | }, 2100 | { 2101 | "cell_type": "markdown", 2102 | "metadata": {}, 2103 | "source": [ 2104 | "Now we'll call the prediction function and get results! Try modifying the post text to see how it affects the label scores. Does it behave how you would expect?" 2105 | ] 2106 | }, 2107 | { 2108 | "cell_type": "code", 2109 | "execution_count": null, 2110 | "metadata": { 2111 | "collapsed": false 2112 | }, 2113 | "outputs": [], 2114 | "source": [ 2115 | "POST = \"java is my world\"\n", 2116 | "MODEL = \"text_classification\"\n", 2117 | "VERSION = \"v1_1000_word_embedding\"\n", 2118 | "\n", 2119 | "response = predict_json(PROJECT,MODEL,POST,VERSION)[0].get('output')\n", 2120 | "\n", 2121 | "print(\"Post: {} \\nLabel Scores: {} \\nLabels:{} \\nLabels above threshold:{}\".format(\n", 2122 | " POST,response,labels_list,array_to_labels(response,threshold=0.5)))" 2123 | ] 2124 | }, 2125 | { 2126 | "cell_type": "code", 2127 | "execution_count": null, 2128 | "metadata": { 2129 | "collapsed": true 2130 | }, 2131 | "outputs": [], 2132 | "source": [] 2133 | } 2134 | ], 2135 | "metadata": { 2136 | "colab": { 2137 | "default_view": {}, 2138 | "name": "josh3.ipynb", 2139 | "provenance": [], 2140 | "version": "0.3.2", 2141 | "views": {} 2142 | }, 2143 | "kernelspec": { 2144 | "display_name": "Python 2", 2145 | "language": "python", 2146 | "name": "python2" 2147 | }, 2148 | "language_info": { 2149 | "codemirror_mode": { 2150 | "name": "ipython", 2151 | "version": 2 2152 | }, 2153 | "file_extension": ".py", 2154 | "mimetype": "text/x-python", 2155 | "name": "python", 2156 | "nbconvert_exporter": "python", 2157 | "pygments_lexer": "ipython2", 2158 | "version": "2.7.12" 2159 | } 2160 | }, 2161 | "nbformat": 4, 2162 | "nbformat_minor": 1 2163 | } 2164 | --------------------------------------------------------------------------------