├── package.json
├── .gitignore
├── requirements.txt
├── handler.py
├── upload.py
├── LICENSE
├── README.md
├── serverless.yml
├── infer.py
├── train.py
└── census_data.py

/package.json:
--------------------------------------------------------------------------------
{
  "name": "tflambdademo",
  "description": "",
  "version": "0.1.0",
  "dependencies": {},
  "devDependencies": {
    "serverless-python-requirements": "^4.3.0"
  }
}
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Distribution / packaging
.Python
*.pyc
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
*.zip

# Serverless directories
.serverless

# node
node_modules

# data files
*.data
*.test
model_*
model.tar.gz
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
absl-py==0.7.1
astor==0.7.1
certifi==2019.3.9
chardet==3.0.4
gast==0.2.2
grpcio==1.20.1
h5py==2.9.0
idna==2.8
Keras-Applications==1.0.7
Keras-Preprocessing==1.0.9
Markdown==3.1
mock==2.0.0
numpy==1.16.3
pbr==5.2.0
protobuf==3.7.1
six==1.12.0
tensorboard==1.13.1
tensorflow==1.13.1
tensorflow-estimator==1.13.0
termcolor==1.1.0
urllib3==1.24.2
Werkzeug==0.15.2
--------------------------------------------------------------------------------
/handler.py:
--------------------------------------------------------------------------------
# Default "hello" stub generated by the Serverless framework template.
# It is not referenced in serverless.yml and is kept only for reference.
import json


def hello(event, context):
    body = {
        "message": "Go Serverless v1.0! Your function executed successfully!",
        "input": event
    }

    response = {
        "statusCode": 200,
        "body": json.dumps(body)
    }

    return response

    # Use this code if you don't use the http event with the LAMBDA-PROXY
    # integration
    """
    return {
        "message": "Go Serverless v1.0! Your function executed successfully!",
        "event": event
    }
    """
--------------------------------------------------------------------------------
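Note that handler.py is the unmodified stub that `serverless create` generates; none of the functions in serverless.yml below reference it. If you wanted to expose it anyway, a function entry along these lines would work (hypothetical, mirroring the existing entries):

```yaml
functions:
  hello:
    handler: handler.hello  # <module>.<function>, same pattern as upload.uploadHandler
    events:
      - http:
          path: hello
          method: get
```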
/upload.py:
--------------------------------------------------------------------------------
try:
    import unzip_requirements  # provided by serverless-python-requirements when zip: true
except ImportError:
    pass

import os
import json
import time

import boto3
import tensorflow as tf

import census_data

FILE_DIR = '/tmp/'
BUCKET = os.environ['BUCKET']


def uploadHandler(event, context):
    # Download data to local tmp directory
    census_data.download(FILE_DIR)

    # Upload files to S3 under a key prefix derived from the current epoch
    # time; the train and infer endpoints use this value to locate the data
    epoch_now = str(int(time.time()))

    bucket = boto3.Session().resource('s3').Bucket(BUCKET)

    bucket.Object(os.path.join(epoch_now, census_data.TRAINING_FILE)) \
        .upload_file(FILE_DIR + census_data.TRAINING_FILE)

    bucket.Object(os.path.join(epoch_now, census_data.EVAL_FILE)) \
        .upload_file(FILE_DIR + census_data.EVAL_FILE)

    response = {
        "statusCode": 200,
        "body": json.dumps({'epoch': epoch_now})
    }

    return response
--------------------------------------------------------------------------------
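As a quick sanity check, uploadHandler can be driven locally without API Gateway, since it ignores its event and context arguments. A minimal sketch, assuming local AWS credentials and that the target bucket already exists (the helper script name is illustrative, not part of the repo):

```python
# local_upload_test.py -- hypothetical helper, not part of the repo
import json
import os

os.environ.setdefault('BUCKET', 'tflambdademo')  # must be set before importing upload

import upload  # noqa: E402  (reads BUCKET at import time)

# API Gateway normally supplies event/context; uploadHandler uses neither
resp = upload.uploadHandler({}, None)
print(json.loads(resp['body']))  # e.g. {'epoch': '1556680000'}
```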
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2019 Michael Moritz

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Serverless Machine Learning on AWS Lambda with TensorFlow

This project deploys a TensorFlow model to AWS Lambda using the Serverless framework.

by: Mike Moritz

More info here: [https://coderecipe.ai/architectures/16924675](https://coderecipe.ai/architectures/16924675)

### Prerequisites

#### Setup serverless

```
npm install serverless

serverless plugin install -n serverless-python-requirements

pip install -r requirements.txt

```
#### Setup AWS credentials

Make sure you have your AWS access key and secret key set up locally; this video [here](https://www.youtube.com/watch?v=KngM5bfpttA) walks through the process.

### Download the code locally

```
serverless create --template-url https://github.com/mikepm35/TfLambdaDemo --path tf-lambda
```

### Update S3 bucket to unique name

In serverless.yml, change the bucket name to something globally unique:

```
environment:
  BUCKET: <your-unique-bucket-name>
```

### Deploy to the cloud

```
cd tf-lambda

npm install

serverless deploy --stage <stage>
```
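### Try the endpoints

The three functions chain together through the `epoch` value: upload writes the census data under an epoch-named S3 prefix, train reads it and writes `model.tar.gz` back under the same prefix, and infer warm-starts from that model. A sketch with curl (the base URL is printed by `serverless deploy`; the one below is a placeholder):

```
# 1. Upload the census data to S3 and note the returned epoch
curl -X POST https://<api-id>.execute-api.us-east-1.amazonaws.com/<stage>/upload
# -> {"epoch": "1556680000"}

# 2. Train a model on that epoch's data
curl -X POST https://<api-id>.execute-api.us-east-1.amazonaws.com/<stage>/train \
  -d '{"epoch": "1556680000"}'

# 3. Run a prediction (see infer.py for the expected "input" format)
curl -X POST https://<api-id>.execute-api.us-east-1.amazonaws.com/<stage>/infer \
  -d '{"epoch": "1556680000", "input": {...}}'
```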
--------------------------------------------------------------------------------
/serverless.yml:
--------------------------------------------------------------------------------
service: tflambdademo

provider:
  name: aws
  region: us-east-1
  runtime: python3.6
  stage: dev

  iamRoleStatements:
    - Effect: Allow
      Action:
        - s3:*
      Resource:
        Fn::Join:
          - ""
          - - "arn:aws:s3:::"
            - ${self:provider.environment.BUCKET}
            - "/*"

  environment:
    BUCKET: tflambdademo

functions:
  upload:
    handler: upload.uploadHandler
    timeout: 30
    events:
      - http:
          path: upload
          method: post

  train:
    handler: train.trainHandler
    memorySize: 3008  # TensorFlow training wants the largest memory (and CPU) allocation
    timeout: 30
    events:
      - http:
          path: train
          method: post

  infer:
    handler: infer.inferHandler
    timeout: 30
    events:
      - http:
          path: infer
          method: post

plugins:
  - serverless-python-requirements

custom:
  pythonRequirements:
    dockerizePip: true
    # zip + slim keep the package within Lambda's size limits; the handlers
    # unpack dependencies at runtime via the unzip_requirements import
    zip: true
    slim: true
    noDeploy:
      - boto3
      - botocore
      - docutils
      - jmespath
      - pip
      - python-dateutil
      - s3transfer
      - setuptools
      - six
      - tensorboard

resources:
  Resources:
    SageBucket:
      Type: AWS::S3::Bucket
      Properties:
        BucketName: ${self:provider.environment.BUCKET}
--------------------------------------------------------------------------------
/infer.py:
--------------------------------------------------------------------------------
try:
    import unzip_requirements  # provided by serverless-python-requirements when zip: true
except ImportError:
    pass

import json
import os
import tarfile

import boto3
import tensorflow as tf
import numpy as np

import census_data

FILE_DIR = '/tmp/'
BUCKET = os.environ['BUCKET']


def _easy_input_function(data_dict, batch_size=64):
    """Turns a dict of prediction lists into a batched tf.data.Dataset.

    Expected shape (placeholder names are illustrative):

    data_dict = {
        '<column_name>': ['<value>', '<value>'],
        '<column_name>': ['<value>', '<value>'],
        ...
    }

    Keys should match census_data._CSV_COLUMNS and must include
    'income_bracket', which is popped off as the label column.
    """

    # Convert input data to numpy arrays, using the CSV column defaults
    # to infer each column's dtype
    for col in data_dict:
        col_ind = census_data._CSV_COLUMNS.index(col)
        dtype = type(census_data._CSV_COLUMN_DEFAULTS[col_ind][0])
        data_dict[col] = np.array(data_dict[col], dtype=dtype)

    labels = data_dict.pop('income_bracket')

    ds = tf.data.Dataset.from_tensor_slices((data_dict, labels))
    ds = ds.batch(batch_size)

    return ds


def inferHandler(event, context):
    body = json.loads(event.get('body'))

    # Read in prediction data as dictionary
    # Keys should match _CSV_COLUMNS, values should be lists
    predict_input = body['input']

    # Read in epoch
    epoch_files = body['epoch']

    # Download model from S3 and extract
    boto3.Session().resource('s3').Bucket(BUCKET).download_file(
        os.path.join(epoch_files, 'model.tar.gz'),
        FILE_DIR + 'model.tar.gz')

    tarfile.open(FILE_DIR + 'model.tar.gz', 'r').extractall(FILE_DIR)

    # Create feature columns
    wide_cols, deep_cols = census_data.build_model_columns()

    # Load model. The archive was created from /tmp/model_<epoch>/ with its
    # path preserved, so extracting under /tmp yields /tmp/tmp/model_<epoch>/.
    classifier = tf.estimator.LinearClassifier(
        feature_columns=wide_cols,
        model_dir=FILE_DIR + 'tmp/model_' + epoch_files + '/',
        warm_start_from=FILE_DIR + 'tmp/model_' + epoch_files + '/')

    # Setup prediction
    predict_iter = classifier.predict(
        lambda: _easy_input_function(predict_input))

    # Iterate over prediction and convert to lists
    predictions = []
    for prediction in predict_iter:
        for key in prediction:
            prediction[key] = prediction[key].tolist()

        predictions.append(prediction)

    response = {
        "statusCode": 200,
        "body": json.dumps(predictions,
                           default=lambda x: x.decode('utf-8'))
    }

    return response
--------------------------------------------------------------------------------
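For reference, a hedged example of the JSON body the /infer endpoint expects. Values are illustrative; the keys come from census_data._CSV_COLUMNS, and income_bracket must be included because _easy_input_function pops it off as the label:

```json
{
  "epoch": "1556680000",
  "input": {
    "age": [35],
    "workclass": ["Private"],
    "fnlwgt": [77516],
    "education": ["Bachelors"],
    "education_num": [13],
    "marital_status": ["Never-married"],
    "occupation": ["Tech-support"],
    "relationship": ["Not-in-family"],
    "race": ["White"],
    "gender": ["Male"],
    "capital_gain": [0],
    "capital_loss": [0],
    "hours_per_week": [40],
    "native_country": ["United-States"],
    "income_bracket": ["<=50K"]
  }
}
```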
/train.py:
--------------------------------------------------------------------------------
try:
    import unzip_requirements  # provided by serverless-python-requirements when zip: true
except ImportError:
    pass

import os
import json
import time
import functools
import tarfile

import boto3
import tensorflow as tf

import census_data

FILE_DIR = '/tmp/'
BUCKET = os.environ['BUCKET']
# For local runs:
# FILE_DIR = './'
# BUCKET = 'tflambdademo'


def trainHandler(event, context):
    time_start = time.time()

    body = json.loads(event.get('body'))

    # Read in epoch
    epoch_files = body['epoch']

    # Download files from S3
    bucket = boto3.Session().resource('s3').Bucket(BUCKET)

    bucket.download_file(
        os.path.join(epoch_files, census_data.TRAINING_FILE),
        FILE_DIR + census_data.TRAINING_FILE)

    bucket.download_file(
        os.path.join(epoch_files, census_data.EVAL_FILE),
        FILE_DIR + census_data.EVAL_FILE)

    # Create feature columns
    wide_cols, deep_cols = census_data.build_model_columns()

    # Setup estimator (wide/linear columns only; deep_cols is unused here)
    classifier = tf.estimator.LinearClassifier(
        feature_columns=wide_cols,
        model_dir=FILE_DIR + 'model_' + epoch_files + '/')

    # Create callable input function and execute train
    train_inpf = functools.partial(
        census_data.input_fn,
        FILE_DIR + census_data.TRAINING_FILE,
        num_epochs=2, shuffle=True,
        batch_size=64)

    classifier.train(train_inpf)

    # Create callable input function and execute evaluation
    test_inpf = functools.partial(
        census_data.input_fn,
        FILE_DIR + census_data.EVAL_FILE,
        num_epochs=1, shuffle=False,
        batch_size=64)

    result = classifier.evaluate(test_inpf)
    print('Evaluation result: %s' % result)

    # Zip up model files and store in S3. The archive keeps the full
    # /tmp/model_<epoch>/ path, which infer.py relies on when extracting.
    with tarfile.open(FILE_DIR + 'model.tar.gz', mode='w:gz') as arch:
        arch.add(FILE_DIR + 'model_' + epoch_files + '/', recursive=True)

    bucket.Object(os.path.join(epoch_files, 'model.tar.gz')) \
        .upload_file(FILE_DIR + 'model.tar.gz')

    # Convert result from float32 for json serialization
    for key in result:
        result[key] = result[key].item()

    response = {
        "statusCode": 200,
        "body": json.dumps({'epoch': epoch_files,
                            'runtime': round(time.time()-time_start, 1),
                            'result': result})
    }

    return response
--------------------------------------------------------------------------------
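The commented-out FILE_DIR/BUCKET lines in train.py hint at local runs. A minimal sketch of driving trainHandler with a hand-built LAMBDA-PROXY-style event, assuming the upload step already produced the given epoch prefix in your bucket (the helper name is illustrative):

```python
# local_train_test.py -- hypothetical helper, not part of the repo
import json
import os

os.environ.setdefault('BUCKET', 'tflambdademo')  # must match your deployed bucket

import train  # noqa: E402  (reads BUCKET at import time)

# The handler only reads event['body'], so a minimal event dict suffices
event = {"body": json.dumps({"epoch": "1556680000"})}
resp = train.trainHandler(event, None)
print(json.loads(resp['body'])['result'])  # accuracy, auc, loss, ...
```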
/census_data.py:
--------------------------------------------------------------------------------
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Download and clean the Census Income Dataset."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import sys

# pylint: disable=wrong-import-order
from absl import app as absl_app
from absl import flags
from six.moves import urllib
import tensorflow as tf
# pylint: enable=wrong-import-order


DATA_URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/adult'
TRAINING_FILE = 'adult.data'
TRAINING_URL = '%s/%s' % (DATA_URL, TRAINING_FILE)
EVAL_FILE = 'adult.test'
EVAL_URL = '%s/%s' % (DATA_URL, EVAL_FILE)


_CSV_COLUMNS = [
    'age', 'workclass', 'fnlwgt', 'education', 'education_num',
    'marital_status', 'occupation', 'relationship', 'race', 'gender',
    'capital_gain', 'capital_loss', 'hours_per_week', 'native_country',
    'income_bracket'
]

_CSV_COLUMN_DEFAULTS = [[0], [''], [0], [''], [0], [''], [''], [''], [''], [''],
                        [0], [0], [0], [''], ['']]

_HASH_BUCKET_SIZE = 1000

_NUM_EXAMPLES = {
    'train': 32561,
    'validation': 16281,
}


def _download_and_clean_file(filename, url):
  """Downloads data from url, and makes changes to match the CSV format."""
  temp_file, _ = urllib.request.urlretrieve(url)
  with tf.gfile.Open(temp_file, 'r') as temp_eval_file:
    with tf.gfile.Open(filename, 'w') as eval_file:
      for line in temp_eval_file:
        line = line.strip()
        line = line.replace(', ', ',')
        if not line or ',' not in line:
          continue
        if line[-1] == '.':
          line = line[:-1]
        line += '\n'
        eval_file.write(line)
  tf.gfile.Remove(temp_file)


def download(data_dir):
  """Download census data if it is not already present."""
  tf.gfile.MakeDirs(data_dir)

  training_file_path = os.path.join(data_dir, TRAINING_FILE)
  if not tf.gfile.Exists(training_file_path):
    _download_and_clean_file(training_file_path, TRAINING_URL)

  eval_file_path = os.path.join(data_dir, EVAL_FILE)
  if not tf.gfile.Exists(eval_file_path):
    _download_and_clean_file(eval_file_path, EVAL_URL)


def build_model_columns():
  """Builds a set of wide and deep feature columns."""
  # Continuous variable columns
  age = tf.feature_column.numeric_column('age')
  education_num = tf.feature_column.numeric_column('education_num')
  capital_gain = tf.feature_column.numeric_column('capital_gain')
  capital_loss = tf.feature_column.numeric_column('capital_loss')
  hours_per_week = tf.feature_column.numeric_column('hours_per_week')

  education = tf.feature_column.categorical_column_with_vocabulary_list(
      'education', [
          'Bachelors', 'HS-grad', '11th', 'Masters', '9th', 'Some-college',
          'Assoc-acdm', 'Assoc-voc', '7th-8th', 'Doctorate', 'Prof-school',
          '5th-6th', '10th', '1st-4th', 'Preschool', '12th'])

  marital_status = tf.feature_column.categorical_column_with_vocabulary_list(
      'marital_status', [
          'Married-civ-spouse', 'Divorced', 'Married-spouse-absent',
          'Never-married', 'Separated', 'Married-AF-spouse', 'Widowed'])

  relationship = tf.feature_column.categorical_column_with_vocabulary_list(
      'relationship', [
          'Husband', 'Not-in-family', 'Wife', 'Own-child', 'Unmarried',
          'Other-relative'])

  workclass = tf.feature_column.categorical_column_with_vocabulary_list(
      'workclass', [
          'Self-emp-not-inc', 'Private', 'State-gov', 'Federal-gov',
          'Local-gov', '?', 'Self-emp-inc', 'Without-pay', 'Never-worked'])

  # To show an example of hashing:
  occupation = tf.feature_column.categorical_column_with_hash_bucket(
      'occupation', hash_bucket_size=_HASH_BUCKET_SIZE)

  # Transformations.
  age_buckets = tf.feature_column.bucketized_column(
      age, boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])

  # Wide columns and deep columns.
  base_columns = [
      education, marital_status, relationship, workclass, occupation,
      age_buckets,
  ]

  crossed_columns = [
      tf.feature_column.crossed_column(
          ['education', 'occupation'], hash_bucket_size=_HASH_BUCKET_SIZE),
      tf.feature_column.crossed_column(
          [age_buckets, 'education', 'occupation'],
          hash_bucket_size=_HASH_BUCKET_SIZE),
  ]

  wide_columns = base_columns + crossed_columns

  deep_columns = [
      age,
      education_num,
      capital_gain,
      capital_loss,
      hours_per_week,
      tf.feature_column.indicator_column(workclass),
      tf.feature_column.indicator_column(education),
      tf.feature_column.indicator_column(marital_status),
      tf.feature_column.indicator_column(relationship),
      # To show an example of embedding
      tf.feature_column.embedding_column(occupation, dimension=8),
  ]

  return wide_columns, deep_columns


def input_fn(data_file, num_epochs, shuffle, batch_size):
  """Generate an input function for the Estimator."""
  assert tf.gfile.Exists(data_file), (
      '%s not found. Please make sure you have run the upload step (or '
      'census_data.download) so the data file exists at this path.' % data_file)

  def parse_csv(value):
    tf.logging.info('Parsing {}'.format(data_file))
    columns = tf.decode_csv(value, record_defaults=_CSV_COLUMN_DEFAULTS)
    features = dict(zip(_CSV_COLUMNS, columns))
    labels = features.pop('income_bracket')
    classes = tf.equal(labels, '>50K')  # binary classification
    return features, classes

  # Extract lines from input files using the Dataset API.
  dataset = tf.data.TextLineDataset(data_file)

  if shuffle:
    dataset = dataset.shuffle(buffer_size=_NUM_EXAMPLES['train'])

  dataset = dataset.map(parse_csv, num_parallel_calls=5)

  # We call repeat after shuffling, rather than before, to prevent separate
  # epochs from blending together.
  dataset = dataset.repeat(num_epochs)
  dataset = dataset.batch(batch_size)
  return dataset
--------------------------------------------------------------------------------
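Note that train.py and infer.py use only the wide columns with a LinearClassifier; deep_columns is built but never consumed. A minimal local sketch of exercising census_data end to end with the wide+deep variant instead, assuming TensorFlow 1.13 as pinned in requirements.txt (the script name and model_dir are illustrative):

```python
# local_wide_deep_test.py -- hypothetical, not part of the repo
import functools

import tensorflow as tf

import census_data

census_data.download('./')  # fetches adult.data / adult.test if missing

wide_cols, deep_cols = census_data.build_model_columns()

# Wide+deep estimator; the deployed handlers use LinearClassifier(wide_cols) only
model = tf.estimator.DNNLinearCombinedClassifier(
    model_dir='./model_local/',
    linear_feature_columns=wide_cols,
    dnn_feature_columns=deep_cols,
    dnn_hidden_units=[100, 50])

train_inpf = functools.partial(
    census_data.input_fn, census_data.TRAINING_FILE,
    num_epochs=2, shuffle=True, batch_size=64)

model.train(train_inpf)

eval_inpf = functools.partial(
    census_data.input_fn, census_data.EVAL_FILE,
    num_epochs=1, shuffle=False, batch_size=64)

print(model.evaluate(eval_inpf))  # accuracy, auc, loss, ...
```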