├── .vscode
│   └── settings.json
├── Code
│   ├── Module-06
│   │   ├── IAM-Policy-Get-DyanamoDB.json
│   │   ├── IAM-Policy-ReadKinesis.json
│   │   ├── IAM-Policy-WriteKinesis.json
│   │   ├── Module6and9-Lambda-Code.py
│   │   └── insert_template.py
│   ├── Module-07
│   │   └── Module7-Lambda-Code.py
│   ├── Module-08
│   │   └── Module8-Lambda-Code.py
│   ├── Module-10
│   │   ├── Firehose-copy-command.txt
│   │   ├── Redshift-Table-Create-Command.txt
│   │   └── jsonpaths.json
│   └── Module-11
│       └── Redshift-Table-Create-Command.txt
├── README.md
└── data
    ├── Postman_test_string.txt
    └── TestSample.csv

--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
{
    "python.pythonPath": "/usr/bin/python3"
}

--------------------------------------------------------------------------------
/Code/Module-06/IAM-Policy-Get-DyanamoDB.json:
--------------------------------------------------------------------------------
{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Sid": "VisualEditor0",
            "Effect": "Allow",
            "Action": "dynamodb:GetItem",
            "Resource": "*"
        }
    ]
}

--------------------------------------------------------------------------------
/Code/Module-06/IAM-Policy-ReadKinesis.json:
--------------------------------------------------------------------------------
{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Sid": "VisualEditor0",
            "Effect": "Allow",
            "Action": [
                "kinesis:ListStreams",
                "kinesis:SubscribeToShard",
                "kinesis:DescribeStreamSummary",
                "kinesis:ListShards",
                "kinesis:DescribeStreamConsumer",
                "kinesis:GetShardIterator",
                "kinesis:GetRecords",
                "kinesis:DescribeStream",
                "kinesis:DescribeLimits",
                "kinesis:ListStreamConsumers",
                "kinesis:ListTagsForStream"
            ],
            "Resource": "*"
        }
    ]
}

--------------------------------------------------------------------------------
/Code/Module-06/IAM-Policy-WriteKinesis.json:
--------------------------------------------------------------------------------
{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Sid": "VisualEditor0",
            "Effect": "Allow",
            "Action": [
                "kinesis:PutRecord",
                "kinesis:PutRecords"
            ],
            "Resource": "*"
        }
    ]
}
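
In the course these three policies are attached to the Lambda execution roles through the IAM console. If you prefer to script that step, the sketch below shows one way to do it with boto3. The policy name "WriteKinesis" and the role name "module6-lambda-role" are placeholders, not names from the course, so swap in whatever you created.

import json
import boto3

iam = boto3.client('iam')

# load the policy document from this repository (assumes you run this from the repo root)
with open('Code/Module-06/IAM-Policy-WriteKinesis.json') as f:
    policy_document = f.read()

# create a managed policy from the JSON document
response = iam.create_policy(
    PolicyName='WriteKinesis',          # placeholder name
    PolicyDocument=policy_document
)

# attach the new policy to the Lambda execution role (placeholder role name)
iam.attach_role_policy(
    RoleName='module6-lambda-role',
    PolicyArn=response['Policy']['Arn']
)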

--------------------------------------------------------------------------------
/Code/Module-06/Module6and9-Lambda-Code.py:
--------------------------------------------------------------------------------
import json
import boto3

def lambda_handler(event, context):

    print("MyEvent:")
    print(event)

    # mycontext = event.get("context")
    # method = mycontext.get("http-method")
    method = event['context']['http-method']

    if method == "GET":

        dynamo_client = boto3.client('dynamodb')

        im_customerID = event['params']['querystring']['CustomerID']
        print(im_customerID)
        response = dynamo_client.get_item(TableName='Customers', Key={'CustomerID': {'N': im_customerID}})
        print(response['Item'])

        #myreturn = "This is the return of the get"

        return {
            'statusCode': 200,
            'body': json.dumps(response['Item'])
        }

    elif method == "POST":

        # mystring = event['params']['querystring']['param1']
        p_record = event['body-json']
        recordstring = json.dumps(p_record)

        client = boto3.client('kinesis')
        response = client.put_record(
            StreamName='APIData',
            Data=recordstring,
            PartitionKey='string'
        )

        return {
            'statusCode': 200,
            'body': json.dumps(p_record)
        }
    else:
        return {
            'statusCode': 501,
            'body': json.dumps("Server Error")
        }
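
This handler only works when API Gateway is set up with the application/json mapping template from the course, because it reads event['context']['http-method'], event['params']['querystring'] and event['body-json']. Below is a minimal sketch of what such a mapped event looks like, using the record from data/Postman_test_string.txt. The structure is inferred from the handler code, so treat it as an illustration rather than the exact output of your own mapping template.

# example of a mapped POST event as this handler expects it (illustrative only)
sample_post_event = {
    "context": {"http-method": "POST"},
    "params": {"querystring": {}},
    "body-json": {
        "InvoiceNo": 536365,
        "StockCode": "84029E",
        "Description": "RED WOOLLY HOTTIE WHITE HEART.",
        "Quantity": 6,
        "InvoiceDate": "12/1/2010 8:26",
        "UnitPrice": 3.39,
        "CustomerID": 17850,
        "Country": "United Kingdom"
    }
}

# example of a mapped GET event with the CustomerID query string parameter
sample_get_event = {
    "context": {"http-method": "GET"},
    "params": {"querystring": {"CustomerID": "17850"}},
    "body-json": {}
}

# With AWS credentials configured and the Customers table / APIData stream in place,
# you could paste the handler into a local script and call
# lambda_handler(sample_post_event, None) to exercise the POST path.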

--------------------------------------------------------------------------------
/Code/Module-06/insert_template.py:
--------------------------------------------------------------------------------
import pandas as pd
import requests

'''
Updates:
- 2021-02-04: Created the TestSample.csv file in the new data folder and added the path to it here. This way you don't have to create it yourself.

Notes:
- If you run into a "json-body not found" error, then you forgot to configure the application/json mapping template in the method.
- Some students ran into this problem: they get a 200 here, but the CloudWatch log of the Lambda says KeyError: 'context'. I can only reproduce this when I don't send a payload to the API, so make sure you send data.
- If you get a 403 error: make sure to add the resource name you created in API Gateway to the URL, in my case "hello". If you just copy the URL from the "stage" in the UI, this part is missing.
'''

# URL of your endpoint
URL = "https://xxxxxx.execute-api.us-east-1.amazonaws.com/prod/hello"


# read the test file
data = pd.read_csv('data/TestSample.csv', sep=',')

# write a single row from the test file into the API
#export = data.loc[2].to_json()
#response = requests.post(URL, data = export)
#print(response)

# write all the rows from the test file to the API as POST requests
for i in data.index:
    try:
        # convert the row to JSON
        export = data.loc[i].to_json()

        # send it to the API
        response = requests.post(URL, data=export)

        # print the record and the response code
        print(export)
        print(response)
    except Exception:
        print(data.loc[i])

--------------------------------------------------------------------------------
/Code/Module-07/Module7-Lambda-Code.py:
--------------------------------------------------------------------------------
from __future__ import print_function

import base64
import json
import boto3
from datetime import datetime

s3_client = boto3.client('s3')

def lambda_handler(event, context):
    #print("Received event: " + json.dumps(event, indent=2))

    # build the timestamp inside the handler so every invocation gets a fresh file name
    dateTimeObj = datetime.now()
    timestampStr = dateTimeObj.strftime("%d-%b-%Y-%H%M%S")

    # list for the decoded records of this invocation
    # (kept inside the handler so records don't carry over between warm Lambda invocations)
    kinesisRecords = []

    for record in event['Records']:
        # Kinesis data is base64 encoded, so decode it here.
        # The .decode('utf-8') turns the bytes into a string; without it the join below fails with
        # [ERROR] TypeError: sequence item 0: expected str instance, bytes found
        payload = base64.b64decode(record['kinesis']['data']).decode('utf-8')

        # append each record to the list
        kinesisRecords.append(payload)
        # this is just for logging
        # print("Decoded payload: " + payload)

    # make one string out of the list; "\n" puts each record on a new line in the S3 file
    ex_string = '\n'.join(kinesisRecords)

    # generate the name for the file with the timestamp
    mykey = 'output-' + timestampStr + '.txt'

    # put the file into the S3 bucket
    response = s3_client.put_object(Body=ex_string, Bucket='aws-de-project', Key=mykey)

    return 'Successfully processed {} records.'.format(len(event['Records']))
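
If you want to test this function without pushing data through API Gateway and Kinesis first, you can feed it a hand-built test event. The sketch below builds a minimal event containing only the fields the handler actually reads (Records[].kinesis.data); a real Kinesis event carries more metadata. The bucket name hard-coded in the handler ('aws-de-project') has to exist in your account, so swap it for your own bucket before invoking.

import base64
import json

# a sample record, taken from data/Postman_test_string.txt
sample = {
    "InvoiceNo": 536365,
    "StockCode": "84029E",
    "Description": "RED WOOLLY HOTTIE WHITE HEART.",
    "Quantity": 6,
    "InvoiceDate": "12/1/2010 8:26",
    "UnitPrice": 3.39,
    "CustomerID": 17850,
    "Country": "United Kingdom"
}

# Kinesis delivers the payload base64 encoded, so encode it the same way here
encoded = base64.b64encode(json.dumps(sample).encode('utf-8')).decode('utf-8')

# minimal test event with only the fields the handler reads
test_event = {
    "Records": [
        {"kinesis": {"partitionKey": "string", "data": encoded}}
    ]
}

# paste this JSON into a Lambda console test event, or call lambda_handler(test_event, None)
# locally if your AWS credentials allow s3:PutObject on the target bucket
print(json.dumps(test_event, indent=2))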

--------------------------------------------------------------------------------
/Code/Module-08/Module8-Lambda-Code.py:
--------------------------------------------------------------------------------
import json
import base64
import boto3

from datetime import datetime

def lambda_handler(event, context):

    client = boto3.client('dynamodb')

    #print("Received event: " + json.dumps(event, indent=2))
    for record in event['Records']:
        # Kinesis data is base64 encoded so decode here
        t_record = base64.b64decode(record['kinesis']['data'])

        # decode the bytes into a string
        str_record = str(t_record, 'utf-8')

        # transform the JSON string into a dictionary
        dict_record = json.loads(str_record)

        # create Customer row
        ############################

        customer_key = dict()
        customer_key.update({'CustomerID': {"N": str(dict_record['CustomerID'])}})

        ex_customer = dict()
        ex_customer.update({str(dict_record['InvoiceNo']): {'Value': {"S": 'Some overview JSON for the UI goes here'}, "Action": "PUT"}})

        response = client.update_item(TableName='Customers', Key=customer_key, AttributeUpdates=ex_customer)

        # create Invoice row
        #############################

        inventory_key = dict()
        inventory_key.update({'InvoiceNo': {"N": str(dict_record['InvoiceNo'])}})

        # create export dictionary
        ex_dynamoRecord = dict()

        # remove InvoiceNo and StockCode from the DynamoDB record
        stock_dict = dict(dict_record)
        stock_dict.pop('InvoiceNo', None)
        stock_dict.pop('StockCode', None)

        # turn the dict into a JSON string
        stock_json = json.dumps(stock_dict)

        # create an attribute (column) on the InvoiceNo row:
        # add the stock JSON to the column named after the stock code
        ex_dynamoRecord.update({str(dict_record['StockCode']): {'Value': {"S": stock_json}, "Action": "PUT"}})

        #print(ex_dynamoRecord)
        response = client.update_item(TableName='Invoices', Key=inventory_key, AttributeUpdates=ex_dynamoRecord)

    return 'Successfully processed {} records.'.format(len(event['Records']))
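
This function expects two DynamoDB tables that are created by hand in the course: Customers with a numeric partition key CustomerID, and Invoices with a numeric partition key InvoiceNo (the key names and types are taken from the update_item calls above). If you want to create them from code instead of the console, a boto3 sketch could look like this; on-demand billing is an assumption, so use whatever capacity mode you set up in the course.

import boto3

dynamodb = boto3.client('dynamodb')

# (table name, partition key) pairs as used by the Lambda above
tables = [('Customers', 'CustomerID'), ('Invoices', 'InvoiceNo')]

for table_name, key_name in tables:
    dynamodb.create_table(
        TableName=table_name,
        KeySchema=[{'AttributeName': key_name, 'KeyType': 'HASH'}],
        AttributeDefinitions=[{'AttributeName': key_name, 'AttributeType': 'N'}],
        BillingMode='PAY_PER_REQUEST'  # assumption: on-demand capacity
    )
    # wait until the table is ready before moving on
    dynamodb.get_waiter('table_exists').wait(TableName=table_name)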

--------------------------------------------------------------------------------
/Code/Module-10/Firehose-copy-command.txt:
--------------------------------------------------------------------------------
COPY firehosetransactions FROM 's3://firehoseredshift/'
CREDENTIALS 'aws_iam_role=arn:aws:iam:::role/'
MANIFEST json 's3://firehoseredshift/jsonpaths.json';

--------------------------------------------------------------------------------
/Code/Module-10/Redshift-Table-Create-Command.txt:
--------------------------------------------------------------------------------

-- Create the table for the Firehose import with all columns set to NOT NULL.
-- This way row inserts fail with an error if they are not imported correctly.

create table firehosetransactions(
    InvoiceNo int not null,
    StockCode varchar(200) not null,
    Description varchar(200) not null,
    Quantity int not null,
    InvoiceDate varchar(200) not null,
    UnitPrice float not null,
    CustomerID int not null,
    Country varchar(200) not null
);

-- For finding errors during the import
select * from STL_LOAD_ERRORS;

-- More details about the errors
select * from STL_LOADERROR_DETAIL;

--------------------------------------------------------------------------------
/Code/Module-10/jsonpaths.json:
--------------------------------------------------------------------------------
{
    "jsonpaths": [
        "$['InvoiceNo']",
        "$['StockCode']",
        "$['Description']",
        "$['Quantity']",
        "$['InvoiceDate']",
        "$['UnitPrice']",
        "$['CustomerID']",
        "$['Country']"
    ]
}

--------------------------------------------------------------------------------
/Code/Module-11/Redshift-Table-Create-Command.txt:
--------------------------------------------------------------------------------

-- Create the table for the bulk import with all columns set to NOT NULL.
-- This way row inserts fail with an error if they are not imported correctly.

create table bulkimport(
    InvoiceNo varchar(200) not null,
    StockCode varchar(200) not null,
    Description varchar(200) not null,
    Quantity int not null,
    InvoiceDate varchar(200) not null,
    UnitPrice float not null,
    CustomerID int not null,
    Country varchar(200) not null
);
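
For the Module 11 bulk import, the data is loaded from S3 into the bulkimport table with a COPY command. The sketch below only shows one way such a load could be submitted from Python via the Redshift Data API instead of the query editor; the cluster identifier, database, user, bucket path and IAM role ARN are placeholders for whatever you created, and the CSV / IGNOREHEADER options assume you upload the raw CSV file with its header row.

import boto3

redshift_data = boto3.client('redshift-data')

# COPY the CSV from S3 into the bulkimport table.
# Bucket, role ARN, cluster, database and user below are placeholders.
copy_sql = """
    COPY bulkimport
    FROM 's3://my-bulk-import-bucket/data.csv'
    IAM_ROLE 'arn:aws:iam::123456789012:role/my-redshift-s3-role'
    CSV
    IGNOREHEADER 1;
"""

response = redshift_data.execute_statement(
    ClusterIdentifier='my-redshift-cluster',
    Database='dev',
    DbUser='awsuser',
    Sql=copy_sql
)

# the statement runs asynchronously; check its progress with the returned Id
status = redshift_data.describe_statement(Id=response['Id'])
print(status['Status'])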

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

# Title Of Your Project
Add a catchy title to your project, something that lets people immediately know what you are doing.

# Introduction & Goals
- Introduce your project to the reader
- Orient this section around the table of contents
- Write this like an executive summary
- What data are you working with
- What tools are you using
- What are you doing with these tools
- Once you are finished, add the conclusion here as well

# Contents

- [The Data Set](#the-data-set)
- [Used Tools](#used-tools)
  - [Connect](#connect)
  - [Buffer](#buffer)
  - [Processing](#processing)
  - [Storage](#storage)
  - [Visualization](#visualization)
- [Pipelines](#pipelines)
  - [Stream Processing](#stream-processing)
    - [Storing Data Stream](#storing-data-stream)
    - [Processing Data Stream](#processing-data-stream)
  - [Batch Processing](#batch-processing)
  - [Visualizations](#visualizations)
- [Demo](#demo)
- [Conclusion](#conclusion)
- [Follow Me On](#follow-me-on)
- [Appendix](#appendix)


# The Data Set
- Explain the data set
- Why did you choose it?
- What do you like about it?
- What is problematic?
- What do you want to do with it?

# Used Tools
- Explain which tools you use and why
- How do they work (don't go too deep into the details, but add links)
- Why did you choose them
- How did you set them up

## Connect
## Buffer
## Processing
## Storage
## Visualization

# Pipelines
- Explain the pipelines for processing that you are building
- Go through your development and add your source code

## Stream Processing
### Storing Data Stream
### Processing Data Stream
## Batch Processing
## Visualizations

# Demo
- You could add a demo video here
- Or link to your presentation video of the project

# Conclusion
Write a comprehensive conclusion.
- How did this project turn out?
- What major things have you learned?
- What were the biggest challenges?

# Follow Me On
Add the link to your LinkedIn profile

# Appendix

[Markdown Cheat Sheet](https://github.com/adam-p/markdown-here/wiki/Markdown-Cheatsheet)

--------------------------------------------------------------------------------
/data/Postman_test_string.txt:
--------------------------------------------------------------------------------
{"InvoiceNo":536365,"StockCode":"84029E","Description":"RED WOOLLY HOTTIE WHITE HEART.","Quantity":6,"InvoiceDate":"12\/1\/2010 8:26","UnitPrice":3.39,"CustomerID":17850,"Country":"United Kingdom"}

--------------------------------------------------------------------------------
/data/TestSample.csv:
--------------------------------------------------------------------------------
InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,12/1/2010 8:26,2.55,17850,United Kingdom
536365,71053,WHITE METAL LANTERN,6,12/1/2010 8:26,3.39,17850,United Kingdom
536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,12/1/2010 8:26,2.75,17850,United Kingdom
536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,12/1/2010 8:26,3.39,17850,United Kingdom
536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,12/1/2010 8:26,3.39,17850,United Kingdom
--------------------------------------------------------------------------------