├── .gitignore ├── LICENSE ├── README.md ├── data ├── segment-event-generator.py └── transformed │ └── run-1558198402129-part-r-00000 ├── eventengine └── workshop.template ├── exercise1 ├── README.md ├── etl │ └── glue_etl.py ├── images │ ├── Architecture-Exercise1.png │ ├── AthenaCreateDatabase.png │ ├── AthenaDbAndTable.png │ ├── AthenaQueryEvents.png │ ├── AthenaQueryResults.png │ ├── AthenaQuerySkus.png │ ├── AthenaQueryStar.png │ ├── GlueAddJob.png │ ├── GlueAddJobSettings.png │ ├── GlueCrawlerAdd.png │ ├── GlueCrawlerAddDataStore.png │ ├── GlueCrawlerAddOnDemand.png │ ├── GlueCrawlerAddOutput.png │ ├── GlueCrawlerAddRole.png │ ├── GlueCrawlerRunItNow.png │ ├── GlueCrawlers.png │ ├── GlueEditJobScript.png │ ├── GlueGetStarted.png │ ├── GlueJobOutputFile.png │ ├── GlueJobs.png │ ├── GlueRunJobDialog.png │ ├── GlueRunJobParams.png │ ├── S3Bucket.png │ ├── S3CreateFolder.png │ ├── S3Folder.png │ ├── S3Upload.png │ ├── S3Uploaded.png │ ├── SegmentAddDestination.png │ ├── SegmentS3-BucketName.png │ ├── SegmentS3-Configure.png │ ├── SegmentS3-ConfirmSource.png │ ├── SegmentS3-Destination.png │ └── SegmentS3-Settings.png └── sql │ ├── athena_create_table.sql │ └── redshift_unload_as_csv.sql ├── exercise2 ├── README.md ├── event_schema.avsc └── images │ ├── Architecture-Exercise2.png │ ├── PersonalizeCampaignCreating.png │ ├── PersonalizeCampaignTest.png │ ├── PersonalizeCreateCampaign.png │ ├── PersonalizeCreateCampaignDash.png │ ├── PersonalizeCreateDataset.png │ ├── PersonalizeCreateDatasetGroup.png │ ├── PersonalizeCreateGroup.png │ ├── PersonalizeCreateSolution.png │ ├── PersonalizeCreateSolutionVersion.png │ ├── PersonalizeDatasetActive.png │ ├── PersonalizeDatasetGroups.png │ ├── PersonalizeEventAvroSchema.png │ ├── PersonalizeGetStarted.png │ ├── PersonalizeImportJob.png │ ├── PersonalizeInteractionDatasetCreating.png │ ├── PersonalizeRoleARN.png │ ├── PersonalizeSchema.png │ ├── PersonalizeSolutionConfig.png │ ├── PersonalizeSolutionHPO.png │ ├── 
PersonalizeSolutionInProgress.png │ ├── PersonalizeStart.png │ └── PersonalizeTransformedS3Path.png ├── exercise3 ├── README.md ├── images │ ├── APIGW_Test.png │ ├── APIGW_TestGet.png │ ├── APIGW_TestGetResults.png │ ├── APIGW_endpoint.png │ ├── Architecture-Exercise3-Part1.png │ ├── Architecture-Exercise3-Part2.png │ ├── CloudWatchLambda.png │ ├── Kinesis-Monitoring.png │ ├── Kinesis-PutRecordsGraph.png │ ├── KinesisCreateStream.png │ ├── KinesisDashboard.png │ ├── KinesisStreamCreated.png │ ├── LambdaCreateFunction.png │ ├── LambdaDashboard.png │ ├── LambdaEnvVariable.png │ ├── LambdaFunctionCode.png │ ├── LambdaFunctionsNav.png │ ├── LambdaKinesisConfig.png │ ├── LambdaKinesisTrigger.png │ ├── LambdaMonitoring.png │ ├── LambdaNav.png │ ├── LambdaRecAPIGW_Config.png │ ├── LambdaRecAPIGW_Trigger.png │ ├── LambdaRecCampaignArn.png │ ├── LambdaRecCode.png │ ├── LambdaRecEndpointCreate.png │ ├── LambdaSaveFunction.png │ ├── PersonalizeCampaignArn.png │ ├── PersonalizeCampaignConfig.png │ ├── PersonalizeCreateCampaign.png │ ├── PersonalizeCreateTracker.png │ ├── PersonalizeEventTrackerConfig.png │ ├── PersonalizeEventTrackerCreating.png │ ├── PersonalizeEventTrackerDetails.png │ ├── SegmentDestinations.png │ ├── SegmentKinesis-AddDestination.png │ ├── SegmentKinesis-ConfigStart.png │ ├── SegmentKinesis-ConfirmSource.png │ ├── SegmentKinesis-EventTester.png │ ├── SegmentKinesis-IAMRole.png │ └── SegmentKinesis-Settings.png └── recommendations │ └── lambda_function.py ├── exercise4 ├── README.md ├── app.py ├── function.zip └── images │ ├── APIGW_Test.png │ ├── APIGW_TestGet.png │ ├── APIGW_TestGetResults.png │ ├── APIGW_endpoint.png │ ├── Architecture-Exercise4-Part1.png │ ├── Architecture-Exercise4-Part2.png │ ├── Architecture-Exercise4-Part3.png │ ├── IAM_ExecuteRoleARN.png │ ├── IAM_FindExecuteRole.png │ ├── LambdaConfigFunction.png │ ├── LambdaCreateFunction.png │ ├── LambdaFunctionArn.png │ ├── LambdaRecAPIGW_Config.png │ ├── LambdaRecAPIGW_Trigger.png │ ├── 
LambdaRecCampaignArn.png │ ├── LambdaRecEndpointCreate.png │ ├── LambdaRecFunctionSave.png │ ├── LambdaRecFunctionSource.png │ ├── LambdaSelectFunction.png │ ├── LambdaUploadFunctionZip.png │ ├── LambdaUploadFunctionZip2.png │ ├── PersonalizeCampaignArn.png │ ├── PersonalizeConfigTracker.png │ ├── PersonalizeCreateTracker.png │ └── PersonalizeTrackerId.png └── images ├── PersonalizeDataIngestion.png ├── PersonalizeDataIngestionWithSegment.png └── SegmentPersonalizeArchitecture.png /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | package 3 | .vscode 4 | 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this 4 | software and associated documentation files (the "Software"), to deal in the Software 5 | without restriction, including without limitation the rights to use, copy, modify, 6 | merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 7 | permit persons to whom the Software is furnished to do so. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 10 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 11 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 12 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 13 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 14 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
15 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Segment + Amazon Personalize Workshop 2 | 3 | [Segment](http://segment.com) provides an easy way to collect data once about how your users are interacting with your applications (i.e. clickstream events) and send the data to third party tools and warehouses. Segment does this by enabling businesses to collect first-party event data from their websites, mobile apps, and cloud tools like email and CRM, combine with offline data, then standardize and clean the data so it can be utilized in 200+ tools like marketing, analytics, attribution, and warehouses including Amazon Redshift. 4 | 5 | [Amazon Personalize](https://aws.amazon.com/personalize/) is a machine learning service that makes it easy for developers to create individualized recommendations for customers using their applications. 6 | 7 | Machine learning is being increasingly used to improve customer engagement by powering personalized product and content recommendations, tailored search results, and targeted marketing promotions. However, developing the machine-learning capabilities necessary to produce these sophisticated recommendation systems has been beyond the reach of most organizations today due to the complexity of developing machine learning functionality. Amazon Personalize allows developers with no prior machine learning experience to easily build sophisticated personalization capabilities into their applications, using machine learning technology perfected from years of use on Amazon.com. 8 | 9 | ![Segment + Amazon Personalize Architecture](images/SegmentPersonalizeArchitecture.png) 10 | 11 | This project includes the content, instructions, test data, and code for a workshop that is intended to guide attendees through the process of integrating Segment with Amazon Personalize. 
The workshop will teach attendees how to use Segment to collect and send data to Amazon Personalize, where it can be used to make real-time item recommendations, tailored search results, and targeted marketing promotions. The workshop will include hands-on exercises for data collection and analytics, training machine learning models, and activating insights for personalized recommendations in a sample application. 12 | 13 | Attendees will leave with the skillsets for how to unlock real-time personalization and recommendations using the same technology used at Amazon.com. 14 | 15 | ## Workshop Setup 16 | 17 | Before following the exercises below, be sure to clone this repository to your local system. 18 | 19 | ```bash 20 | git clone https://github.com/james-jory/segment-personalize-workshop.git 21 | ``` 22 | 23 | If you are following this workshop on your own (i.e. in your own personal AWS account and **not** part of an organized workshop delivered by AWS), you will also need to apply the CloudFormation template [eventengine/workshop.template](eventengine/workshop.template) within your account before stepping through the exercises. This template will setup the necessary resources and IAM roles & policies required by the exercises in this workshop. If you're participating in an AWS-led workshop, this has likely already been done for you. 24 | 25 | ## [Exercise 1](exercise1/) - Data Preparation, Filtering, and Exploration 26 | 27 | The focus of this [exercise](exercise1/) is to learn how to use historical clickstream data from Segment to train or bootstrap a machine learning model in Personalize. We will walk through the process of configuring Segment to write clickstream data to an Amazon Simple Storage Service (S3) bucket. Then you will build an AWS Glue Job that will transform and filter the raw data written to S3 into a format that can be uploaded into Personalize. In addition, we will learn how to use Amazon Athena to query and explore this data directly from S3. 
28 | 29 | ## [Exercise 2](exercise2/) - Create Personalize Dataset Group, Solution, and Campaign 30 | 31 | In this [exercise](exercise2/) we will pick up where we left off in the prior exercise by uploading the transformed data from S3 into a Personalize Dataset Group. Then you will create a Personalize Solution based on this data. A solution is the term Amazon Personalize uses for a trained machine learning model. Creating a solution entails optimizing the model to deliver the best results for a specific business need. Amazon Personalize uses "recipes" to create these personalized solutions. We will wrap up this exercise by creating a campaign. A deployed solution is known as a campaign, and is able to make recommendations for your users. 32 | 33 | ## [Exercise 3](exercise3/) - Getting Recommendations from Personalize 34 | 35 | In this [exercise](exercise3/) we will demonstrate how recommendations from Personalize can be accessed by your applications via a REST API using [Amazon API Gateway](https://aws.amazon.com/api-gateway/) and [AWS Lambda](https://aws.amazon.com/lambda/). You can use this function as a secure mechanism to access recommendations from your web and mobile apps as well as an interception point to rehydrate item identifiers into more metadata rich representations. 36 | 37 | ## [Exercise 4](exercise4/) - Real-Time Data Collection and Activating Recommendations using Segment Personas 38 | 39 | In this final [exercise](exercise4/) we will look at how [Segment's Amazon Personalize](https://segment.com/docs/destinations/amazon-personalize/) destination can be used to send events collected in real-time directly to Personalize. This will allow Personalize to learn from customer interactions that are being collected by Segment to improve its recommendations. Furthermore, we will learn how to take recommendations from Personalize and attach those recommendations to customer profiles in Segment Personas. 
This allows you to weave recommendations not only in your own website and mobile apps but also throughout your martech stack. -------------------------------------------------------------------------------- /data/segment-event-generator.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | 4 | from dateutil import relativedelta 5 | import analytics 6 | import calendar 7 | import datetime 8 | import json 9 | import random 10 | import socket 11 | import struct 12 | import sys 13 | import uuid 14 | 15 | from_date = sys.argv[1] 16 | 17 | # 18 | # Add your Segment write keys here. 19 | # You should have one source configured for web, ios, android, and email in 20 | # your Segment workspace. 21 | # 22 | 23 | web_write_key = '' 24 | android_write_key = '' 25 | ios_write_key = '' 26 | email_write_key = '' 27 | android_analytics = analytics.Client(android_write_key) 28 | web_analytics = analytics.Client(web_write_key) 29 | ios_analytics = analytics.Client(ios_write_key) 30 | email_analytics = analytics.Client(email_write_key) 31 | clients = [(android_analytics, 'Android'), (web_analytics, 'Web'), (ios_analytics, 'iOS')] 32 | #clients = [(web_analytics, 'Web')] 33 | custom_traits = [ 34 | {'property_name': 'likelihood_to_buy', 'values': [(.50, 45), (.60, 40), (.40, 35), (.80, 32)]}, 35 | {'property_name': 'lifetime_value', 'values': [(1000.00, 45), (2000.00, 40), (5000.00, 35), (400.00, 32)]} 36 | ] 37 | registration_path = [ 38 | {'event_name': 'Page Viewed', 'property_choices': {'page_name': [('Home', 20), ('Search', 55)]}}, 39 | {'event_name': 'Signup Clicked', 'property_choices': {'page_name': [('Home', 20), ('Search', 40)]}}, 40 | {'event_name': 'Signup Success', 'property_choices': {'page_name': [('Home', 20), ('Search', 40)]}} 41 | ] 42 | usage_paths = [ 43 | [ 44 | {'event_name': 'Product Added', 45 | 
'property_choices': { 46 | 'sku': [("adidas-classic-backpack", 5), ("adidas-classic-backpack-legend-ink-multicolour", 5), ("adidas-kids-stan-smith", 5), ("adidas-superstar-80s", 5), ("asics-tiger-gel-lyte-v-30-years-of-gel-pack", 5), ("black-leather-bag", 5), ("blue-silk-tuxedo", 5), ("chequered-red-shirt", 5), ("classic-leather-jacket", 5), ("classic-varsity-top", 5), ("converse-chuck-taylor-all-star-ii-hi", 5), ("converse-chuck-taylor-all-star-lo", 5), ("converse-toddler-chuck-taylor-all-star-axel-mid", 5), ("dark-denim-top", 5), ("dr-martens-1460z-dmc-8-eye-boot-cherry-smooth", 5), ("dr-martens-1461-dmc-3-eye-shoe-black-smooth", 5), ("dr-martens-cavendish-3-eye-shoe-black", 5), ("flex-fit-mini-ottoman-black", 5), ("floral-white-top", 5), ("herschel-iona", 5), ("led-high-tops", 5), ("longsleeve-cotton-top", 5), ("navy-sport-jacket", 5), ("nike-crackle-print-tb-tee", 5), ("nike-swoosh-pro-flat-peak-cap", 5), ("nike-toddler-roshe-one", 5), ("ocean-blue-shirt", 5), ("olive-green-jacket", 5), ("palladium-pallatech-hi-tx-chevron", 5), ("puma-suede-classic-regal", 5), ("red-sports-tee", 5), ("silk-summer-top", 5), ("dark-winter-jacket", 5), ("striped-silk-blouse", 5), ("striped-skirt-and-top", 5), ("supra-mens-vaider", 5), ("timberland-mens-6-inch-premium-boot", 5), ("vans-apparel-and-accessories-classic-super-no-show-socks-3-pack-white", 5), ("vans-era-59-moroccan-geo-dress-blues", 5), ("vans-authentic-butterfly-true-white-black", 5), ("vans-authentic-multi-eyelets-gradient-crimson", 5), ("vans-classic-slip-on-perforated-suede", 5), ("vans-era-59-desert-cowboy", 5), ("vans-old-skool-butterfly-true-white-black", 5), ("vans-sh-8-hi", 5), ("vans-sk8-hi-decon-cutout-leaves-white", 5), ("vans-authentic-lo-pro-burgandy-white", 5), ("white-cotton-shirt", 5), ("yellow-wool-jumper", 5), ("zipped-jacket", 5)] 47 | }, 48 | 'dependent_props_list': { 49 | } 50 | } 51 | ], 52 | [ 53 | {'event_name': 'Product Clicked', 54 | 'property_choices': { 55 | 'sku': 
[("adidas-classic-backpack", 5), ("adidas-classic-backpack-legend-ink-multicolour", 5), ("adidas-kids-stan-smith", 5), ("adidas-superstar-80s", 5), ("asics-tiger-gel-lyte-v-30-years-of-gel-pack", 5), ("black-leather-bag", 5), ("blue-silk-tuxedo", 5), ("chequered-red-shirt", 5), ("classic-leather-jacket", 5), ("classic-varsity-top", 5), ("converse-chuck-taylor-all-star-ii-hi", 5), ("converse-chuck-taylor-all-star-lo", 5), ("converse-toddler-chuck-taylor-all-star-axel-mid", 5), ("dark-denim-top", 5), ("dr-martens-1460z-dmc-8-eye-boot-cherry-smooth", 5), ("dr-martens-1461-dmc-3-eye-shoe-black-smooth", 5), ("dr-martens-cavendish-3-eye-shoe-black", 5), ("flex-fit-mini-ottoman-black", 5), ("floral-white-top", 5), ("herschel-iona", 5), ("led-high-tops", 5), ("longsleeve-cotton-top", 5), ("navy-sport-jacket", 5), ("nike-crackle-print-tb-tee", 5), ("nike-swoosh-pro-flat-peak-cap", 5), ("nike-toddler-roshe-one", 5), ("ocean-blue-shirt", 5), ("olive-green-jacket", 5), ("palladium-pallatech-hi-tx-chevron", 5), ("puma-suede-classic-regal", 5), ("red-sports-tee", 5), ("silk-summer-top", 5), ("dark-winter-jacket", 5), ("striped-silk-blouse", 5), ("striped-skirt-and-top", 5), ("supra-mens-vaider", 5), ("timberland-mens-6-inch-premium-boot", 5), ("vans-apparel-and-accessories-classic-super-no-show-socks-3-pack-white", 5), ("vans-era-59-moroccan-geo-dress-blues", 5), ("vans-authentic-butterfly-true-white-black", 5), ("vans-authentic-multi-eyelets-gradient-crimson", 5), ("vans-classic-slip-on-perforated-suede", 5), ("vans-era-59-desert-cowboy", 5), ("vans-old-skool-butterfly-true-white-black", 5), ("vans-sh-8-hi", 5), ("vans-sk8-hi-decon-cutout-leaves-white", 5), ("vans-authentic-lo-pro-burgandy-white", 5), ("white-cotton-shirt", 5), ("yellow-wool-jumper", 5), ("zipped-jacket", 5)] 56 | }, 57 | 'dependent_props_list': { 58 | } 59 | } 60 | ], 61 | [ 62 | {'event_name': 'Order Completed', 63 | 'property_choices': { 64 | 'sku': [("adidas-classic-backpack", 5), 
("adidas-classic-backpack-legend-ink-multicolour", 5), ("adidas-kids-stan-smith", 5), ("adidas-superstar-80s", 5), ("asics-tiger-gel-lyte-v-30-years-of-gel-pack", 5), ("black-leather-bag", 5), ("blue-silk-tuxedo", 5), ("chequered-red-shirt", 5), ("classic-leather-jacket", 5), ("classic-varsity-top", 5), ("converse-chuck-taylor-all-star-ii-hi", 5), ("converse-chuck-taylor-all-star-lo", 5), ("converse-toddler-chuck-taylor-all-star-axel-mid", 5), ("dark-denim-top", 5), ("dr-martens-1460z-dmc-8-eye-boot-cherry-smooth", 5), ("dr-martens-1461-dmc-3-eye-shoe-black-smooth", 5), ("dr-martens-cavendish-3-eye-shoe-black", 5), ("flex-fit-mini-ottoman-black", 5), ("floral-white-top", 5), ("herschel-iona", 5), ("led-high-tops", 5), ("longsleeve-cotton-top", 5), ("navy-sport-jacket", 5), ("nike-crackle-print-tb-tee", 5), ("nike-swoosh-pro-flat-peak-cap", 5), ("nike-toddler-roshe-one", 5), ("ocean-blue-shirt", 5), ("olive-green-jacket", 5), ("palladium-pallatech-hi-tx-chevron", 5), ("puma-suede-classic-regal", 5), ("red-sports-tee", 5), ("silk-summer-top", 5), ("dark-winter-jacket", 5), ("striped-silk-blouse", 5), ("striped-skirt-and-top", 5), ("supra-mens-vaider", 5), ("timberland-mens-6-inch-premium-boot", 5), ("vans-apparel-and-accessories-classic-super-no-show-socks-3-pack-white", 5), ("vans-era-59-moroccan-geo-dress-blues", 5), ("vans-authentic-butterfly-true-white-black", 5), ("vans-authentic-multi-eyelets-gradient-crimson", 5), ("vans-classic-slip-on-perforated-suede", 5), ("vans-era-59-desert-cowboy", 5), ("vans-old-skool-butterfly-true-white-black", 5), ("vans-sh-8-hi", 5), ("vans-sk8-hi-decon-cutout-leaves-white", 5), ("vans-authentic-lo-pro-burgandy-white", 5), ("white-cotton-shirt", 5), ("yellow-wool-jumper", 5), ("zipped-jacket", 5)] 65 | }, 66 | 'dependent_props_list': { 67 | } 68 | } 69 | ] 70 | ] 71 | 72 | email_spec = [ 73 | {'event_name': 'Email Sent', 'property_choices': { 74 | 'campaign_id': [(15203, 70), (22123, 55), (34321, 20), (41232, 55), (53213, 20), 
(61232, 55), (234321, 40), (923422, 50)], 75 | 'app_id': [(15203, 70), (22123, 55), (34321, 20), (41232, 55), (53213, 20), (61232, 55), (234321, 40), (923422, 50)], 76 | 'button_id': [(1, 70), (2, 55), (3, 20), (4, 55), (5, 20), (6, 55)], 77 | }}, 78 | {'event_name': 'Email Delivered', 'property_choices': { 79 | 'campaign_id': [(15203, 70), (22123, 55), (34321, 20), (41232, 55), (53213, 20), (61232, 55), (234321, 40), (923422, 50)], 80 | 'app_id': [(15203, 70), (22123, 55), (34321, 20), (41232, 55), (53213, 20), (61232, 55), (234321, 40), (923422, 50)], 81 | 'button_id': [(1, 70), (2, 55), (3, 20), (4, 55), (5, 20), (6, 55)], 82 | }}, 83 | {'event_name': 'Email Opened', 'property_choices': { 84 | 'campaign_id': [(15203, 70), (22123, 55), (34321, 20), (41232, 55), (53213, 20), (61232, 55), (234321, 40), (923422, 50)], 85 | 'app_id': [(15203, 70), (22123, 55), (34321, 20), (41232, 55), (53213, 20), (61232, 55), (234321, 40), (923422, 50)], 86 | 'button_id': [(1, 70), (2, 55), (3, 20), (4, 55), (5, 20), (6, 55)], 87 | }}, 88 | {'event_name': 'Email Link Clicked', 'property_choices': { 89 | 'campaign_id': [(15203, 70), (22123, 55), (34321, 20), (41232, 55), (53213, 20), (61232, 55), (234321, 40), (923422, 50)], 90 | 'app_id': [(15203, 70), (22123, 55), (34321, 20), (41232, 55), (53213, 20), (61232, 55), (234321, 40), (923422, 50)], 91 | 'button_id': [(1, 70), (2, 55), (3, 20), (4, 55), (5, 20), (6, 55)], 92 | }}, 93 | {'event_name': 'Unsubscribe', 'property_choices': { 94 | 'campaign_id': [(15203, 70), (22123, 55), (34321, 20), (41232, 55), (53213, 20), (61232, 55), (234321, 40), (923422, 50)], 95 | 'app_id': [(15203, 70), (22123, 55), (34321, 20), (41232, 55), (53213, 20), (61232, 55), (234321, 40), (923422, 50)], 96 | 'button_id': [(1, 70), (2, 55), (3, 20), (4, 55), (5, 20), (6, 55)], 97 | }}, 98 | ] 99 | 100 | def weighted_choice(choices): 101 | total = sum(w for c, w in choices) 102 | r = random.uniform(0, total) 103 | upto = 0 104 | for c, w in choices: 105 | if 
upto + w > r: 106 | return c 107 | upto += w 108 | 109 | class User: 110 | def __init__(self, operating_system, custom_traits): 111 | self.anonymous_id = str(uuid.uuid4()) 112 | self.user_id = random.randint(1000000000, 9999999999) 113 | self.traits = self.build_traits(operating_system, custom_traits) 114 | 115 | def build_traits(self, operating_system, custom_traits): 116 | traits = {} 117 | first_names = ['James','John','Robert','Michael','William','David','Richard','Charles','Joseph','Thomas','Christopher','Daniel','Paul','Mark','Donald','George','Kenneth','Steven','Edward','Brian','Ronald','Anthony','Kevin','Jason','Matthew','Gary','Timothy','Jose','Larry','Jeffrey','Frank','Scott','Eric','Stephen','Andrew','Raymond','Gregory','Joshua','Jerry','Dennis','Walter','Patrick','Peter','Harold','Douglas','Henry','Carl','Arthur','Ryan','Roger','Joe','Juan','Jack','Albert','Jonathan','Justin','Terry','Gerald','Keith','Samuel','Willie','Ralph','Lawrence','Nicholas','Roy','Benjamin','Bruce','Brandon','Adam','Harry','Fred','Wayne','Billy','Steve','Louis','Jeremy','Aaron','Randy','Howard','Eugene','Carlos','Russell','Bobby','Victor','Martin','Ernest','Phillip','Todd','Jesse','Craig','Alan','Shawn','Clarence','Sean','Philip','Chris','Johnny','Earl','Jimmy','Antonio','Danny','Bryan','Tony','Luis','Mike','Stanley','Leonard','Nathan','Dale','Manuel','Rodney','Curtis','Norman','Allen','Marvin','Vincent','Glenn','Jeffery','Travis','Jeff','Chad','Jacob','Lee','Melvin','Alfred','Kyle','Francis','Bradley','Jesus','Herbert','Frederick','Ray','Joel','Edwin','Don','Eddie','Ricky','Troy','Randall','Barry','Alexander','Bernard','Mario','Leroy','Francisco','Marcus','Micheal','Theodore','Mary','Patricia','Linda','Barbara','Elizabeth','Jennifer','Maria','Susan','Margaret','Dorothy','Lisa','Nancy','Karen','Betty','Helen','Sandra','Donna','Carol','Ruth','Sharon','Michelle','Laura','Sarah','Kimberly','Deborah','Jessica','Shirley','Cynthia','Angela','Melissa','Brenda','Amy','Anna','Rebecca','Vir
ginia','Kathleen','Pamela','Martha','Debra','Amanda','Stephanie','Carolyn','Christine','Marie','Janet','Catherine','Frances','Ann','Joyce','Diane','Alice','Julie','Heather','Teresa','Doris','Gloria','Evelyn','Jean','Cheryl','Mildred','Katherine','Joan','Ashley','Judith','Rose','Janice','Kelly','Nicole','Judy','Christina','Kathy','Theresa','Beverly','Denise','Tammy','Irene','Jane','Lori','Rachel','Marilyn','Andrea','Kathryn','Louise','Sara','Anne','Jacqueline','Wanda','Bonnie','Julia','Ruby','Lois','Tina','Phyllis','Norma','Paula','Diana','Annie','Lillian','Emily','Robin','Peggy','Crystal','Gladys','Rita','Dawn','Connie','Florence','Tracy','Edna','Tiffany','Carmen','Rosa','Cindy','Grace','Wendy','Victoria','Edith','Kim','Sherry','Sylvia','Josephine'] 118 | last_names = ['Smith','Johnson','Williams','Jones','Brown','Davis','Miller','Wilson','Moore','Taylor','Anderson','Thomas','Jackson','White','Harris','Martin','Thompson','Garcia','Martinez','Robinson','Clark','Rodriguez','Lewis','Lee','Walker','Hall','Allen','Young','Hernandez','King','Wright','Lopez','Hill','Scott','Green','Adams','Baker','Gonzalez','Nelson','Carter','Mitchell','Perez','Roberts','Turner','Phillips','Campbell','Parker','Evans','Edwards','Collins','Stewart','Sanchez','Morris','Rogers','Reed','Cook','Morgan','Bell','Murphy','Bailey','Rivera','Cooper','Richardson','Cox','Howard','Ward','Torres','Peterson','Gray','Ramirez','James','Watson','Brooks','Kelly','Sanders','Price','Bennett','Wood','Barnes','Ross','Henderson','Coleman','Jenkins','Perry','Powell','Long','Patterson','Hughes','Flores','Washington','Butler','Simmons','Foster','Gonzales','Bryant','Alexander','Russell','Griffin','Diaz','Hayes','Myers','Ford','Hamilton','Graham','Sullivan','Wallace','Woods','Cole','West','Jordan','Owens','Reynolds','Fisher','Ellis','Harrison','Gibson','Mcdonald','Cruz','Marshall','Ortiz','Gomez','Murray','Freeman','Wells','Webb','Simpson','Stevens','Tucker','Porter','Hunter','Hicks','Crawford','Henry','Boyd','Mason','
Morales','Kennedy','Warren','Dixon','Ramos','Reyes','Burns','Gordon','Shaw','Holmes','Rice','Robertson','Hunt','Black','Daniels','Palmer','Mills','Nichols','Grant','Knight','Ferguson','Rose','Stone','Hawkins','Dunn','Perkins','Hudson','Spencer','Gardner','Stephens','Payne','Pierce','Berry','Matthews','Arnold','Wagner','Willis','Ray','Watkins','Olson','Carroll','Duncan','Snyder','Hart','Cunningham','Bradley','Lane','Andrews','Ruiz','Harper','Fox','Riley','Armstrong','Carpenter','Weaver','Greene','Lawrence','Elliott','Chavez','Sims','Austin','Peters','Kelley','Franklin','Lawson','Fields','Gutierrez','Ryan','Schmidt','Carr','Vasquez','Castillo','Wheeler','Chapman','Oliver','Montgomery','Richards','Williamson','Johnston','Banks','Meyer','Bishop','Mccoy','Howell','Alvarez','Morrison','Hansen','Fernandez','Garza','Harvey','Little','Burton','Stanley','Nguyen','George','Jacobs','Reid','Kim','Fuller','Lynch','Dean','Gilbert','Garrett','Romero','Welch','Larson','Frazier','Burke','Hanson','Day','Mendoza','Moreno','Bowman','Medina','Fowler','Brewer','Hoffman','Carlson','Silva','Pearson','Holland','Douglas','Fleming','Jensen','Vargas','Byrd','Davidson'] 119 | email_domains = ['gmailx', 'yahoox', 'aolx', 'hotmailx'] 120 | email_words = ['dragon','lancer','sword','fire','magic','dance','random','hacker','pike','trebuchet','catapult','iron','ranger','bow','arrow','strafe','hound','wiggle','darkness','light','coward','hero','giant','troll','dog','wolf','bear','puma','lion','pterodactyl','love','shadow','x'] 121 | marketing_campaign_source = {'property_name': 'Campaign Source', 'values': [('Twitter', 20), ('Facebook', 55), ('Email', 12), ('Organic', 70), ('Google Adwords', 10)]} 122 | marketing_campaign_name = {'property_name': 'Campaign Name', 'values': [('Super Sale', 20), ('Buy Now', 15), ('Huge Discounts!', 25)]} 123 | invited_user = {'property_name': 'Invited User?', 'values': [('Invited User?', 10), ('Invited User?', 100)]} 124 | app_version = {'property_name': 'App Version', 
'values': [('1.0.1', 5),('2.0.1', 10), ('3.0.1', 90)]} 125 | experiment_group = {'property_name': 'Experiment Group', 'values': [('Group A', 20), ('Group B', 20), ('Group C', 60)]} 126 | top_categories = {'property_name': 'Favorite Departments', 'values': [('Auto', 20), ('Accessories', 20), ('Clothing', 20), ('Beauty', 20), ('Electronics', 30)]} 127 | traits_to_add = [invited_user, experiment_group, marketing_campaign_source, top_categories] 128 | traits = self.modify_user(traits, traits_to_add) 129 | traits = self.modify_user(traits, custom_traits) 130 | iphone_models = {'property_name': 'model', 'values': [('iPhone4,1', 40) , ('iPhone3,1', 35), ('iPhone6,1', 50), ('iPhone5,2', 30), ('iPhone6,1', 45), ('iPhone5,1', 28), ('iPod5,1', 22), ('iPad2,5', 20), ('iPad3,4', 15), ('iPad4,1', 10)]} 131 | android_models = {'property_name': 'model', 'values': [('GT-I9300', 45), ('GT-I9500', 40), ('SM-G900F', 35), ('GT-I8190L', 32), ('XT1032', 28), ('Nexus 5', 25), ('LG-D802', 20)]} 132 | browser = {'property_name': 'browser', 'values': [('Chrome', 45), ('Firefox', 40), ('Safari', 35), ('Internet Explorer', 32)]} 133 | traits['operating_system'] = operating_system 134 | if traits['operating_system'] == 'iOS': 135 | self.modify_user(traits, [iphone_models, app_version]) 136 | elif traits['operating_system'] == 'Android': 137 | self.modify_user(traits, [android_models, app_version]) 138 | else: 139 | self.modify_user(traits, [browser]) 140 | if traits['Campaign Source'] == 'Organic': 141 | traits['Campaign Name'] = 'Organic' 142 | else: 143 | self.modify_user(traits, [marketing_campaign_name]) 144 | referrers = ['Organic', 'https://retailmenot.com','https://google.com','https://facebook.com','https://rxsaver.com'] 145 | email = '%s.%s@%s.com' % (random.choice(email_words), random.choice(email_words), random.choice(email_domains)) 146 | referrer = random.choice(referrers) 147 | traits.update({'first_name':random.choice(first_names), 'last_name':random.choice(last_names), 
'email':email, 'Referrering Domain':referrer, 'ip': socket.inet_ntoa(struct.pack('>I', random.randint(1, 0xffffffff)))}) 148 | return traits 149 | 150 | def modify_user(self, traits, traits_to_add): 151 | for list_prop in traits_to_add: 152 | prop = weighted_choice(list_prop['values']) 153 | # if one value is organic, make all subsequent values organic 154 | if prop: 155 | traits.update({list_prop['property_name']:prop}) 156 | return traits 157 | 158 | def get_user(self): 159 | return {'traits': self.traits, 'user_id': self.user_id, 'anonymous_id': self.anonymous_id} 160 | 161 | def proceed(success_percent): 162 | if random.randint(1, 100) < success_percent: 163 | return True 164 | return False 165 | 166 | def send_track(client, user, operating_system, event, timestamp): 167 | properties = build_event_properties(event, operating_system) 168 | client.track(user['user_id'], event['event_name'], properties, context={'ip': user['traits']['ip']}, anonymous_id=user['anonymous_id'], timestamp=datetime.datetime.fromtimestamp(timestamp)) 169 | 170 | def registration(registration_path, user_amount, timestamp, client, operating_system): 171 | for x in range(user_amount): 172 | user = User(operating_system, custom_traits).get_user() 173 | registration_funnel(client, user, operating_system, registration_path, 0, timestamp) 174 | 175 | def registration_funnel(client, user, operating_system, registration_path, state, timestamp): 176 | timestamp = timestamp + random.randint(1,600) 177 | send_track(client, user, operating_system, registration_path[state], timestamp) 178 | success_percent = 50 + (state * 10) 179 | if proceed(success_percent): 180 | if len(registration_path) >= state + 2: 181 | registration_funnel(client, user, operating_system, registration_path, state + 1, timestamp) 182 | else: 183 | client.identify(user['user_id'], user['traits'], timestamp=datetime.datetime.fromtimestamp(timestamp)) 184 | file = open('new_users.txt', 'a') 185 | file.write(json.dumps(user) + 
'\n') 186 | file.close() 187 | email_funnel(user, email_spec, operating_system, timestamp) 188 | return 189 | 190 | def usage(usage_paths, operating_system, timestamp, client): 191 | try: 192 | users_file = open('registered_users.txt', 'r') 193 | except: 194 | users_file = [] 195 | for user in users_file: 196 | user = json.loads(user) 197 | for path in usage_paths: 198 | usage_funnel(client, user, operating_system, path, timestamp, 0) 199 | if proceed(70): 200 | file = open('new_users.txt', 'a') 201 | file.write(json.dumps(user) + '\n') 202 | file.close() 203 | 204 | def usage_funnel(client, user, operating_system, path, timestamp, state): 205 | if proceed(70 + (state * 10)): 206 | timestamp = timestamp + random.randint(1,600) 207 | send_track(client, user, operating_system, path[state], timestamp) 208 | if len(path) >= state + 2: 209 | usage_funnel(client, user, operating_system, path, timestamp, state + 1) 210 | elif proceed(10): 211 | usage_funnel(client, user, operating_system, path, timestamp, 0) 212 | return 213 | 214 | 215 | def build_event_properties(event, operating_system): 216 | prop_choices = event['property_choices'] 217 | properties = {} 218 | if event.get('dependent_props_list'): 219 | properties.update(assign_dependent_properties(event['dependent_props_list'])) 220 | for prop_name in prop_choices: 221 | properties[prop_name] = weighted_choice(prop_choices[prop_name]) 222 | properties.update(build_platform_properties(operating_system)) 223 | return properties 224 | 225 | def build_platform_properties(operating_system): 226 | properties = {'operating_system': operating_system} 227 | app_version = {'property_name': 'App Version', 'values': [('1.0.1', 5),('2.0.1', 10), ('3.0.1', 90)]} 228 | iphone_models = {'property_name': 'model', 'values': [('iPhone4,1', 40) , ('iPhone3,1', 35), ('iPhone6,1', 50), ('iPhone5,2', 30), ('iPhone6,1', 45), ('iPhone5,1', 28), ('iPod5,1', 22), ('iPad2,5', 20), ('iPad3,4', 15), ('iPad4,1', 10)]} 229 | android_models = 
{'property_name': 'model', 'values': [('GT-I9300', 45), ('GT-I9500', 40), ('SM-G900F', 35), ('GT-I8190L', 32), ('XT1032', 28), ('Nexus 5', 25), ('LG-D802', 20)]} 230 | browser = {'property_name': 'browser', 'values': [('Chrome', 45), ('Firefox', 40), ('Safari', 35), ('Internet Explorer', 32)]} 231 | if operating_system == 'Android': 232 | properties['app_version'] = weighted_choice(app_version['values']) 233 | properties['model'] = weighted_choice(android_models['values']) 234 | elif operating_system == 'iOS': 235 | properties['app_version'] = weighted_choice(app_version['values']) 236 | properties['model'] = weighted_choice(iphone_models['values']) 237 | elif operating_system == 'Web': 238 | properties['browser'] = weighted_choice(browser['values']) 239 | return properties 240 | 241 | 242 | def assign_dependent_properties(prop_choices): 243 | properties = {} 244 | for prop_name in prop_choices: 245 | prop_value = weighted_choice(prop_choices[prop_name]['values']) 246 | properties[prop_name] = prop_value 247 | dependents = prop_choices[prop_name]['dependent_properties'][prop_value] 248 | for dependent in dependents: 249 | properties[dependent] = weighted_choice(dependents[dependent]) 250 | return properties 251 | 252 | def email_funnel(user, email_spec, operating_system, timestamp): 253 | for event in email_spec: 254 | timestamp += 400 255 | if proceed(70): 256 | send_track(email_analytics, user, operating_system, event, timestamp) 257 | else: 258 | return 259 | return 260 | 261 | def stupid_file_switch(old_file, new_file): 262 | new = open(new_file, "r") 263 | old = open(old_file, "w") 264 | for user in new: 265 | old.write(user) 266 | old.close() 267 | new.close() 268 | new = open(new_file, "w") 269 | new.close() 270 | 271 | def generate_data(registration_path, usage_paths, user_amount, from_date): 272 | from_date_list = from_date.split("-") 273 | from_date = datetime.date(int(from_date_list[0]), int(from_date_list[1]), int(from_date_list[2])) 274 | to_date = 
datetime.date.today() 275 | delta = (to_date - from_date).days 276 | for x in range(delta): 277 | request_date = str(from_date + relativedelta.relativedelta(days=x)) 278 | #print request_date 279 | timestamp = calendar.timegm(datetime.datetime.strptime(request_date, "%Y-%m-%d").timetuple()) + 32400 280 | for client, operating_system in clients: 281 | registration(registration_path, user_amount, timestamp, client, operating_system) 282 | usage(usage_paths, operating_system, timestamp, client) 283 | stupid_file_switch('registered_users.txt', 'new_users.txt') 284 | for client, operating_system in clients: 285 | client.flush() 286 | 287 | generate_data(registration_path, usage_paths, 1000, from_date) 288 | -------------------------------------------------------------------------------- /eventengine/workshop.template: -------------------------------------------------------------------------------- 1 | { 2 | "AWSTemplateFormatVersion": "2010-09-09", 3 | "Description": "IAM Roles required for Segment + Personalize workshop", 4 | "Parameters": { 5 | "CampaignName": { 6 | "Type": "String", 7 | "Default": "segment-workshop-campaign", 8 | "Description": "Enter the name of the Personalize Campaign to use for fetching recommendations." 9 | }, 10 | "SegmentLambdaDestinationFunctionName": { 11 | "Type": "String", 12 | "Default": "SegmentPersonalizeDestinationHandler", 13 | "Description": "Enter name of the Lambda function to be called by Segment for the Personalize destination." 14 | }, 15 | "SegmentSecretId": { 16 | "Type": "String", 17 | "Default": "123456789", 18 | "Description": "Enter Segment Secret ID that should be used for External ID in cross-account trust policy." 
19 | } 20 | }, 21 | "Resources": { 22 | "GlueServiceRole": { 23 | "Type": "AWS::IAM::Role", 24 | "Properties": { 25 | "Path": "/service-role/", 26 | "Policies": [ 27 | { 28 | "PolicyDocument": { 29 | "Version": "2012-10-17", 30 | "Statement": [ 31 | { 32 | "Effect": "Allow", 33 | "Action": [ 34 | "s3:GetObject", 35 | "s3:PutObject" 36 | ], 37 | "Resource": [ 38 | "arn:aws:s3:::segment-personalize-workshop/segment-logs/*", 39 | "arn:aws:s3:::segment-personalize-data/segment-logs/*", 40 | { "Fn::Sub": "arn:aws:s3:::personalize-data-${AWS::AccountId}/raw-events/events.json*" }, 41 | { "Fn::Sub": "arn:aws:s3:::personalize-data-${AWS::AccountId}/transformed/*" } 42 | ] 43 | }, 44 | { 45 | "Action": [ 46 | "s3:ListBucket" 47 | ], 48 | "Resource": [ 49 | "arn:aws:s3:::segment-personalize-workshop", 50 | "arn:aws:s3:::segment-personalize-data" 51 | ], 52 | "Effect": "Allow" 53 | } 54 | ] 55 | }, 56 | "PolicyName": "glue-execution-policy" 57 | } 58 | ], 59 | "ManagedPolicyArns": [ 60 | "arn:aws:iam::aws:policy/service-role/AWSGlueServiceRole" 61 | ], 62 | "AssumeRolePolicyDocument": { 63 | "Version": "2012-10-17", 64 | "Statement": [ 65 | { 66 | "Effect": "Allow", 67 | "Principal": { 68 | "Service": "glue.amazonaws.com" 69 | }, 70 | "Action": "sts:AssumeRole" 71 | } 72 | ] 73 | } 74 | } 75 | }, 76 | "PersonalizeServiceRole": { 77 | "Type": "AWS::IAM::Role", 78 | "Properties": { 79 | "Path": "/service-role/", 80 | "Policies": [ 81 | { 82 | "PolicyDocument": { 83 | "Version": "2012-10-17", 84 | "Statement": [ 85 | { 86 | "Action": [ 87 | "s3:ListBucket" 88 | ], 89 | "Effect": "Allow", 90 | "Resource": { 91 | "Fn::Sub": "arn:aws:s3:::personalize-data-${AWS::AccountId}" 92 | } 93 | }, 94 | { 95 | "Action": [ 96 | "s3:GetObject", 97 | "s3:PutObject" 98 | ], 99 | "Effect": "Allow", 100 | "Resource": { 101 | "Fn::Sub": "arn:aws:s3:::personalize-data-${AWS::AccountId}/*" 102 | } 103 | } 104 | ] 105 | }, 106 | "PolicyName": "personalize-execution-policy" 107 | } 108 | ], 109 | 
"ManagedPolicyArns": [ 110 | "arn:aws:iam::aws:policy/service-role/AmazonPersonalizeFullAccess" 111 | ], 112 | "AssumeRolePolicyDocument": { 113 | "Version": "2012-10-17", 114 | "Statement": [ 115 | { 116 | "Effect": "Allow", 117 | "Principal": { 118 | "Service": "personalize.amazonaws.com" 119 | }, 120 | "Action": "sts:AssumeRole" 121 | } 122 | ] 123 | } 124 | } 125 | }, 126 | "SegmentPersonalizeLambdaRole": { 127 | "Type": "AWS::IAM::Role", 128 | "Properties": { 129 | "Path": "/", 130 | "Policies": [ 131 | { 132 | "PolicyDocument": { 133 | "Version": "2012-10-17", 134 | "Statement": [ 135 | { 136 | "Effect": "Allow", 137 | "Action": [ 138 | "logs:CreateLogGroup", 139 | "logs:CreateLogStream", 140 | "logs:PutLogEvents" 141 | ], 142 | "Resource": "arn:aws:logs:*:*:*" 143 | }, 144 | { 145 | "Effect": "Allow", 146 | "Action": [ 147 | "personalize:*" 148 | ], 149 | "Resource": [ 150 | "*", 151 | { "Fn::Sub": "arn:aws:personalize-events:${AWS::Region}:${AWS::AccountId}:*" } 152 | ] 153 | }, 154 | { 155 | "Effect": "Allow", 156 | "Action": [ 157 | "personalize:GetRecommendations" 158 | ], 159 | "Resource": { 160 | "Fn::Sub": "arn:aws:personalize:${AWS::Region}:${AWS::AccountId}:campaign/${CampaignName}" 161 | } 162 | } 163 | ] 164 | }, 165 | "PolicyName": "segment-personalize-lambda-policy" 166 | } 167 | ], 168 | "AssumeRolePolicyDocument": { 169 | "Version": "2012-10-17", 170 | "Statement": [ 171 | { 172 | "Effect": "Allow", 173 | "Principal": { 174 | "Service": "lambda.amazonaws.com" 175 | }, 176 | "Action": "sts:AssumeRole" 177 | } 178 | ] 179 | } 180 | } 181 | }, 182 | "SegmentExecutePersonalizeLambdaRole": { 183 | "Type": "AWS::IAM::Role", 184 | "Properties": { 185 | "Path": "/", 186 | "Policies": [ 187 | { 188 | "PolicyDocument": { 189 | "Version": "2012-10-17", 190 | "Statement": [ 191 | { 192 | "Effect": "Allow", 193 | "Action": [ 194 | "lambda:InvokeFunction" 195 | ], 196 | "Resource": [ 197 | { 198 | "Fn::Sub": 
"arn:aws:lambda:${AWS::Region}:${AWS::AccountId}:function:${SegmentLambdaDestinationFunctionName}" 199 | } 200 | ] 201 | } 202 | ] 203 | }, 204 | "PolicyName": "segment-execute-lambda-policy" 205 | } 206 | ], 207 | "AssumeRolePolicyDocument": { 208 | "Version": "2012-10-17", 209 | "Statement": [ 210 | { 211 | "Effect": "Allow", 212 | "Principal": { 213 | "AWS": "arn:aws:iam::595280932656:root" 214 | }, 215 | "Action": "sts:AssumeRole", 216 | "Condition": { 217 | "StringEquals": { 218 | "sts:ExternalId": { "Ref": "SegmentSecretId" } 219 | } 220 | } 221 | } 222 | ] 223 | } 224 | } 225 | }, 226 | "PersonalizeS3RoleForLab": { 227 | "Type": "AWS::IAM::Role", 228 | "Properties": { 229 | "Path": "/service-role/", 230 | "ManagedPolicyArns": [ 231 | "arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess" 232 | ], 233 | "AssumeRolePolicyDocument": { 234 | "Version": "2012-10-17", 235 | "Statement": [ 236 | { 237 | "Effect": "Allow", 238 | "Principal": { 239 | "Service": "personalize.amazonaws.com" 240 | }, 241 | "Action": "sts:AssumeRole" 242 | } 243 | ] 244 | } 245 | } 246 | }, 247 | "PersonalizeDataBucket": { 248 | "Type" : "AWS::S3::Bucket", 249 | "Properties" : { 250 | "BucketName" : { 251 | "Fn::Sub": "personalize-data-${AWS::AccountId}" 252 | } 253 | } 254 | }, 255 | "PersonalizeDataBucketPolicy" : { 256 | "Type" : "AWS::S3::BucketPolicy", 257 | "Properties" : { 258 | "Bucket" : { "Ref" : "PersonalizeDataBucket" }, 259 | "PolicyDocument": { 260 | "Statement": [ 261 | { 262 | "Sid": "PersonalizeS3BucketAccessPolicy", 263 | "Effect": "Allow", 264 | "Principal": { 265 | "Service": "personalize.amazonaws.com" 266 | }, 267 | "Action": [ 268 | "s3:GetObject", 269 | "s3:ListBucket" 270 | ], 271 | "Resource": [ 272 | { "Fn::GetAtt": [ "PersonalizeDataBucket", "Arn" ] }, 273 | { 274 | "Fn::Join": [ "", [ 275 | { "Fn::GetAtt": [ "PersonalizeDataBucket", "Arn" ] }, 276 | "/*" 277 | ]] 278 | } 279 | ] 280 | }, 281 | { 282 | "Sid": "AllowSegmentUser", 283 | "Effect": "Allow", 284 | 
"Principal": { 285 | "AWS": "arn:aws:iam::107630771604:user/s3-copy" 286 | }, 287 | "Action": "s3:PutObject", 288 | "Resource": { 289 | "Fn::Join": [ "", [ 290 | { "Fn::GetAtt": [ "PersonalizeDataBucket", "Arn" ] }, 291 | "/segment-logs/*" 292 | ]] 293 | } 294 | } 295 | ] 296 | } 297 | } 298 | } 299 | } 300 | } 301 | -------------------------------------------------------------------------------- /exercise1/README.md: -------------------------------------------------------------------------------- 1 | # Exercise 1 - Data Transformation, Filtering, and Exploration 2 | 3 | ## Overview 4 | 5 | The effectiveness of machine learning models is directly tied to the quantity and quality of data input during the training process. For most personalization ML solutions, training data typically comes from clickstream data collected from websites, mobile applications, and other online & offline channels where end-users are interacting with items for which we wish to make recommendations. Examples of clickstream events include viewing items, adding items to a list or cart, and purchasing items. Although an Amazon Personalize Campaign can be started with just new clickstream data, the initial quality of the recommendations will not be as high as a model that has been trained on recent historical data. 6 | 7 | One of Segment's core capabilities is the ability collect semantic events and properties and to aggregate those properties into user profiles using Personas for later use in marketing and analytics tools. 8 | 9 | In this exercise we will walk through the process required to take historical clickstream data collected by Segment to train a model in Amazon Personalize. The advantage of bootstrapping Personalize with historical clickstream data is that you will start with a model that reflects your users's latest purchases and browsing behavior. 
10 | 11 | Segment provides the ability to send event data from one or more data sources configured in your Segment account to several AWS services including Amazon Simple Storage Service (S3), Amazon Kinesis, and Amazon Redshift. Since the raw format, fields, and event types in the Segment event data cannot be directly uploaded to Amazon Personalize for model training, this exercise will guide you through the process of transforming the data into the format expected by Personalize. 12 | 13 | In the interest of time for the workshop, we will start with data that has already been written to an Amazon S3 bucket by the Segment S3 destination. The [format](https://segment.com/docs/destinations/amazon-s3/#data-format) of these files is compressed JSON where events are grouped into directories by Segment source ID and time. Since the Personalize service requires training data to be uploaded in CSV format, we will need to aggregate, filter, and transform the raw JSON into a single CSV file using an ETL job. We will use AWS Glue for this step. However, before creating our Glue ETL job, we'll learn how to use Amazon Athena to easily explore our training data. Being able to quickly inspect and investigate training data can be invaluable in gaining insight from and resolving data related issues. 14 | 15 | > There is a minimum amount of data that is necessary to train a model in Personalize. Using existing historical data allows you to immediately start training a solution. If you ingest data as it is created, and there is no historical data, it can take a while before training can begin. 16 | 17 | ### What You'll Be Building 18 | 19 | ![Exercise 1 Architecture](images/Architecture-Exercise1.png) 20 | 21 | In this exercise we will walk through the process required to take the raw historical clickstream data collected by Segment to train a model in Amazon Personalize. 
The advantage of bootstrapping Personalize with historical clickstream data is that you will start with a model that has the benefit of past events to make more accurate recommendations. Segment provides the ability to push clickstream data to the following locations in your AWS account. 22 | 23 | * S3 bucket 24 | * Kinesis Data Stream 25 | * Kinesis Data Firehose 26 | * Redshift 27 | 28 | For this exercise we will walk you through how to setup an S3 destination in your Segment account. In the interest of time, though, we will provide access to pre-populated in an existing S3 bucket. Then you will learn how to use Amazon Athena to query and visualize the JSON files directly from S3. Finally, you will use AWS Glue to create an ETL (extract, transform, load) Job that will filter and transform the raw JSON files into the format required by Personalize. The output file will be written back to S3 in your account. 29 | 30 | > The Segment Amazon S3 destination is just one method for collecting historical training data for Personalize. If you're already using [Segment's Amazon Redshift Data Warehouse destination](https://segment.com/docs/destinations/redshift/), you can easily extract (or unload) data from your Redshift instance to a CSV file in S3 that is suitable for uploading into Personalize. See the [sql/redshift_unload_as_csv.sql](sql/redshift_unload_as_csv.sql) file for a sample SQL statement. 31 | 32 | ### Exercise Preparation 33 | 34 | If you have not already cloned this repository to your local machine, do so now. 35 | 36 | git clone https://github.com/james-jory/segment-personalize-workshop.git 37 | 38 | ### Claim your Segment Workspace 39 | 40 | For the live workshop exercises, Segment has pre-provisioned workspaces for the workshop. If you are doing this workshop after the live event, you will need a new Business Tier workspace with Personas enabled from Segment, or you will need to use your existing Segment workspace. 
If you do not have one, please contact your Segment sales representative at https://segment.com/contact/sales/. 41 | 42 | To get your workshop workspace: 43 | 44 | 1. Open the Google Sheet at [Segment Workspaces](https://docs.google.com/spreadsheets/d/1SyEDxLmquN96tsv-dhrOhduRLilWjITQyCBXCcA73U4/edit?usp=sharing) 45 | 2. Find an unclaimed workspace 46 | 47 | ![](https://paper-attachments.dropbox.com/s_E5B8F73CCFD247F55CDECE88873E27F307EC497126FFEDFF0F4E68F01755C7F7_1560739886350_image.png) 48 | 49 | 50 | 3. Claim your shiny new Segment workspace by putting your name in the appropriate fields, please. 51 | 52 | ## Part 1 - Set up Your Segment Workspace 53 | 54 | After you have claimed your workspace, there are a couple of steps you need to follow to get it ready to start the workshop. 55 | 56 | 57 | 1. Go to [https://app.segment.com](https://app.segment.com/). 58 | 2. Make sure you select the “Password” login radio button. 59 | 3. Log in as: 60 | 61 | ``` 62 | username: igor+awsmlworkshop@segment.com 63 | password: 64 | ``` 65 | 66 | ![](https://paper-attachments.dropbox.com/s_539A927F5DA788B557CE05EF51E8221F1D7D02D016B6CA298FD5F55304B8CA28_1558297663348_image.png) 67 | 68 | 69 | 70 | 4. Click the “Log In” button. 71 | 5. Find the workspace name you claimed in the spreadsheet. 72 | 6. Click the tile with the name you claimed. 73 | 74 | 75 | ![](https://paper-attachments.dropbox.com/s_539A927F5DA788B557CE05EF51E8221F1D7D02D016B6CA298FD5F55304B8CA28_1558298143343_image.png) 76 | 77 | 78 | 79 | 7. Click on the Settings gear in the bottom left corner of the screen that appears. 80 | 81 | 82 | ![](https://paper-attachments.dropbox.com/s_539A927F5DA788B557CE05EF51E8221F1D7D02D016B6CA298FD5F55304B8CA28_1558298296954_image.png) 83 | 84 | 85 | 86 | 8. Change the name of the workspace to something you will remember, so that you can find your workspace more easily in case you have to log back in during or after the workshop. 87 | 9. 
Add `igor+awsmlworkshop@segment.com` in the Incident Contact section. 88 | 10. Click the “Save Changes” button. 89 | 90 | 91 | ![](https://paper-attachments.dropbox.com/s_539A927F5DA788B557CE05EF51E8221F1D7D02D016B6CA298FD5F55304B8CA28_1558298453353_image.png) 92 | 93 | 94 | ## Part 2 - Create Segment Sources 95 | 96 | Segment Sources allow you to collect semantic events as your users interact with your web sites, mobile applications, or server-side applications. For this workshop, you will set up sources for a web application, an Android application, and iOS mobile application. We will also create a source that will be used to send recommendations from Personalize to user profiles in Segment. 97 | 98 | Your initial Segment workspace will look like this: 99 | 100 | 101 | ![](https://d2mxuefqeaa7sj.cloudfront.net/s_539A927F5DA788B557CE05EF51E8221F1D7D02D016B6CA298FD5F55304B8CA28_1551126460468_image.png) 102 | 103 | 104 | You will need to add four sources, using the ‘Add Source’ button in the screen shot above. To set up a source: 105 | 106 | 107 | ![](https://d2mxuefqeaa7sj.cloudfront.net/s_539A927F5DA788B557CE05EF51E8221F1D7D02D016B6CA298FD5F55304B8CA28_1551126918810_image.png) 108 | 109 | 110 | 111 | ![](https://d2mxuefqeaa7sj.cloudfront.net/s_539A927F5DA788B557CE05EF51E8221F1D7D02D016B6CA298FD5F55304B8CA28_1551126938657_image.png) 112 | 113 | 114 | 115 | ![](https://d2mxuefqeaa7sj.cloudfront.net/s_539A927F5DA788B557CE05EF51E8221F1D7D02D016B6CA298FD5F55304B8CA28_1551126965261_image.png) 116 | 117 | ![](https://d2mxuefqeaa7sj.cloudfront.net/s_539A927F5DA788B557CE05EF51E8221F1D7D02D016B6CA298FD5F55304B8CA28_1551127036032_image.png) 118 | 119 | 120 | Once your source is configured, it will appear in your workspace like this: 121 | 122 | 123 | ![](https://d2mxuefqeaa7sj.cloudfront.net/s_539A927F5DA788B557CE05EF51E8221F1D7D02D016B6CA298FD5F55304B8CA28_1551127061361_image.png) 124 | 125 | 126 | You will need to repeat these steps to configure three more sources. 
One for Android, one for iOS, and one for your Personalize events. 127 | 128 | Name your sources as follows: 129 | 130 | | Source Name | Source Type | Description | 131 | | ----------- | ----------- | ----------- | 132 | | website-prod | Javascript | Accepts user events from a web site; created in the example above. | 133 | | android-prod | Android | Accepts user events from an Android application. | 134 | | ios-prod | iOS | Accepts user events from an iOS application. | 135 | | personas-event-source | Python | Accepts events from the Personalize service Lambda you will connect in the last exercise. | 136 | 137 | For the web source, use the Javascript source type, for Android the Android source, for iOS the iOS source, and for the personas-event-source use the Python source type. 138 | 139 | ## Part 3 - Set up Segment Personas 140 | 141 | Personas will use the events that you collect from your user interactions to create individual user profiles. This will allow you and your marketing teams to group users into audiences. Later, you will be able to define the destinations to which you will be able to send user definitions and traits by setting up destinations in Personas. You will also be able to add product recommendations from Personalize to each user profile in Personas. 142 | 143 | After setting up your sources, your workspace should look something like this: 144 | 145 | ![](https://d2mxuefqeaa7sj.cloudfront.net/s_539A927F5DA788B557CE05EF51E8221F1D7D02D016B6CA298FD5F55304B8CA28_1551127589771_image.png) 146 | 147 | 148 | Click on the Personas Orb on the left hand side of your screen, and you will be redirected to the Personas setup wizard. This will allow you to set up Personas so that it can receive events from the sources which you just configured.
149 | 150 | 151 | ![](https://d2mxuefqeaa7sj.cloudfront.net/s_539A927F5DA788B557CE05EF51E8221F1D7D02D016B6CA298FD5F55304B8CA28_1551127707826_image.png) 152 | 153 | 154 | Click ‘Get Started’ and enable all of the sources you just created: 155 | 156 | 157 | ![](https://d2mxuefqeaa7sj.cloudfront.net/s_539A927F5DA788B557CE05EF51E8221F1D7D02D016B6CA298FD5F55304B8CA28_1551127784671_image.png) 158 | 159 | 160 | Then click ‘Review’: 161 | 162 | ![](https://d2mxuefqeaa7sj.cloudfront.net/s_539A927F5DA788B557CE05EF51E8221F1D7D02D016B6CA298FD5F55304B8CA28_1551127817276_image.png) 163 | 164 | 165 | And then ‘Enable Personas.’ 166 | 167 | You now have an event channel from your applications, and a way to collect identity information about individual users. Let’s set up Segment so that this data can be passed on to Personalize via an S3 bucket for your initial training set. 168 | 169 | 170 | ## Part 4 - Create S3 Destination in Segment 171 | 172 | Although we won't be testing pushing data from Segment to S3 in the workshop due to time limitations, we will walk through how to configure an S3 destination in Segment. Start by logging in to your Segment account and clicking "Destinations" in the left navigation. Then click the "Add Destination" button. 173 | 174 | ![Segment Destinations](images/SegmentAddDestination.png) 175 | 176 | On the Destination catalog page, search for "S3" in the search field. Click on "Amazon S3" in the search results. 177 | 178 | ![Segment find S3 Destination](images/SegmentS3-Destination.png) 179 | 180 | Click "Configure Amazon S3" to setup the S3 destination. 181 | 182 | ![Segment S3 Configuration](images/SegmentS3-Configure.png) 183 | 184 | On the "Select Source" page, select an existing Source and click the "Confirm Source" button. To learn more about setting up Sources in Segment, see the Segment [documentation](https://segment.com/docs/sources/). 
185 | 186 | ![Segment S3 Confirm Source](images/SegmentS3-ConfirmSource.png) 187 | 188 | The Settings page for the S3 Destination requires an S3 bucket name. An S3 bucket has already been created for you in your AWS account for the workshop. To find the bucket name, login to your AWS workshop account and browse to the S3 service page in a different browser tab/window. Locate the bucket with a name starting with `personalize-data-...`. Click on the bucket name and copy the name to your clipboard. 189 | 190 | ![Segment S3 Destination Bucket Name](images/SegmentS3-BucketName.png) 191 | 192 | Back on the Segment Amazon S3 destination settings page, paste the bucket name into the "Bucket Name" field. Also be sure to activate the destination at the top of the configuration form. 193 | 194 | ![Segment S3 Destination Settings](images/SegmentS3-Settings.png) 195 | 196 | Detailed instructions for configuring an S3 destination can be found on Segment's [documentation site](https://segment.com/docs/destinations/amazon-s3/). 197 | 198 | As mentioned above, we won't be testing actually pushing data through the S3 destination in this workshop due to time limitations. Instead, we will use raw data already collected in an S3 bucket. 199 | 200 | ## Part 5 - Send Test Data Into Your Segment Workspace 201 | 202 | In this step you will pre-populate simulated event data into your Segment instance, your S3 bucket, and Personas. 203 | 204 | The data you pre-populate here will be *required* in later steps when configuring Personalize to send recommendations to Personas and your marketing tools. 205 | 206 | Because events are synchronized from Segment to S3 on a batch basis, we will also give you a pre-populated initial training set to save time, in the next part. 207 | 208 | You will need some data to be populated in Segment however, since this will allow you to create recommendations based on (simulated) user activity later on. 
209 | 210 | This part can be choose your own adventure, though we strongly recommend using the AWS Cloud9 option to avoid Python package hell. Let’s start with Cloud9. 211 | 212 | > AWS Cloud9 is a cloud-based integrated development environment (IDE) that lets you write, run, and debug code with just a browser. The purpose of using Cloud9 for this part of the workshop is to simply provide a consistent and predictable environment for running a Python script that will push sample data into your Segment workspace. Cloud9 is not required to use Segment or Personalize. 213 | 214 | 1. Go to your AWS Console. 215 | 2. Under the Services dropdown, find Cloud9 216 | 3. Click the “Create Environment” button. 217 | 218 | 219 | ![](https://paper-attachments.dropbox.com/s_539A927F5DA788B557CE05EF51E8221F1D7D02D016B6CA298FD5F55304B8CA28_1558299104773_image.png) 220 | 221 | 222 | 223 | 4. Give your environment a name. 224 | 5. Click the “Next Step” button. 225 | 226 | 227 | ![](https://paper-attachments.dropbox.com/s_539A927F5DA788B557CE05EF51E8221F1D7D02D016B6CA298FD5F55304B8CA28_1558299167074_image.png) 228 | 229 | 230 | Cloud9 terminals run in VPCs like all EC2 instances. 231 | 232 | 233 | 6. Change the Platform radio button to “Ubuntu Server”. This is essential for the Python environment steps below to work properly. 234 | 7. Change the Cost-saving setting to “After four hours”. This sets a longer timeout for the inactivity monitor. 235 | 8. Click the “Create new VPC” button. This will open a new tab or browser window, depending on your browser. 236 | 237 | 238 | ![](https://paper-attachments.dropbox.com/s_E5B8F73CCFD247F55CDECE88873E27F307EC497126FFEDFF0F4E68F01755C7F7_1560740342985_image.png) 239 | 240 | 241 | 242 | 9. Select the default, VPC with a single public subnet. 243 | 10. Click the Select button. 
244 | 245 | 246 | ![](https://paper-attachments.dropbox.com/s_539A927F5DA788B557CE05EF51E8221F1D7D02D016B6CA298FD5F55304B8CA28_1558299502361_image.png) 247 | 248 | 249 | 250 | 11. Name your subnet. 251 | 12. Click the Create VPC button. 252 | 253 | 254 | ![](https://paper-attachments.dropbox.com/s_539A927F5DA788B557CE05EF51E8221F1D7D02D016B6CA298FD5F55304B8CA28_1558299573199_image.png) 255 | 256 | 13. Go back to the Cloud9 configuration wizard in the previous screen or tab. 257 | 14. Your VPC and subnet should already be automatically selected. If they are not, click the refresh button next to the VPC dropdown. 258 | 15. Select your VPC. 259 | 16. Click the Next Step button. 260 | 261 | 262 | ![](https://paper-attachments.dropbox.com/s_539A927F5DA788B557CE05EF51E8221F1D7D02D016B6CA298FD5F55304B8CA28_1558299736195_image.png) 263 | 264 | 265 | 266 | 17. Click the “Create Environment” button. 267 | 268 | 269 | ![](https://paper-attachments.dropbox.com/s_539A927F5DA788B557CE05EF51E8221F1D7D02D016B6CA298FD5F55304B8CA28_1558299781230_image.png) 270 | 271 | 272 | 273 | 18. You will be forwarded to a screen that shows your remote IDE which provides a terminal in which you can execute terminal commands. If you are running an ad blocker in your browser, you will need to disable it for this URL because some ad blockers prevent the Cloud9 service from running in the browser. 274 | 275 | 276 | ![](https://paper-attachments.dropbox.com/s_539A927F5DA788B557CE05EF51E8221F1D7D02D016B6CA298FD5F55304B8CA28_1558299984070_image.png) 277 | 278 | 279 | 280 | 19. Welcome, l33t h4x0r! 281 | 282 | 283 | ![](https://paper-attachments.dropbox.com/s_539A927F5DA788B557CE05EF51E8221F1D7D02D016B6CA298FD5F55304B8CA28_1558300244295_image.png) 284 | 285 | 286 | Keep this window or tab open, or at least save the URL to your instance. You will need to use this one more time later in the workshop to send events into Segment. Or like hack a mainframe or something. 287 | 288 | 289 | 20. 
You will need to run the following commands in the terminal window (you are in Ubuntu, right?) 290 | 291 | ``` 292 | git clone https://github.com/james-jory/segment-personalize-workshop.git 293 | pip install python-dateutil --upgrade 294 | pip install analytics-python 295 | ``` 296 | 297 | 21. Open the segment-event-generator.py file in the ./data folder of the workshop project. 298 | 299 | ``` 300 | cd segment-personalize-workshop/data 301 | vi segment-event-generator.py 302 | ``` 303 | 304 | ![](https://paper-attachments.dropbox.com/s_539A927F5DA788B557CE05EF51E8221F1D7D02D016B6CA298FD5F55304B8CA28_1558301096176_image.png) 305 | 306 | 307 | 308 | 22. Go to your Segment workspace window. 309 | 23. You will need the write keys for the web, android, and ios sources you created earlier. You can get these by clicking on each source as shown below. 310 | 311 | 312 | ![](https://camo.githubusercontent.com/804a4e22b35ff233bfe829df4609cf81c7c3562c/68747470733a2f2f64326d787565667165616137736a2e636c6f756466726f6e742e6e65742f735f353339413932374635444137383842353537434530354546353145383232314631443744303244303136423643413239384644354635353330344238434132385f313535313134323631363733355f696d6167652e706e67) 313 | 314 | 315 | The write key for the source is in the next screen: 316 | 317 | ![](https://camo.githubusercontent.com/30d8382284c7b9902629a461f029e7e868b0bd95/68747470733a2f2f64326d787565667165616137736a2e636c6f756466726f6e742e6e65742f735f353339413932374635444137383842353537434530354546353145383232314631443744303244303136423643413239384644354635353330344238434132385f313535313134323731363836305f696d6167652e706e67) 318 | 319 | 24. Add each write key to the appropriate variable entry in the script (you will not need a key for the email_write_key entry in the script, leave that blank): 320 | 321 | 322 | ![](https://paper-attachments.dropbox.com/s_539A927F5DA788B557CE05EF51E8221F1D7D02D016B6CA298FD5F55304B8CA28_1558301096176_image.png) 323 | 324 | 325 | 326 | 25. 
Save your changes (escape key > colon key > `wq` in the vi terminal). Now you can push events into your Segment sources. 327 | 328 | 329 | 26. In your Cloud9 terminal, run the script: 330 | 331 | ``` 332 | python segment-event-generator.py 2019-06-16 333 | ``` 334 | 335 | This will generate two days' worth of interaction data in your Segment instance. If you are using this after the date shown above, you should change the date to be at least one day prior to today so that you get a long enough run of historical data. 336 | 337 | 338 | 27. Validate you can see your events by clicking on each of your sources and looking at the Debugger tab. You should see events in all of the sources you configured above. Keep your Cloud9 terminal open for later. 339 | 340 | 341 | ## Part 6 - Explore Workshop Test Data 342 | 343 | In the interest of time, we have pre-populated an S3 bucket with raw historical JSON data from a sample Segment account. We will use this data to train an initial model in Personalize that will be used throughout the remainder of this workshop. 344 | 345 | In this step we will demonstrate how Amazon Athena can be used to create an external table pointing at the raw JSON files and then query those files using SQL. This can be an invaluable tool to inspect your data before uploading it into Personalize and as you iterate with models over time. 346 | 347 | Log in to the AWS console. If you are participating in an AWS led workshop, use the instructions provided to access your temporary workshop account. Browse to the Amazon Athena service page in the console, making sure that you are in the "N. Virginia" region. If this is the first time you've used Athena, you will be presented with a welcome page. Click the "Get started" button to continue. If the tutorial is presented, feel free to walk through the tutorial or click the "X" to exit the tutorial. 348 | 349 | Before we can create a table for the data in S3, we need an Athena database.
For the purposes of this exercise, the database that we use is not important. If a "default" or "sampledb" database already exists in your account, you can use that for the new table. Otherwise you can create a new database using the DDL statement below (paste it into the "New query 1" tab and press "Run query"). 350 | 351 | ```sql 352 | CREATE DATABASE IF NOT EXISTS default; 353 | ``` 354 | 355 | ![Create Athena Database](images/AthenaCreateDatabase.png) 356 | 357 | Next let's create a table in the Athena database above that points to the historical data in S3. We have written the DDL statement for you that will create this table. Open the [sql/athena_create_table.sql](sql/athena_create_table.sql) file, copy the contents to your clipboard, and paste the file contents into the "New query 1" tab in the Athena console. Take a moment to inspect the `CREATE EXTERNAL TABLE...` statement. One important aspect of this DDL statement is that there are several field name mapping statements in the `SERDEPROPERTIES` section (as shown below). These mappings address the [Athena requirement](https://docs.aws.amazon.com/athena/latest/ug/tables-databases-columns-names.html) that the only special character allowed in database, table, view, and column names is an underscore. Since the Segment test data has several trait names with embedded spaces, these mappings allow us to safely query this data in Athena. 358 | 359 | ```sql 360 | CREATE EXTERNAL TABLE IF NOT EXISTS segment_logs ( 361 | ...
362 | ) 363 | ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe' 364 | WITH SERDEPROPERTIES ( 365 | "mapping.campaign_name"="Campaign Name", 366 | "mapping.campaign_source"="Campaign Source", 367 | "mapping.experiment_group"="Experiment Group", 368 | "mapping.favorite_departments"="Favorite Departments", 369 | "mapping.invited_user"="Invited User?", 370 | "mapping.referrering_domain"="Referrering Domain" 371 | ) 372 | LOCATION 's3://segment-personalize-workshop/segment-logs/'; 373 | ``` 374 | 375 | When you're ready, press the "Run query" button to execute the statement. This will create the table in the Glue Data Catalog. Now you are ready to execute queries against the compressed JSON files. Try inspecting the data with a few queries. 376 | 377 | ```sql 378 | -- What does the data look like? 379 | SELECT * FROM segment_logs limit 20; 380 | ``` 381 | 382 | ![Athena Query Star](images/AthenaQueryStar.png) 383 | 384 | ```sql 385 | -- The 'event' column is what we can use for event type in training our model. 386 | -- What event types are available? 387 | SELECT COUNT(messageId) AS eventCount, event FROM segment_logs GROUP BY event ORDER BY eventCount DESC; 388 | ``` 389 | 390 | ![Athena Query Events](images/AthenaQueryEvents.png) 391 | 392 | ```sql 393 | -- In order to make recommendations we need an item/product to recommend. 394 | -- Our product SKU is in 'properties.sku'. What event types are available where we have a SKU? 395 | SELECT COUNT(messageId) AS eventCount, event FROM segment_logs WHERE properties.sku IS NOT NULL GROUP BY event ORDER BY eventCount DESC; 396 | ``` 397 | 398 | ![Athena Query SKUs](images/AthenaQuerySkus.png) 399 | 400 | From the results of the last query you will notice that there are three events that include a product SKU: 'Product Clicked', 'Product Added', and 'Order Completed'. We will use these events in training our model for product recommendations.
We could also train models based on other event types, such as 'Page Viewed' or 'Signup Success' and use them to make content or membership program recommendations. 401 | 402 | ## Part 7 - Data Preparation 403 | 404 | Since the raw format, fields, and event types in the Segment event data cannot be directly uploaded to Amazon Personalize for model training, this step will guide you through the process of transforming the data into the format expected by Personalize. We will use the same compressed JSON files you queried with Athena in the previous step. We will use AWS Glue to create an ETL job that will take the JSON files, apply filtering and field mapping to each JSON event, and write the output back to S3 as a CSV file. 405 | 406 | ### Create AWS Glue ETL Job 407 | 408 | First, ensure that you are logged in to the AWS account provided to you for this workshop. Then browse to the Glue service in the console, making sure that the AWS region is "N. Virginia" (us-east-1). Click the "Get started" button and then click "Jobs" in the left navigation on the Glue console page. 409 | 410 | ![Glue Jobs](images/GlueJobs.png) 411 | 412 | Click the "Add job" button and enter the following information. 413 | 414 | * Enter a job name such as "SegmentEventsJsonToCsv". 415 | * For IAM role, a role has already been created for you that starts with the name `module-personalize-GlueServiceRole-...`. Select this role. 416 | * Leave Type as "Spark". 417 | * For "This job runs", click the radio button "A new script to be authored by you". 418 | * Leave everything else the same and click Next at the bottom of the form. 419 | * On the "Connections" step just click "Save job and edit script" since we are not accessing data in a database for this job. 420 | 421 | ![Glue Job Settings](images/GlueAddJobSettings.png) 422 | 423 | The source code for the Glue job has already been written for you. 
Copy the contents of [etl/glue_etl.py](etl/glue_etl.py) to your clipboard and paste it into the Glue editor window. **Then click "Save" to save the job script.** 424 | 425 | ![Glue Job Script](images/GlueEditJobScript.png) 426 | 427 | Let's review key parts of the script in more detail. First, the script is initialized with two job parameters. The `JOB_NAME` parameter is passed to our job by the Glue execution framework. Below we'll see how to specify the `S3_CSV_OUTPUT_PATH` parameter value when we run the job. This parameter tells our job where to write the CSV file. 428 | 429 | ```python 430 | args = getResolvedOptions(sys.argv, ['JOB_NAME', 'S3_CSV_OUTPUT_PATH']) 431 | ``` 432 | 433 | Next the Spark and Glue contexts are created and associated. A Glue Job is also created and initialized. 434 | 435 | ```python 436 | sc = SparkContext() 437 | glueContext = GlueContext(sc) 438 | spark = glueContext.spark_session 439 | job = Job(glueContext) 440 | job.init(args['JOB_NAME'], args) 441 | ``` 442 | 443 | The first step in our Job is to load the raw JSON file as a Glue DynamicFrame. We're loading the JSON from the shared S3 bucket (segment-personalize-workshop) where the training data for the workshop has already been staged. Note that we're specifying the `recurse:True` parameter so that Glue will recursively load all files under the `segment-logs` folder. 444 | 445 | ```python 446 | datasource0 = glueContext.create_dynamic_frame_from_options("s3", {'paths': ["s3://segment-personalize-workshop/segment-logs"], 'recurse':True}, format="json") 447 | ``` 448 | 449 | Since we only want specific events for training our Personalize model, we'll use Glue's `Filter` transformation to keep only the records we want. The `datasource0` DynamicFrame created above is passed to `Filter.apply(...)` function along with the `filter_function` function. It's in `filter_function` where we keep events that have a userId, product SKU, and event specified. All other records are ignored. 
The resulting DynamicFrame is captured as `interactions`. 450 | 451 | ```python 452 | supported_events = ['Product Added', 'Order Completed', 'Product Clicked'] 453 | def filter_function(dynamicRecord): 454 | if ('userId' in dynamicRecord and 455 | 'properties' in dynamicRecord and 456 | 'sku' in dynamicRecord["properties"] and 457 | 'event' in dynamicRecord and 458 | dynamicRecord['event'] in supported_events): 459 | return True 460 | else: 461 | return False 462 | 463 | interactions = Filter.apply(frame = datasource0, f = filter_function, transformation_ctx = "interactions") 464 | ``` 465 | 466 | Next we will call Glue's `ApplyMapping` transformation, passing the `interactions` DynamicFrame from above and a field mapping specification that indicates the fields we want to retain and their new names. These mapped field names will become the column names in our output CSV. You'll notice that we're using the product SKU as the `ITEM_ID` and `event` as the `EVENT_TYPE`. These will have meaning in the Personalize interaction schema that we will use when uploading the CSV into Personalize. We're also renaming the `timestamp` field to `TIMESTAMP_ISO` since the format of this field value in the JSON file is an ISO 8601 date and Personalize requires timestamps to be specified in UNIX time (number of seconds since the Epoch). 467 | 468 | ```python 469 | applymapping1 = ApplyMapping.apply(frame = interactions, mappings = [ \ 470 | ("userId", "string", "USER_ID", "string"), \ 471 | ("properties.sku", "string", "ITEM_ID", "string"), \ 472 | ("event", "string", "EVENT_TYPE", "string"), \ 473 | ("timestamp", "string", "TIMESTAMP_ISO", "string")], \ 474 | transformation_ctx = "applymapping1") 475 | ``` 476 | 477 | To convert the ISO 8601 date format to UNIX time for each record, we'll use Spark's `withColumn(...)` to create a new column called `TIMESTAMP` that is the converted value of the `TIMESTAMP_ISO` field.
Before we can call `withColumn`, though, we need to convert the Glue DynamicFrame into a Spark DataFrame. That is accomplished by calling `toDF()` on the output of the ApplyMapping transformation above. Since Personalize requires our uploaded CSV to be a single file, we'll call `repartition(1)` on the DataFrame to force all data to be written in a single partition. Finally, after creating the `TIMESTAMP` in the expected format, `DynamicFrame.fromDF()` is called to convert the DataFrame back into a DynamicFrame and then we'll drop the `TIMESTAMP_ISO` field. 478 | 479 | ```python 480 | # Repartition to a single file since that is what is required by Personalize 481 | onepartitionDF = applymapping1.toDF().repartition(1) 482 | # Coalesce timestamp into unix timestamp 483 | onepartitionDF = onepartitionDF.withColumn("TIMESTAMP", \ 484 | unix_timestamp(onepartitionDF['TIMESTAMP_ISO'], "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'")) 485 | # Convert back to dynamic frame 486 | onepartition = DynamicFrame.fromDF(onepartitionDF, glueContext, "onepartition_df") 487 | # Drop the ISO formatted timestamp 488 | onepartition = onepartition.drop_fields(['TIMESTAMP_ISO']) 489 | ``` 490 | 491 | The last step is to write our CSV back to S3 at the path specified by the `S3_CSV_OUTPUT_PATH` job property and commit the job. 492 | 493 | ```python 494 | glueContext.write_dynamic_frame.from_options(frame = onepartition, connection_type = "s3", \ 495 | connection_options = {"path": args['S3_CSV_OUTPUT_PATH']}, \ 496 | format = "csv", transformation_ctx = "datasink2") 497 | 498 | job.commit() 499 | ``` 500 | 501 | ### Run AWS Glue ETL Job 502 | 503 | With our ETL Job script created and saved, it's time to run the job to create the CSV needed to train a Personalize Solution. Before going any further, we need to open another AWS console browser tab/window so we can get the S3 bucket name to use in our Glue job parameter that was described above.
Right-click on the AWS logo in the upper left corner of the page and select "Open Link in New Tab" (or Window). In the new tab/window, browse to the S3 service page. You should see the S3 bucket pre-created for you. It should be named something like `personalize-data-...`. 504 | 505 | ![S3 Bucket](images/S3Bucket.png) 506 | 507 | We will need this bucket name in a moment. Keep this tab/window open. 508 | 509 | Switch back to the browser tab/window where your Glue script was created. While in the Glue service console and the job listed, click the "Run job" button. This will cause the Parameters panel to display. Click the "Security configuration, script libraries, and job parameters" section header to cause the job parameters fields to be displayed. 510 | 511 | ![Glue Job Parameters](images/GlueRunJobDialog.png) 512 | 513 | Scroll down to the "Job parameters" section. This is where we will specify the job parameter that our script expects for the path to the output file. Create a job parameter with the following key and value. Be sure to prefix the key with `--` as shown. For the parameter value, **substitute "YOUR-BUCKET_NAME" with the name of the S3 bucket in the other browser tab/window we opened above.** 514 | 515 | | Key | Value | 516 | | -------------------- | ---------------------------------------------- | 517 | | --S3_CSV_OUTPUT_PATH | s3://***YOUR-BUCKET_NAME***/transformed | 518 | 519 | ![Glue Job Parameters](images/GlueRunJobParams.png) 520 | 521 | Click the "Run job" button to start the job. Once the job has started running you will see log output in the "Logs" tab at the bottom of the page. Your job may sit in a "Pending execution" state for a few minutes before it starts. Once your job starts executing, you can also view the log output in CloudWatch > Logs > `/aws-glue/jobs/output` while it is running. Note that the CloudWatch log will be created when your job starts running.
522 | 523 | When the job completes click the "X" in the upper right corner of the the page to exit the job script editor. 524 | 525 | ### Verify CSV Output File 526 | 527 | Browse to the S3 service page in the AWS console and find the bucket with a name starting with `personalize-data-...`. Click on the bucket name. If the job completed successfully you should see a folder named "transformed". Click on "transformed" and you should see the output file created by the ETL job. If the file is missing or is empty then the job likely experienced a problem. You can review the job's log under CloudWatch > Logs > `/aws-glue/jobs/output` to troubleshoot possible errors. The most common cause for the job failing is not accurately specifying the `S3_CSV_OUTPUT_PATH` job parameter key and/or value. 528 | 529 | If you are unable to get the job to complete successfully, you can skip the ETL step and upload a pre-generated CSV located in the [data/transformed](../data/transformed) directory of this repository (i.e. upload the pre-generated file to your `personalize-data-...` S3 bucket in the `transformed` folder using the AWS console). You may need to create the `transformed` folder before uploading the file if it was not created by your job. 530 | 531 | ![Glue Job Transformed File](images/GlueJobOutputFile.png) 532 | 533 | At this point we have the transformed CSV file containing historical clickstream data that we will use to upload and train a model in Personalize. In the next [exercise](../exercise2/) we will create a Personalize Dataset Group and import the CSV as an interaction dataset. 534 | -------------------------------------------------------------------------------- /exercise1/etl/glue_etl.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | # SPDX-License-Identifier: MIT-0 3 | import sys 4 | from awsglue.transforms import * 5 | from awsglue.utils import getResolvedOptions 6 | from awsglue.context import GlueContext 7 | from awsglue.job import Job 8 | from awsglue.dynamicframe import DynamicFrame 9 | from pyspark.context import SparkContext 10 | from pyspark.sql.functions import unix_timestamp 11 | 12 | ## @params: [JOB_NAME,S3_CSV_OUTPUT_PATH] 13 | args = getResolvedOptions(sys.argv, ['JOB_NAME', 'S3_CSV_OUTPUT_PATH']) 14 | 15 | sc = SparkContext() 16 | glueContext = GlueContext(sc) 17 | spark = glueContext.spark_session 18 | job = Job(glueContext) 19 | job.init(args['JOB_NAME'], args) 20 | 21 | # Load JSON files into dynamic frame. 22 | datasource0 = glueContext.create_dynamic_frame_from_options("s3", {'paths': ["s3://segment-personalize-workshop/segment-logs"], 'recurse':True}, format="json") 23 | print("Input file total record count: ", datasource0.count()) 24 | 25 | # Filters the JSON documents that we want included in the output CSV. 26 | # These are the event types we're interested for our dataset. 27 | supported_events = ['Product Added', 'Order Completed', 'Product Clicked'] 28 | def filter_function(dynamicRecord): 29 | if ('userId' in dynamicRecord and 30 | 'properties' in dynamicRecord and 31 | 'sku' in dynamicRecord["properties"] and 32 | 'event' in dynamicRecord and 33 | dynamicRecord['event'] in supported_events): 34 | return True 35 | else: 36 | return False 37 | 38 | # Apply filter function to dynamic frame 39 | interactions = Filter.apply(frame = datasource0, f = filter_function, transformation_ctx = "interactions") 40 | print("Filtered record count: ", interactions.count()) 41 | 42 | # Map only the fields we want in the output CSV, changing names to match target schema. 
43 | applymapping1 = ApplyMapping.apply(frame = interactions, mappings = [ \ 44 | ("userId", "string", "USER_ID", "string"), \ 45 | ("properties.sku", "string", "ITEM_ID", "string"), \ 46 | ("event", "string", "EVENT_TYPE", "string"), \ 47 | ("timestamp", "string", "TIMESTAMP_ISO", "string")], \ 48 | transformation_ctx = "applymapping1") 49 | 50 | # Repartition to a single file since that is what is required by Personalize 51 | onepartitionDF = applymapping1.toDF().repartition(1) 52 | # Coalesce timestamp into unix timestamp 53 | onepartitionDF = onepartitionDF.withColumn("TIMESTAMP", \ 54 | unix_timestamp(onepartitionDF['TIMESTAMP_ISO'], "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'")) 55 | # Convert back to dynamic frame 56 | onepartition = DynamicFrame.fromDF(onepartitionDF, glueContext, "onepartition_df") 57 | # Drop the ISO formatted timestamp 58 | onepartition = onepartition.drop_fields(['TIMESTAMP_ISO']) 59 | 60 | # Write output back to S3 as a CSV 61 | glueContext.write_dynamic_frame.from_options(frame = onepartition, connection_type = "s3", \ 62 | connection_options = {"path": args['S3_CSV_OUTPUT_PATH']}, \ 63 | format = "csv", transformation_ctx = "datasink2") 64 | 65 | job.commit() -------------------------------------------------------------------------------- /exercise1/images/Architecture-Exercise1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise1/images/Architecture-Exercise1.png -------------------------------------------------------------------------------- /exercise1/images/AthenaCreateDatabase.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise1/images/AthenaCreateDatabase.png 
-------------------------------------------------------------------------------- /exercise1/images/AthenaDbAndTable.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise1/images/AthenaDbAndTable.png -------------------------------------------------------------------------------- /exercise1/images/AthenaQueryEvents.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise1/images/AthenaQueryEvents.png -------------------------------------------------------------------------------- /exercise1/images/AthenaQueryResults.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise1/images/AthenaQueryResults.png -------------------------------------------------------------------------------- /exercise1/images/AthenaQuerySkus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise1/images/AthenaQuerySkus.png -------------------------------------------------------------------------------- /exercise1/images/AthenaQueryStar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise1/images/AthenaQueryStar.png -------------------------------------------------------------------------------- /exercise1/images/GlueAddJob.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise1/images/GlueAddJob.png -------------------------------------------------------------------------------- /exercise1/images/GlueAddJobSettings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise1/images/GlueAddJobSettings.png -------------------------------------------------------------------------------- /exercise1/images/GlueCrawlerAdd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise1/images/GlueCrawlerAdd.png -------------------------------------------------------------------------------- /exercise1/images/GlueCrawlerAddDataStore.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise1/images/GlueCrawlerAddDataStore.png -------------------------------------------------------------------------------- /exercise1/images/GlueCrawlerAddOnDemand.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise1/images/GlueCrawlerAddOnDemand.png -------------------------------------------------------------------------------- /exercise1/images/GlueCrawlerAddOutput.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise1/images/GlueCrawlerAddOutput.png 
-------------------------------------------------------------------------------- /exercise1/images/GlueCrawlerAddRole.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise1/images/GlueCrawlerAddRole.png -------------------------------------------------------------------------------- /exercise1/images/GlueCrawlerRunItNow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise1/images/GlueCrawlerRunItNow.png -------------------------------------------------------------------------------- /exercise1/images/GlueCrawlers.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise1/images/GlueCrawlers.png -------------------------------------------------------------------------------- /exercise1/images/GlueEditJobScript.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise1/images/GlueEditJobScript.png -------------------------------------------------------------------------------- /exercise1/images/GlueGetStarted.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise1/images/GlueGetStarted.png -------------------------------------------------------------------------------- /exercise1/images/GlueJobOutputFile.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise1/images/GlueJobOutputFile.png -------------------------------------------------------------------------------- /exercise1/images/GlueJobs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise1/images/GlueJobs.png -------------------------------------------------------------------------------- /exercise1/images/GlueRunJobDialog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise1/images/GlueRunJobDialog.png -------------------------------------------------------------------------------- /exercise1/images/GlueRunJobParams.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise1/images/GlueRunJobParams.png -------------------------------------------------------------------------------- /exercise1/images/S3Bucket.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise1/images/S3Bucket.png -------------------------------------------------------------------------------- /exercise1/images/S3CreateFolder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise1/images/S3CreateFolder.png -------------------------------------------------------------------------------- 
/exercise1/images/S3Folder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise1/images/S3Folder.png -------------------------------------------------------------------------------- /exercise1/images/S3Upload.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise1/images/S3Upload.png -------------------------------------------------------------------------------- /exercise1/images/S3Uploaded.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise1/images/S3Uploaded.png -------------------------------------------------------------------------------- /exercise1/images/SegmentAddDestination.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise1/images/SegmentAddDestination.png -------------------------------------------------------------------------------- /exercise1/images/SegmentS3-BucketName.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise1/images/SegmentS3-BucketName.png -------------------------------------------------------------------------------- /exercise1/images/SegmentS3-Configure.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise1/images/SegmentS3-Configure.png -------------------------------------------------------------------------------- /exercise1/images/SegmentS3-ConfirmSource.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise1/images/SegmentS3-ConfirmSource.png -------------------------------------------------------------------------------- /exercise1/images/SegmentS3-Destination.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise1/images/SegmentS3-Destination.png -------------------------------------------------------------------------------- /exercise1/images/SegmentS3-Settings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise1/images/SegmentS3-Settings.png -------------------------------------------------------------------------------- /exercise1/sql/athena_create_table.sql: -------------------------------------------------------------------------------- 1 | -- Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | -- SPDX-License-Identifier: MIT-0 3 | 4 | -- Demonstrates how to create a table in Athena that can be used 5 | -- to query event data written to S3 by Segment's S3 destination. 6 | -- This is helpful in exploring training data before being uploaded 7 | -- into Personalize before model building. 
8 | -- 9 | -- See https://segment.com/docs/destinations/amazon-s3/ 10 | -- 11 | -- Note that special handling is required to map JSON field names 12 | -- for Segment traits that contain spaces to Athena-safe 13 | -- variants. Additional mappings may be required to support your 14 | -- trait naming scheme. 15 | CREATE EXTERNAL TABLE IF NOT EXISTS segment_logs ( 16 | anonymousId string, 17 | channel string, 18 | context struct>, 20 | event string, 21 | messageId string, 22 | originalTimestamp string, 23 | projectId string, 24 | properties struct, 28 | traits struct, 42 | receivedAt string, 43 | sentAt string, 44 | `timestamp` string, 45 | type string, 46 | userId string, 47 | version INT, 48 | writeKey string 49 | ) 50 | ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe' 51 | WITH SERDEPROPERTIES ( 52 | "mapping.campaign_name"="Campaign Name", 53 | "mapping.campaign_source"="Campaign Source", 54 | "mapping.experiment_group"="Experiment Group", 55 | "mapping.favorite_departments"="Favorite Departments", 56 | "mapping.invited_user"="Invited User?", 57 | "mapping.referrering_domain"="Referrering Domain" 58 | ) 59 | LOCATION 's3://segment-personalize-workshop/segment-logs/'; -------------------------------------------------------------------------------- /exercise1/sql/redshift_unload_as_csv.sql: -------------------------------------------------------------------------------- 1 | -- Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | -- SPDX-License-Identifier: MIT-0 3 | 4 | -- Example query for unloading data from Redshift to a 5 | -- CSV file on S3 that is in the format expected by 6 | -- Amazon Personalize. Replace the values below with 7 | -- the AWS S3 bucket, credentials, and region appropriate 8 | -- for your configuration. 
9 | UNLOAD (' 10 | SELECT 11 | user_id AS USER_ID, 12 | products_sku AS ITEM_ID, 13 | event AS EVENT_TYPE, 14 | date_part(epoch,"timestamp") AS TIMESTAMP 15 | FROM prod.order_completed 16 | UNION 17 | SELECT 18 | user_id AS USER_ID, 19 | products_sku AS ITEM_ID, 20 | event AS EVENT_TYPE, 21 | date_part(epoch,"timestamp") AS TIMESTAMP 22 | FROM prod.product_added 23 | UNION 24 | SELECT 25 | user_id AS USER_ID, 26 | products_sku AS ITEM_ID, 27 | event AS EVENT_TYPE, 28 | date_part(epoch,"timestamp") AS TIMESTAMP 29 | FROM prod.product_viewed 30 | ') 31 | TO 's3://mybucket/my_folder' 32 | CREDENTIALS 'aws_access_key_id=AWS_ACCESS_KEY_ID;aws_secret_access_key=AWS_SECRET_ACCESS_KEY;token=AWS_SESSION_TOKEN' 33 | HEADER 34 | REGION AS '' 35 | DELIMITER AS ',' 36 | PARALLEL OFF; -------------------------------------------------------------------------------- /exercise2/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Exercise 2 - Create Personalize Dataset Group, Solution, and Campaign 3 | 4 | ## Overview 5 | 6 | Amazon Personalize requires data, uploaded into Amazon Personalize datasets, to train a model. 7 | 8 | You have two means to provide the training data. You can import historical data into Personalize from an Amazon S3 bucket and you can record data as it is created. In [exercise 1](../exercise1) we learned how to prepare historical data provided by Segment and in [exercise 4](../exercise4) we will learn how to send real-time events to Personalize using the Segment Personalize destination. 9 | 10 | A dataset group contains related datasets, three created by you (users, items, and historical interactions), and one created by Amazon Personalize for live event interactions. A dataset group can contain only one of each type of dataset. 11 | 12 | You can create multiple dataset groups to serve different purposes. 
For example, you might have an application that provides recommendations for purchasing shoes and another that gives recommendations for places to visit in Europe. In Amazon Personalize, each application would have its own dataset group. 13 | 14 | Historical data must be provided in a CSV file. Each dataset type has a unique schema that specifies the contents of the file. 15 | 16 | There is a minimum amount of data that is necessary to train a model. Using existing historical data allows you to immediately start training a solution. If you ingest data as it is created, and there is no historical data, it can take a while before training can begin. 17 | 18 | You can use the Amazon Personalize console to import data into a dataset, build a solution, and create a campaign. Alternatively you can use the AWS SDK to orchestrate the process from your own application code. 19 | 20 | ### What You'll Be Building 21 | 22 | ![Exercise 2 Architecture](images/Architecture-Exercise2.png) 23 | 24 | In the previous [exercise](../exercise1/) we learned how to use AWS Glue to take the raw event data written to S3 by Segment and transform it into the format required by Personalize. In this exercise we will pick up where we left off and learn how to create a dataset group, solution, and campaign in Personalize by performing the following steps. 25 | 26 | * Create a dataset group in Personalize 27 | * Define the schema representing the event data in our CSV 28 | * Upload our CSV into an interaction dataset in the dataset group 29 | * Create a Personalize Solution, or machine learning model, using the data in the dataset group and a Personalize recipe 30 | * Create a Personalize Campaign based on our Solution 31 | 32 | ### Exercise Preparation 33 | 34 | If you haven't already cloned this repository to your local machine, do so now. 
35 | 36 | ```bash 37 | git clone https://github.com/james-jory/segment-personalize-workshop.git 38 | ``` 39 | 40 | ## Part 1 - Create Personalize Dataset Group 41 | 42 | Browse to the Amazon Personalize service landing page in the AWS console, making sure that you are still in the "N. Virginia" region. 43 | 44 | Click the "Get started" button to get started. 45 | 46 | ![Personalize Start Page](images/PersonalizeStart.png) 47 | 48 | Enter a name for your dataset group and click "Next". 49 | 50 | ![Personalize Create Dataset Group](images/PersonalizeCreateDatasetGroup.png) 51 | 52 | Enter a name for your interaction dataset. This will be the dataset where we will upload the CSV file. 53 | 54 | ![Personalize Create Dataset](images/PersonalizeCreateDataset.png) 55 | 56 | Scroll down to the "Schema details" and ensure that "Create new schema" is selected. Enter a name for your schema. Next we need to specify the schema for the interaction dataset. This schema will map to the columns in the CSV we created in the [previous exercise](../exercise1). 57 | 58 | Dataset schemas in Personalize are represented in [Avro](https://avro.apache.org/docs/current/spec.html). 59 | 60 | > Avro is a remote procedure call and data serialization framework developed within Apache's Hadoop project. It uses JSON for defining data types and protocols, and serializes data in a compact binary format. 61 | 62 | An Avro schema for our event data has already been written and can be found in [event_schema.avsc](event_schema.avsc) and is also displayed below. 
63 | 64 | ```json 65 | { 66 | "type": "record", 67 | "name": "Interactions", 68 | "namespace": "com.amazonaws.personalize.schema", 69 | "fields": [ 70 | { 71 | "name": "USER_ID", 72 | "type": "string" 73 | }, 74 | { 75 | "name": "ITEM_ID", 76 | "type": "string" 77 | }, 78 | { 79 | "name": "EVENT_TYPE", 80 | "type": "string" 81 | }, 82 | { 83 | "name": "TIMESTAMP", 84 | "type": "long" 85 | } 86 | ], 87 | "version": "1.0" 88 | } 89 | ``` 90 | 91 | Let's review the schema in more detail. The required fields for the user-item interaction dataset schema are `USER_ID`, `ITEM_ID`, and `TIMESTAMP`. Additionally, optional reserved fields include `EVENT_TYPE` and `EVENT_VALUE`. In the [previous exercise](../exercise1) we mapped the `userId`, `properties.sku`, `timestamp` (transformed from ISO 8601 to UNIX time), and `event` from the raw event data from Segment to `USER_ID`, `ITEM_ID`, `TIMESTAMP`, and `EVENT_TYPE` columns in the CSV, respectively. 92 | 93 | Copy the contents of Avro schema to your clipboard and paste it into the "Schema definition" editor (replacing the proposed schema). Click "Next" to save the schema and move to the next step. 94 | 95 | ![Personalize Interaction Dataset Schema](images/PersonalizeSchema.png) 96 | 97 | The "Import user-item interaction data" step is displayed next. To complete this form we will need to get two pieces of information from IAM and S3. Give your import job a name. For the "IAM service role", select "Enter a customer IAM role ARN" from the dropdown. For instructions on completing the remaining fields, see the instructions below. 98 | 99 | ![Personalize Interaction Dataset Import Job](images/PersonalizeImportJob.png) 100 | 101 | To obtain the IAM and S3 information we need for this form, we will open a new tab or window to find the needed information. We'll start with the IAM role. 102 | 103 | 1. 
Open up a new browser window or tab in your current console session by right-clicking on the "AWS" logo in the upper-left corner of the page and choosing "Open Link in New Tab" or "Open Link in New Window" (the menu options may be worded differently in the web browser you're using). 104 | 2. In the __new tab/window that was opened__, browse to the IAM service page. 105 | 3. Select "Roles" in the left navigation and find the IAM role with the name **"module-personalize-PersonalizeServiceRole-..."**. This role was pre-created for you for this workshop and allows Personalize to access the S3 bucket where your CSV is located. 106 | 4. Click on this role name to display the role's detail page. 107 | 5. The "Role ARN" is displayed at the top of the "Summary" section. Click on the copy icon displayed at the end of Role ARN to copy the ARN to your clipboard. 108 | 6. Switch browser tabs/windows back to the Personalize "Import user-item interaction data" form and paste the ARN into the "Custom IAM role ARN" field. 109 | 110 | ![Personalize Role ARN](images/PersonalizeRoleARN.png) 111 | 112 | To complete the form, we need the path in S3 where our transformed CSV is located. To find this path, complete the following steps. 113 | 114 | 1. Open up a new browser window or tab in your current console session by right-clicking on the "AWS" logo in the upper-left corner of the page and choosing "Open Link in New Tab" or "Open Link in New Window" (the menu options may be worded differently in the web browser you're using). 115 | 2. In the __new tab/window that was opened__, browse to the S3 service page. 116 | 3. Locate the S3 bucket starting with the name "personalize-data-...". Click on the bucket name. 117 | 4. Click on the "transformed" folder name. You should see an object named something like "run-1551018703844-part-r-00000". This is the CSV generated by our Glue ETL job in the [previous exercise](../exercise1). 
***Make sure that this file is not zero bytes in size (which would indicate that the ETL job did not complete successfully).*** If your file is empty or you want to skip the ETL step, you can upload a pre-generated CSV located in the [data/transformed](../data/transformed) directory of this repository to your S3 bucket. 118 | 5. Check the box or click on the object name and then click "Copy path" to copy the path to the file to your clipboard. 119 | 6. Switch browser tabs/windows back to the Personalize "Import user-item interaction data" form and paste the CSV path into the "Data location" field. 120 | 121 | ![Personalize Transformed CSV S3 Path](images/PersonalizeTransformedS3Path.png) 122 | 123 | After clicking the "Finish" (or "Start import") button at the bottom of the page, you will be returned to the Personalize Dashboard where you can monitor the progress of your interaction dataset as it is being created. This process can take several minutes to complete. 124 | 125 | ![Personalize Interaction Dataset Creating](images/PersonalizeInteractionDatasetCreating.png) 126 | 127 | ## Part 2 - Create Personalize Solution 128 | 129 | Once our interaction CSV is finished importing into a user-item interaction dataset, we can create a Personalize Solution. This is where the ML model is created. From the Dashboard page for the dataset group we created above, click the "Start" button in the "Create solutions" column. 130 | 131 | > Note: We are skipping the event ingestion step for now and will come back to it in [exercise 4](../exercise4). 132 | 133 | ![Create Personalize Solution](images/PersonalizeCreateSolution.png) 134 | 135 | On the "Create solution" page, enter a "Solution name". When an interaction dataset includes multiple event types, you can tell Personalize which event type to train on. This is optional and we will leave it blank for this exercise. For the "Recipe selection", select the "Manual" radio button and "aws-hrnn" as the "Recipe". 
In the interest of time, we are manually selecting a recipe rather than using the AutoML option. The AutoML process takes longer to build a Solution since Personalize will evaluate multiple recipes to find the best performing recipe and configuration. See the [Predefined Recipes](https://docs.aws.amazon.com/personalize/latest/dg/working-with-predefined-recipes.html) documentation to learn more. 136 | 137 | ![Personalize Solution Configuration](images/PersonalizeSolutionConfig.png) 138 | 139 | Scroll down to the "Solution configuration" panel and click "true" to have Personalize perform hyperparameter optimization (HPO) on our model. We won't make any changes to the default HPO config settings. See the [HRNN Recipe](https://docs.aws.amazon.com/personalize/latest/dg/native-recipe-hrnn.html) documentation for details on the hyperparameters available for this recipe. 140 | 141 | ![Personalize Solution Configuration](images/PersonalizeSolutionHPO.png) 142 | 143 | Scroll to the bottom of the page and click the "Finish" button to move to the next step. Finally, on the "Create solution version" page, click "Finish" to create your Solution. 144 | 145 | ![Personalize Solution Version](images/PersonalizeCreateSolutionVersion.png) 146 | 147 | The process of creating your Solution can take 30-40 minutes for our test dataset. 148 | 149 | ![Personalize Solution Creation in Progress](images/PersonalizeSolutionInProgress.png) 150 | 151 | ## Part 3 - Create Personalize Campaign 152 | 153 | A deployed solution is known as a campaign, and is able to make recommendations for your users. To deploy a solution, you create a campaign in the console or by calling the CreateCampaign API. You can choose which version of the solution to use. By default, a campaign uses the latest version of a solution. 154 | 155 | From the Dataset Group Dashboard, click the "Create new campaign" button. 
156 | 157 | ![Personalize Create Campaign](images/PersonalizeCreateCampaignDash.png) 158 | 159 | Enter the name for your campaign as `segment-workshop-campaign`. __You must use this name in order for subsequent exercises in this workshop to function properly__. Select the solution you created above and click "Create campaign". 160 | 161 | ![Personalize Create Campaign Settings](images/PersonalizeCreateCampaign.png) 162 | 163 | Personalize will start creating your new campaign. This process can take several minutes. 164 | 165 | ![Personalize Campaign Creating](images/PersonalizeCampaignCreating.png) 166 | 167 | Once your campaign has finished being created and deployed, you can use the AWS console to test the Personalize "GetRecommendations" endpoint. Enter a `USER_ID` from the CSV we imported earlier, such as "5493069786", or try entering a made up user ID (i.e. simulating a new user not present in the training set). 168 | 169 | ![Personalize Campaign Testing](images/PersonalizeCampaignTest.png) 170 | 171 | In the [next exercise](../exercise3) we will build an API endpoint using Amazon API Gateway and AWS Lambda to demonstrate how recommendations from your Personalize campaign can be integrated into your applications. 
172 | -------------------------------------------------------------------------------- /exercise2/event_schema.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "type": "record", 3 | "name": "Interactions", 4 | "namespace": "com.amazonaws.personalize.schema", 5 | "fields": [ 6 | { 7 | "name": "USER_ID", 8 | "type": "string" 9 | }, 10 | { 11 | "name": "ITEM_ID", 12 | "type": "string" 13 | }, 14 | { 15 | "name": "EVENT_TYPE", 16 | "type": "string" 17 | }, 18 | { 19 | "name": "TIMESTAMP", 20 | "type": "long" 21 | } 22 | ], 23 | "version": "1.0" 24 | } -------------------------------------------------------------------------------- /exercise2/images/Architecture-Exercise2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise2/images/Architecture-Exercise2.png -------------------------------------------------------------------------------- /exercise2/images/PersonalizeCampaignCreating.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise2/images/PersonalizeCampaignCreating.png -------------------------------------------------------------------------------- /exercise2/images/PersonalizeCampaignTest.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise2/images/PersonalizeCampaignTest.png -------------------------------------------------------------------------------- /exercise2/images/PersonalizeCreateCampaign.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise2/images/PersonalizeCreateCampaign.png -------------------------------------------------------------------------------- /exercise2/images/PersonalizeCreateCampaignDash.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise2/images/PersonalizeCreateCampaignDash.png -------------------------------------------------------------------------------- /exercise2/images/PersonalizeCreateDataset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise2/images/PersonalizeCreateDataset.png -------------------------------------------------------------------------------- /exercise2/images/PersonalizeCreateDatasetGroup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise2/images/PersonalizeCreateDatasetGroup.png -------------------------------------------------------------------------------- /exercise2/images/PersonalizeCreateGroup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise2/images/PersonalizeCreateGroup.png -------------------------------------------------------------------------------- /exercise2/images/PersonalizeCreateSolution.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise2/images/PersonalizeCreateSolution.png -------------------------------------------------------------------------------- /exercise2/images/PersonalizeCreateSolutionVersion.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise2/images/PersonalizeCreateSolutionVersion.png -------------------------------------------------------------------------------- /exercise2/images/PersonalizeDatasetActive.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise2/images/PersonalizeDatasetActive.png -------------------------------------------------------------------------------- /exercise2/images/PersonalizeDatasetGroups.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise2/images/PersonalizeDatasetGroups.png -------------------------------------------------------------------------------- /exercise2/images/PersonalizeEventAvroSchema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise2/images/PersonalizeEventAvroSchema.png -------------------------------------------------------------------------------- /exercise2/images/PersonalizeGetStarted.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise2/images/PersonalizeGetStarted.png -------------------------------------------------------------------------------- /exercise2/images/PersonalizeImportJob.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise2/images/PersonalizeImportJob.png -------------------------------------------------------------------------------- /exercise2/images/PersonalizeInteractionDatasetCreating.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise2/images/PersonalizeInteractionDatasetCreating.png -------------------------------------------------------------------------------- /exercise2/images/PersonalizeRoleARN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise2/images/PersonalizeRoleARN.png -------------------------------------------------------------------------------- /exercise2/images/PersonalizeSchema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise2/images/PersonalizeSchema.png -------------------------------------------------------------------------------- /exercise2/images/PersonalizeSolutionConfig.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise2/images/PersonalizeSolutionConfig.png -------------------------------------------------------------------------------- /exercise2/images/PersonalizeSolutionHPO.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise2/images/PersonalizeSolutionHPO.png -------------------------------------------------------------------------------- /exercise2/images/PersonalizeSolutionInProgress.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise2/images/PersonalizeSolutionInProgress.png -------------------------------------------------------------------------------- /exercise2/images/PersonalizeStart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise2/images/PersonalizeStart.png -------------------------------------------------------------------------------- /exercise2/images/PersonalizeTransformedS3Path.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise2/images/PersonalizeTransformedS3Path.png -------------------------------------------------------------------------------- /exercise3/README.md: -------------------------------------------------------------------------------- 1 | # Exercise 3 - Getting Recommendations from Personalize 2 | 3 | ## Overview 4 | 5 | After you create a campaign using Amazon Personalize, you are able to get two 
different types of recommendations, dependent on what recipe type was used to train the model. For user-personalization and related-items recipes, the GetRecommendations API returns a list of recommended items. For example, products or content can be recommended for users signed in to your website. 6 | 7 | For search-personalization recipes, the PersonalizeRanking API re-ranks a list of recommended items based on a specified query. 8 | 9 | ### What You'll Be Building 10 | 11 | In this workshop we have been focused on building a user-personalization solution, so far trained on historical event data from Segment. In this exercise we will demonstrate how you can integrate recommendations from Personalize into your applications using a REST API. We will build an API Gateway endpoint that calls a Lambda function to fetch recommendations from Personalize. This example will show how to build a basic API endpoint to call Personalize directly from your applications for use cases where you will want to directly integrate recommendations. 12 | 13 | ### Exercise Preparation 14 | 15 | If you haven't already cloned this repository to your local machine, do so now. 16 | 17 | ```bash 18 | git clone https://github.com/james-jory/segment-personalize-workshop.git 19 | ``` 20 | 21 | ## Part 1 - Create API Endpoint & Lambda Function 22 | 23 | ![Exercise 3 Architecture](images/Architecture-Exercise3-Part1.png) 24 | 25 | First we will create a Lambda function that will be called by an API Gateway endpoint. In the AWS console for the account you've been assigned for the workshop, browse to the Lambda service page. Click the "Create a function" button from the welcome page to create a new function. 26 | 27 | ![Lambda Create Function](images/LambdaCreateFunction.png) 28 | 29 | Enter a name for your function and specify Python 3.7 as the runtime. 
Expand the "Permissions" panel and select an existing IAM role that has already been created for you, with a name like `module-personalize-SegmentPersonalizeLambdaRole-...`. Click "Create function". 30 | 31 | > If you deployed the project's [CloudFormation template](../eventengine/workshop.template) in your own AWS account, the prefix for the IAM role name will be different. Look for the role with `SegmentPersonalizeLambdaRole` in the name. 32 | 33 | ![Lambda Function Config](images/LambdaRecEndpointCreate.png) 34 | 35 | Scroll down to the "Function code" panel. The source code for the function has already been written and is provided in this repository at [recommendations/lambda_function.py](recommendations/lambda_function.py). Open this file in a new browser tab/window, copy it to your clipboard, and paste it into the source code editor for our Lambda function as shown below. **Click the "Save" button at the top of the page when you're done.** 36 | 37 | ![Lambda Function Code](images/LambdaRecCode.png) 38 | 39 | (Be sure to save your function before proceeding.) 40 | 41 | Next, we will connect Amazon API Gateway to our Lambda function. Select "API Gateway" in the "Add triggers" panel in the Designer panel. 42 | 43 | ![Lambda API Gateway Trigger](images/LambdaRecAPIGW_Trigger.png) 44 | 45 | Scroll down to the "Configure triggers" panel. For the API dropdown, select "Create a new API" and set the Security as "Open". For a production deployment you would want to [control access](https://docs.aws.amazon.com/apigateway/latest/developerguide/apigateway-control-access-to-api.html) to this endpoint but that is beyond the scope of this exercise. 46 | 47 | ![Lambda API Gateway Config](images/LambdaRecAPIGW_Config.png) 48 | 49 | Click "Add" to add API Gateway as a trigger to our function and then **click "Save" at the top of the page to save our changes**. 
50 | 51 | Next, we need to add environment variables for Segment and for the function to tell it the Personalize Campaign to call for retrieving recommendations. 52 | 53 | To obtain the Personalize Campaign ARN, browse to the Personalize service landing page in the AWS console. Select the Dataset Group you created earlier and then Campaigns in the left navigation. Click on the "segment-workshop-campaign" campaign you created earlier and copy the "Campaign arn" to your clipboard. 54 | 55 | ![Personalize Campaign ARN](images/PersonalizeCampaignArn.png) 56 | 57 | Return to our Lambda function and scroll down to the "Environment variables" panel. Add an environment variable with the key `personalize_campaign_arn` and value of the Campaign ARN in your clipboard. **Click the "Save" button at the top of the page to save your changes.** 58 | 59 | ![Lambda Campaign ARN Environment Variable](images/LambdaRecCampaignArn.png) 60 | 61 | Now let's browse to the API Gateway service page in the AWS console to test our endpoint. Under "APIs" you should see the recommendations API created when we setup our Lambda trigger. Click on the API name. 62 | 63 | ![API Gateway APIs](images/APIGW_endpoint.png) 64 | 65 | Click on the "ANY" resource then on the "TEST" link to build a test request. 66 | 67 | ![API Gateway Test](images/APIGW_Test.png) 68 | 69 | Select "GET" as the Method and enter a Query String of `userId=2941404340`. This is one of the users in our test dataset. Scroll to the bottom of the page and click the "Test" button. 70 | 71 | ![API Gateway Test](images/APIGW_TestGet.png) 72 | 73 | This will send a request through API Gateway which will call our Lambda function. The function will query Personalize for recommendations and return the results to API Gateway. 
74 | 75 | ![API Gateway Test](images/APIGW_TestGetResults.png) 76 | 77 | As you should see in the "Response Body", the GetRecommendations endpoint for Personalize returns an "itemList" of item IDs for recommended items for the specified user. Typically you would then use these item IDs to retrieve meta information such as item names, descriptions, and images from, say, a database or product microservice in your application. One approach would be to update the Lambda function you created in this exercise to iterate over the itemIds in the itemList, retrieve product information for each item, build a response including this information, and return it to the caller. 78 | 79 | In the final [exercise](../exercise4) we will bring everything together and learn how to integrate recommendations from Personalize with your customer profiles in Segment. This allows you to activate recommendations across other integrations in your Segment account. 80 | -------------------------------------------------------------------------------- /exercise3/images/APIGW_Test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise3/images/APIGW_Test.png -------------------------------------------------------------------------------- /exercise3/images/APIGW_TestGet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise3/images/APIGW_TestGet.png -------------------------------------------------------------------------------- /exercise3/images/APIGW_TestGetResults.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise3/images/APIGW_TestGetResults.png -------------------------------------------------------------------------------- /exercise3/images/APIGW_endpoint.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise3/images/APIGW_endpoint.png -------------------------------------------------------------------------------- /exercise3/images/Architecture-Exercise3-Part1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise3/images/Architecture-Exercise3-Part1.png -------------------------------------------------------------------------------- /exercise3/images/Architecture-Exercise3-Part2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise3/images/Architecture-Exercise3-Part2.png -------------------------------------------------------------------------------- /exercise3/images/CloudWatchLambda.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise3/images/CloudWatchLambda.png -------------------------------------------------------------------------------- /exercise3/images/Kinesis-Monitoring.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise3/images/Kinesis-Monitoring.png 
-------------------------------------------------------------------------------- /exercise3/images/Kinesis-PutRecordsGraph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise3/images/Kinesis-PutRecordsGraph.png -------------------------------------------------------------------------------- /exercise3/images/KinesisCreateStream.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise3/images/KinesisCreateStream.png -------------------------------------------------------------------------------- /exercise3/images/KinesisDashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise3/images/KinesisDashboard.png -------------------------------------------------------------------------------- /exercise3/images/KinesisStreamCreated.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise3/images/KinesisStreamCreated.png -------------------------------------------------------------------------------- /exercise3/images/LambdaCreateFunction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise3/images/LambdaCreateFunction.png -------------------------------------------------------------------------------- /exercise3/images/LambdaDashboard.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise3/images/LambdaDashboard.png -------------------------------------------------------------------------------- /exercise3/images/LambdaEnvVariable.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise3/images/LambdaEnvVariable.png -------------------------------------------------------------------------------- /exercise3/images/LambdaFunctionCode.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise3/images/LambdaFunctionCode.png -------------------------------------------------------------------------------- /exercise3/images/LambdaFunctionsNav.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise3/images/LambdaFunctionsNav.png -------------------------------------------------------------------------------- /exercise3/images/LambdaKinesisConfig.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise3/images/LambdaKinesisConfig.png -------------------------------------------------------------------------------- /exercise3/images/LambdaKinesisTrigger.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise3/images/LambdaKinesisTrigger.png -------------------------------------------------------------------------------- /exercise3/images/LambdaMonitoring.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise3/images/LambdaMonitoring.png -------------------------------------------------------------------------------- /exercise3/images/LambdaNav.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise3/images/LambdaNav.png -------------------------------------------------------------------------------- /exercise3/images/LambdaRecAPIGW_Config.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise3/images/LambdaRecAPIGW_Config.png -------------------------------------------------------------------------------- /exercise3/images/LambdaRecAPIGW_Trigger.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise3/images/LambdaRecAPIGW_Trigger.png -------------------------------------------------------------------------------- /exercise3/images/LambdaRecCampaignArn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise3/images/LambdaRecCampaignArn.png 
-------------------------------------------------------------------------------- /exercise3/images/LambdaRecCode.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise3/images/LambdaRecCode.png -------------------------------------------------------------------------------- /exercise3/images/LambdaRecEndpointCreate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise3/images/LambdaRecEndpointCreate.png -------------------------------------------------------------------------------- /exercise3/images/LambdaSaveFunction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise3/images/LambdaSaveFunction.png -------------------------------------------------------------------------------- /exercise3/images/PersonalizeCampaignArn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise3/images/PersonalizeCampaignArn.png -------------------------------------------------------------------------------- /exercise3/images/PersonalizeCampaignConfig.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise3/images/PersonalizeCampaignConfig.png -------------------------------------------------------------------------------- /exercise3/images/PersonalizeCreateCampaign.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise3/images/PersonalizeCreateCampaign.png -------------------------------------------------------------------------------- /exercise3/images/PersonalizeCreateTracker.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise3/images/PersonalizeCreateTracker.png -------------------------------------------------------------------------------- /exercise3/images/PersonalizeEventTrackerConfig.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise3/images/PersonalizeEventTrackerConfig.png -------------------------------------------------------------------------------- /exercise3/images/PersonalizeEventTrackerCreating.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise3/images/PersonalizeEventTrackerCreating.png -------------------------------------------------------------------------------- /exercise3/images/PersonalizeEventTrackerDetails.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise3/images/PersonalizeEventTrackerDetails.png -------------------------------------------------------------------------------- /exercise3/images/SegmentDestinations.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise3/images/SegmentDestinations.png -------------------------------------------------------------------------------- /exercise3/images/SegmentKinesis-AddDestination.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise3/images/SegmentKinesis-AddDestination.png -------------------------------------------------------------------------------- /exercise3/images/SegmentKinesis-ConfigStart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise3/images/SegmentKinesis-ConfigStart.png -------------------------------------------------------------------------------- /exercise3/images/SegmentKinesis-ConfirmSource.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise3/images/SegmentKinesis-ConfirmSource.png -------------------------------------------------------------------------------- /exercise3/images/SegmentKinesis-EventTester.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise3/images/SegmentKinesis-EventTester.png -------------------------------------------------------------------------------- /exercise3/images/SegmentKinesis-IAMRole.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise3/images/SegmentKinesis-IAMRole.png -------------------------------------------------------------------------------- /exercise3/images/SegmentKinesis-Settings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise3/images/SegmentKinesis-Settings.png -------------------------------------------------------------------------------- /exercise3/recommendations/lambda_function.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | 4 | import json 5 | import boto3 6 | import os 7 | import logging 8 | 9 | logger = logging.getLogger() 10 | logger.setLevel(logging.INFO) 11 | 12 | def lambda_handler(event, context): 13 | """ Proxies requests from API Gateway to the Personalize GetRecommendations endpoint. 14 | This function provides the interception point that we need to perform real-time filtering 15 | of recommendations based on traits on the customer's profile in Segment. Subsequent 16 | iterations of this function developed as part of this exercise will incrementally add 17 | this functionality. 18 | 19 | This function accepts the following arguments as query string parameters: 20 | userId - ID of the user to make recommendations (required for recommendations for a user) 21 | itemId - ID of the item to make recommendations (i.e. related items) (required for related items) 22 | numResults - number of recommendations to return (optional, will inherit default from Personalize if absent) 23 | """ 24 | 25 | logger.info("event: " + json.dumps(event)) 26 | 27 | # Allow Personalize region to be overriden via environment variable. Optional. 
28 | api_params = { 'service_name': 'personalize-runtime' } 29 | if 'region_name' in os.environ: 30 | api_params['region_name'] = os.environ['region_name'] 31 | 32 | personalize = boto3.client(**api_params) 33 | 34 | # Build parameters for recommendations request. The Campaign ARN must be specified as 35 | # an environment variable. 36 | if not 'personalize_campaign_arn' in os.environ: 37 | return { 38 | 'statusCode': 500, 39 | 'body': 'Server is not configured correctly' 40 | } 41 | 42 | params = { 'campaignArn': os.environ['personalize_campaign_arn'] } 43 | 44 | if 'userId' in event['queryStringParameters']: 45 | params['userId'] = event['queryStringParameters']['userId'] 46 | if 'itemId' in event['queryStringParameters']: 47 | params['itemId'] = event['queryStringParameters']['itemId'] 48 | if 'numResults' in event['queryStringParameters']: 49 | params['numResults'] = int(event['queryStringParameters']['numResults']) 50 | 51 | recommendations = personalize.get_recommendations(**params) 52 | 53 | # For this version of the function we're just returning the recommendations from 54 | # Personalize directly back to the caller. 55 | logger.info(recommendations) 56 | 57 | return { 58 | 'statusCode': 200, 59 | 'body': json.dumps(recommendations) 60 | } 61 | -------------------------------------------------------------------------------- /exercise4/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Exercise 4 - Activating Recommendations using Segment Personas 3 | 4 | ## Overview 5 | 6 | After you create a campaign using Amazon Personalize, you are able to get two different types of recommendations, dependent on what recipe type was used to train the model. For user-personalization and related-items recipes, the [GetRecommendations](https://docs.aws.amazon.com/personalize/latest/dg/API_RS_GetRecommendations.html) API returns a list of recommended items. 
For example, products or content can be recommended for users signed in to your website, or in marketing tools. 7 | 8 | For search-personalization recipes, the [PersonalizeRanking](https://docs.aws.amazon.com/personalize/latest/dg/API_RS_PersonalizeRanking.html) API re-ranks a list of recommended items based on a specified query. 9 | 10 | In [Exercise 3](https://github.com/james-jory/segment-personalize-workshop/blob/master/exercise3) you learned how to access your Personalize solution directly in applications, via a Lambda based API endpoint. 11 | 12 | In this exercise, you will configure the Segment Personalize Destination, and then deploy a Lambda that can process your Segment events via that destination. 13 | 14 | This Lambda function will allow you to send updated events to keep your Personalize solution up to date on the latest user behavior being tracked via Segment. You will also push updated product recommendations to your user profiles stored in Segment Personas when specific user events trigger a re-compute of recommendations. 15 | 16 | Once your recommendations are updated on a user profile, your marketing, analytics, and data teams can use these product recommendations in their campaign management and analytics tools with no additional work. 17 | 18 | ## Exercise Preparation 19 | 20 | If you haven't already cloned this repository to your local machine, do so now. 21 | 22 | ``` 23 | git clone https://github.com/james-jory/segment-personalize-workshop.git 24 | ``` 25 | 26 | ## **Part 1 - Create an Event Processing Lambda Function** 27 | 28 | 29 | ![Exercise 4 Architecture](https://paper-attachments.dropbox.com/s_C2B02AED879A518AEFAF0FFED12CDDE467AF9DAEA3DC2098084E706023E68F50_1558234881757_Blank+Diagram.jpeg) 30 | 31 | 32 | First you will create a Lambda function that gets called by the Segment Personalize Destination. 
Each time Segment gets an event bound for this destination, your function will receive the event and will need to send a tracking event to your Personalize Tracker. You will also get updated recommendations for the user that sent the event. 33 | 34 | 35 | 1. Navigate to Services > Lambda in your AWS Console. 36 | 2. In the left navigation pane (you can expose this with the hamburger button in the top left of the screen), select Functions. 37 | 3. Click ‘Create Function’ (see image below). 38 | 39 | 40 | ![Lambda Create Function](images/LambdaCreateFunction.png) 41 | 42 | 4. Select “Author from scratch” at the top of the screen. 43 | 5. Enter the name for your function: `SegmentPersonalizeDestinationHandler`. ***You must use this name for your function since the pre-made IAM policy for calling your function from Segment expects this name.*** 44 | 6. Specify Python 3.7 as the runtime. 45 | 7. Under Permissions, click “Choose or create an execution role.” 46 | 8. Select “Use an existing role”. 47 | 9. Select the role that starts with the name `module-personalize-SegmentPersonalizeLambdaRole-...` in the "Existing role" dropdown. 48 | 10. Click ‘Create Function.’ 49 | 50 | ![Lambda Function Config](images/LambdaConfigFunction.png) 51 | 52 | 11. Scroll down to the "Function code" panel. 53 | 12. Change the Handler text box to read `app.lambda_handler` instead of `lambda_function.lambda_handler` (if you don’t do this an error message will appear after the next step). 54 | 13. Select ‘Upload a .zip file’. 55 | 56 | ![Lambda Upload Function Zip](images/LambdaUploadFunctionZip.png) 57 | 58 | The source code for the function is provided in the workshop code home directory in `/exercise4/app.py`. For this function, you will use a Lambda .zip file bundle that you will need to make the code work. This is located in the workshop home directory in `/exercise4/function.zip`. 59 | 60 | 14. Click the Upload button. 
61 | 62 | ![Lambda Upload Function Zip](images/LambdaUploadFunctionZip2.png) 63 | 64 | 15. Navigate to the directory where you cloned the git repo and go to `segment-personalize-workshop/exercise4/function.zip` 65 | 66 | ![](https://paper-attachments.dropbox.com/s_C2B02AED879A518AEFAF0FFED12CDDE467AF9DAEA3DC2098084E706023E68F50_1558238600761_image.png) 67 | 68 | 16. Click the Open button. 69 | 17. Click the Save button at the top of the screen. It may take a few moments to complete this operation as your .zip file is uploaded. 70 | 71 | When completed, the function code should look something like this: 72 | 73 | ![](https://paper-attachments.dropbox.com/s_C2B02AED879A518AEFAF0FFED12CDDE467AF9DAEA3DC2098084E706023E68F50_1558238740999_image.png) 74 | 75 | Next, we need to add environment variables so the function can pass recommendation data back to Segment as well as a tracker for the Personalize Campaign to pass real-time data to Personalize. Your Lambda code needs the Personalize Campaign ARN in order to ask for recommendations from the Personalize service. 76 | 77 | 18. To obtain the Personalize Campaign ARN, browse to the Personalize service landing page in the AWS console in a new tab or window. 78 | 19. Select the Dataset Group you created earlier and then Campaigns in the left navigation. 79 | 20. Click on the "segment-workshop-campaign" you created earlier. 80 | 21. Copy the Campaign ARN to your clipboard. 81 | 22. Don’t close this tab or window, you will need it in the next section. 82 | 83 | ![Personalize Campaign ARN](https://github.com/james-jory/segment-personalize-workshop/raw/master/exercise4/images/PersonalizeCampaignArn.png) 84 | 85 | 23. Return to your Lambda function and scroll down to the "Environment variables" panel. You may need to click on the function name in the Designer pane to expose the function environment variables. 86 | 24. Add an environment variable with the key `personalize_campaign_arn`. 87 | 25. 
Paste the Campaign ARN from your clipboard as the value. 88 | 26. Scroll to the top of the page and click the Save button to save your changes. 89 | 90 | ![Lambda Campaign ARN Environment Variable](https://github.com/james-jory/segment-personalize-workshop/raw/master/exercise4/images/LambdaRecCampaignArn.png) 91 | 92 | Another critical dependency in your function is the ability to call the Personalize [PutEvents API](https://docs.aws.amazon.com/personalize/latest/dg/API_UBS_PutEvents.html) endpoint so that new event data can be added to the training set for your Personalize solution. This will enable the following Python code to work properly when sending events to Personalize: 93 | 94 | ```python 95 | response = personalize_events.put_events( 96 | trackingId = os.environ['personalize_tracking_id'], 97 | userId = userId, 98 | sessionId = event['anonymousId'], 99 | eventList = [ 100 | { 101 | "eventId": event['messageId'], 102 | "sentAt": int(dp.parse(event['timestamp']).strftime('%s')), 103 | "eventType": event['event'], 104 | "properties": json.dumps(properties) 105 | } 106 | ] 107 | ) 108 | ``` 109 | 110 | The `trackingId` function argument in your Lambda code identifies the Personalize Event Tracker which should handle the events you submit. This value is passed to your Lambda function as another environment variable. 111 | 112 | 27. In the browser tab/window you opened earlier, browse to the Personalize service landing page in the AWS console. 113 | 28. Click on the Dataset Group and then "Event trackers" in the left navigation. 114 | 29. Click the "Create event tracker" button. 115 | 116 | ![Personalize Event Trackers](images/PersonalizeCreateTracker.png) 117 | 118 | 30. Enter a name for your Event Tracker. 119 | 31. Click the Next button. 120 | 121 | ![Personalize Config Event Tracker](images/PersonalizeConfigTracker.png) 122 | 123 | The Event Tracker's tracking ID is displayed on the following page and is also available on the Event Tracker's detail page. 
124 | 125 | 32. Copy this value to your clipboard and **be sure to press the "Finish" button**. 126 | 127 | ![Personalize Tracker ID](images/PersonalizeTrackerId.png) 128 | 129 | 33. Return to your Lambda function. 130 | 34. Create a new key called `personalize_tracking_id`. 131 | 35. Paste the Event Tracker’s tracking ID into the value field. 132 | 36. **Click the Save button at the top of the page to save your changes.** 133 | 134 | ![](https://paper-attachments.dropbox.com/s_C2B02AED879A518AEFAF0FFED12CDDE467AF9DAEA3DC2098084E706023E68F50_1558281914584_image.png) 135 | 136 | Your Lambda will also need a key for the Segment source that will ingest events you will send back via the Lambda, in order to update recommendations after user actions take place. 137 | 138 | 37. Go back to your Segment workspace tab or window. 139 | 38. Click on the `personas-event-source` source. This source will accept events from your Lambda function. 140 | 39. Copy the write key from the Overview tab to your clipboard. 141 | 142 | ![](https://segment.com/docs/destinations/amazon-personalize/images/SegmentWriteKey.png) 143 | 144 | 40. Back again to your Lambda tab or window. 145 | 41. Create a new key called `connections_source_write_key`. 146 | 42. Paste the source key you just copied into the value field. 147 | 43. **Scroll to the top of the page and click the Save button to save your changes.** 148 | 149 | ![](https://paper-attachments.dropbox.com/s_C2B02AED879A518AEFAF0FFED12CDDE467AF9DAEA3DC2098084E706023E68F50_1558282013045_image.png) 150 | 151 | Your lambda is now ready to receive events from Segment! In the next section, you will enable Segment to call your Lambda and send it events. 152 | 153 | ## Part 2 - Setting up Your Segment Destination 154 | 155 | In this section you are going to connect your new Lambda event handler to Segment, via the Segment Personalize Destination. This will enable events to flow to your Lambda and then to Personalize. 156 | 157 | 1. 
Go to your Segment workspace. 158 | 2. Click the Add Destination button in the top right of the Destinations list. 159 | 160 | ![](https://paper-attachments.dropbox.com/s_C2B02AED879A518AEFAF0FFED12CDDE467AF9DAEA3DC2098084E706023E68F50_1558227370476_image.png) 161 | 162 | 3. Type “amazon” into the search box in the screen that appears. 163 | 4. Select the Amazon Personalize destination. 164 | 165 | 166 | ![](https://paper-attachments.dropbox.com/s_C2B02AED879A518AEFAF0FFED12CDDE467AF9DAEA3DC2098084E706023E68F50_1558227492814_image.png) 167 | 168 | 169 | 170 | 5. On the screen that appears, click the Configure Amazon Personalize button. 171 | 172 | 173 | ![](https://paper-attachments.dropbox.com/s_C2B02AED879A518AEFAF0FFED12CDDE467AF9DAEA3DC2098084E706023E68F50_1558232121279_image.png) 174 | 175 | 176 | 177 | 6. Select the `website-prod` source and click the Confirm Source button. 178 | 179 | 180 | ![](https://paper-attachments.dropbox.com/s_C2B02AED879A518AEFAF0FFED12CDDE467AF9DAEA3DC2098084E706023E68F50_1558232187860_image.png) 181 | 182 | 183 | To configure the destination, you will need to tell Segment the ARN of the Lambda you built in the first part of the exercise. 184 | 185 | 186 | 7. Open the AWS management console in another tab or window. 187 | 8. Go to Services > Lambda > Functions. 188 | 189 | ![Lambda Select Function](images/LambdaSelectFunction.png) 190 | 191 | 9. Click on the link for the Lambda you built earlier. 192 | 10. At the top of the screen, you will see the ARN for your Lambda. 193 | 11. Copy the ARN to the clipboard. 194 | 12. Keep this window or tab open, you will need it in a moment. 195 | 196 | 197 | ![Lambda Function ARN](images/LambdaFunctionArn.png) 198 | 199 | 13. Go back to your Segment workspace window. 200 | 14. Click on Connection Settings > Lambda. 201 | 15. Paste the ARN into the text box. 202 | 16. Click the Save button. 
203 | 204 | 205 | ![](https://paper-attachments.dropbox.com/s_E5B8F73CCFD247F55CDECE88873E27F307EC497126FFEDFF0F4E68F01755C7F7_1560739912402_image.png) 206 | 207 | Segment will need execute permission to call your Lambda function from Segment's AWS account. An execution role has already been set up for you for the workshop. 208 | 209 | 17. Open the AWS management console in another tab or window. 210 | 18. Go to Services > IAM. 211 | 19. Click Roles. 212 | 20. In the Search box, type "SegmentExecutePersonalizeLambda". 213 | 214 | ![IAM Find Lambda Execute Role](images/IAM_FindExecuteRole.png) 215 | 216 | 21. Click on the role that appears. 217 | 22. At the top of the screen will be the role ARN for the role. 218 | 23. Copy the ARN to the clipboard. 219 | 220 | ![IAM Find Lambda Execute Role](images/IAM_ExecuteRoleARN.png) 221 | 222 | 24. Go back to your Segment workspace window. 223 | 25. Click Connection Settings > Role Address. 224 | 26. Paste the ARN into the text box. 225 | 27. Click the Save button. 226 | 227 | 228 | ![](https://paper-attachments.dropbox.com/s_C2B02AED879A518AEFAF0FFED12CDDE467AF9DAEA3DC2098084E706023E68F50_1558233476377_image.png) 229 | 230 | 231 | You will also need to set the External ID that Segment will pass to IAM when invoking your Lambda. This ID acts like a shared secret. One has already been configured for you in the execution role. 232 | 233 | 234 | 28. Click on Other Settings > External ID. 235 | 29. Type in `123456789` into the text box. 236 | 30. Click the Save button. 237 | 238 | 239 | ![](https://paper-attachments.dropbox.com/s_C2B02AED879A518AEFAF0FFED12CDDE467AF9DAEA3DC2098084E706023E68F50_1558233715513_image.png) 240 | 241 | 242 | Segment’s destination tries to find your Lambda in the us-west-2 AWS region by default. Because your Lambda was configured in us-east-1, you will need to change this setting. 243 | 244 | 245 | 31. Click on Other Settings > Region. 246 | 32. Type in `us-east-1` in the text box. 247 | 33. 
Click the Save button. 248 | 249 | 250 | ![](https://paper-attachments.dropbox.com/s_C2B02AED879A518AEFAF0FFED12CDDE467AF9DAEA3DC2098084E706023E68F50_1558284031834_image.png) 251 | 252 | 253 | Because you will be running in debug mode in a few steps, we will enable debug logging in the Lambda here. 254 | 255 | 256 | 34. Click on Other Settings > Log Type. 257 | 35. Select “Tail”. 258 | 36. Click the “Save” button. 259 | 260 | 261 | ![](https://paper-attachments.dropbox.com/s_C2B02AED879A518AEFAF0FFED12CDDE467AF9DAEA3DC2098084E706023E68F50_1558288338858_image.png) 262 | 263 | 264 | 265 | 37. Finally, you can turn on the Personalize destination by clicking the slider toggle at the top of the screen. 266 | 267 | 268 | ![](https://paper-attachments.dropbox.com/s_C2B02AED879A518AEFAF0FFED12CDDE467AF9DAEA3DC2098084E706023E68F50_1558233807669_image.png) 269 | 270 | 271 | Your destination is now ready to process events. In the next section, you will send a batch of events through your Lambda to show how you can process real-time events. First, let’s make sure that your Lambda is ready to process events, by sending a test event via the Segment Event Tester. 272 | 273 | 274 | 38. Click “Event Tester” in the left hand pane of your Amazon Personalize Settings screen. 275 | 276 | 277 | ![](https://paper-attachments.dropbox.com/s_C2B02AED879A518AEFAF0FFED12CDDE467AF9DAEA3DC2098084E706023E68F50_1558282533709_image.png) 278 | 279 | 280 | 281 | 39. Paste the following JSON into the Event Tester. 282 | 40. Make sure the JSON tab is selected at the top, you do not need to select an event type. 
283 | 284 | ```javascript 285 | { 286 | "messageId": "test-message-33dlvn", 287 | "timestamp": "2019-02-25T15:55:05.905Z", 288 | "type": "track", 289 | "email": "test@example.org", 290 | "properties": { 291 | "sku": "ocean-blue-shirt" 292 | }, 293 | "userId": "2941404340", 294 | "anonymousId": "2941404340-anon", 295 | "event": "Product Clicked" 296 | } 297 | ``` 298 | 299 | If all goes well, you will see a screen that looks like this: 300 | 301 | 302 | ![](https://paper-attachments.dropbox.com/s_C2B02AED879A518AEFAF0FFED12CDDE467AF9DAEA3DC2098084E706023E68F50_1558285258920_image.png) 303 | 304 | In some cases, you may get timeout errors from this screen. This is generally caused by the Lambda being deployed slowly (i.e. cold start). Re-try sending the event. 305 | 306 | This is also where you may see a permissions error - this most likely means that you entered an incorrect external id, an incorrect IAM role ID, or your Lambda function is not named `SegmentPersonalizeDestinationHandler`. Check those, and try sending a test event again. 307 | 308 | Also note that a "200 Success" does not necessarily mean that the event was processed successfully. This is because the Lambda function catches exceptions and logs them to CloudWatch but returns success to Segment. The reason for this is to prevent Segment from continuing to retry sending events that will likely never succeed due to a configuration issue with the function. In production code, you will want to implement more granular error handling. 309 | 310 | To make sure that your events are truly being processed successfully, review the CloudWatch logs for your function in your AWS account. 311 | 312 | 41. In your Segment workspace, click on Sources > personas-event-source. 313 | 42. Select the Debugger tab. 314 | 43. 
Your Lambda should have sent an event with product recommendations into your source: 315 | 316 | 317 | ![](https://paper-attachments.dropbox.com/s_C2B02AED879A518AEFAF0FFED12CDDE467AF9DAEA3DC2098084E706023E68F50_1558305444088_image.png) 318 | 319 | 320 | 321 | ## **Part 3 - Activating Recommendations using Customer Profile Traits** 322 | 323 | For this final step, you will test your new recommendations endpoint and its synchronization with your user profiles inside of Personas. 324 | 325 | In Exercise 1, you ran a Python script to populate data into your Segment instance. You are going to run it again in this section, so that you can see how the whole event pipeline works. 326 | 327 | 328 | 1. Open your terminal window, or go to your Cloud9 terminal in your browser. 329 | 2. `cd` to the workshop repo directory `segment-personalize-workshop/data` 330 | 3. Run the Python Script in your terminal: 331 | 332 | ``` 333 | python segment-event-generator.py 2019-05-19 334 | ``` 335 | 336 | 4. Go to your Segment workspace. 337 | 5. Click on the `website-prod` source. 338 | 6. Select the Debugger tab at the top of the screen. 339 | 340 | 341 | ![](https://paper-attachments.dropbox.com/s_C2B02AED879A518AEFAF0FFED12CDDE467AF9DAEA3DC2098084E706023E68F50_1558283434807_image.png) 342 | 343 | 344 | As events are sent to the source by the Python script, you will see them appear in real-time in the Debugger view. Segment is now forwarding events to the Personalize source you just built. 345 | 346 | As events start to flow through Segment, you should start to see a list of recommended products from your Personalize solution appearing as traits on user Profiles in Personas. 347 | 348 | 349 | 7. To see user traits, go to your Segment workspace 350 | 8. Click the Personas Orb > Explorer tab 351 | 9. Click on a user profile as shown below 352 | 10. Under the user’s Custom traits tab you will see a `recommended_products` trait, which is kept updated by your Lambda! 
353 | 354 | 355 | ![](https://camo.githubusercontent.com/92e1822a74c7bd3469ee02dd501a801f3f54fbe9/68747470733a2f2f64326d787565667165616137736a2e636c6f756466726f6e742e6e65742f735f313241353841393234303543353645384445393638453644453633423342464544314242383232454233343441354133333838303143384237313742434445425f313535313139343639313333335f696d6167652e706e67) 356 | 357 | 358 | By enabling additional destinations in Segment Personas, you can now pass these traits along with the user’s profile to an email tool, data warehouses, downstream event destinations, or to paid campaign tools like Facebook Custom Audiences. 359 | -------------------------------------------------------------------------------- /exercise4/app.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | 4 | import json 5 | import boto3 6 | import os 7 | import requests # Needed for Segment Events REST APIs 8 | import dateutil.parser as dp 9 | import logging 10 | 11 | from botocore.exceptions import ClientError 12 | 13 | logger = logging.getLogger() 14 | logger.setLevel(logging.INFO) 15 | 16 | connections_endpoint_url = "https://api.segment.io/v1" 17 | connections_source_api_key = os.environ['connections_source_write_key'] 18 | 19 | def api_post(url, key, payload): 20 | myResponse = requests.post(url,auth=(key, ''), json=payload) 21 | if(myResponse.ok): 22 | jData = json.loads(myResponse.content) 23 | return jData 24 | else: 25 | myResponse.raise_for_status() 26 | 27 | def set_user_traits(user_id, traits): 28 | # Sends an identify call to Personas to update a user's traits 29 | formatted_url = "{:s}/identify".format(connections_endpoint_url) 30 | message = { "traits": traits, "userId": user_id, "type": "identify" } 31 | try: 32 | response = api_post(formatted_url, connections_source_api_key, message) 33 | except HTTPError as error: 34 | status = 
error.response.status_code 35 | if status >= 400 and status < 500: 36 | logger.error('Segment: 400 error, more than likely you sent an invalid request.') 37 | elif status >= 500: 38 | logger.error('Segment: There was a server error on the Segment side.') 39 | 40 | def lambda_handler(event, context): 41 | if not 'personalize_tracking_id' in os.environ: 42 | raise Exception('personalize_tracking_id not configured as environment variable') 43 | if not 'personalize_campaign_arn' in os.environ: 44 | raise Exception('personalize_campaign_arn not configured as environment variable') 45 | 46 | logger.info("Segment Event: " + json.dumps(event)) 47 | 48 | # Allow Personalize region to be overriden via environment variable. Optional. 49 | runtime_params = { 'service_name': 'personalize-runtime' } 50 | if 'region_name' in os.environ: 51 | runtime_params['region_name'] = os.environ['region_name'] 52 | 53 | personalize_runtime = boto3.client(**runtime_params) 54 | personalize_events = boto3.client(service_name='personalize-events') 55 | 56 | # Segment will invoke your function once per event type you have configured 57 | # in the Personalize destination in Segment. 58 | 59 | try: 60 | if ('anonymousId' in event and 61 | 'properties' in event and 62 | 'sku' in event['properties']): 63 | 64 | logger.info("Calling Personalize.PutEvents()") 65 | 66 | # Function parameters for put_events call. 67 | params = { 68 | 'trackingId': os.environ['personalize_tracking_id'], 69 | 'sessionId': event['anonymousId'] 70 | } 71 | 72 | # If a user is signed in, we'll get a userId. Otherwise for anonymous 73 | # sessions, we will not have a userId. We still want to call put_events 74 | # in both cases. Once the user identifies themsevles for the session, 75 | # subsequent events will have the userId for the same session and 76 | # Personalize will be able to connect prior anonymous to that user. 
77 | if event.get('userId'): 78 | params['userId'] = event['userId'] 79 | 80 | # You will want to modify this part to match the event props 81 | # that come from your events - Personalize needs the event identifier 82 | # that was used to train the model. In this case, we're using the 83 | # product's SKU passed through Segment to represent the eventId. 84 | properties = { 'itemId': event['properties']['sku'] } 85 | 86 | # Build the event that we're sending to Personalize. 87 | personalize_event = { 88 | 'eventId': event['messageId'], 89 | 'sentAt': int(dp.parse(event['timestamp']).strftime('%s')), 90 | 'eventType': event['event'], 91 | 'properties': json.dumps(properties) 92 | } 93 | 94 | params['eventList'] = [ personalize_event ] 95 | 96 | logger.debug('put_events parameters: {}'.format(json.dumps(params, indent = 2))) 97 | # Call put_events 98 | response = personalize_events.put_events(**params) 99 | 100 | if event.get('userId'): 101 | logger.info("Updating recommendations on user profile in Segment Personas") 102 | 103 | # Get recommendations for the user. 
104 | params = { 'campaignArn': os.environ['personalize_campaign_arn'], 'userId': event.get('userId') } 105 | 106 | response = personalize_runtime.get_recommendations(**params) 107 | 108 | recommended_items = [d['itemId'] for d in response['itemList'] if 'itemId' in d] 109 | 110 | logger.info(recommended_items) 111 | 112 | # Set the updated recommendations on the user's profile - note that 113 | # this user trait can be anything you want 114 | set_user_traits(event.get('userId'), { 'recommended_products' : recommended_items }) 115 | else: 116 | logger.info('Event from Segment is for anonymous user so skipping setting recommendations on profile') 117 | 118 | else: 119 | logger.warn("Segment event does not contain required fields (anonymousId and sku)") 120 | except ValueError as ve: 121 | logger.error("Invalid JSON format received, check your event sources.") 122 | except KeyError as ke: 123 | logger.error("Invalid configuration for Personalize, most likely.") 124 | except ClientError as ce: 125 | logger.error("ClientError - most likely a boto3 issue.") 126 | logger.error(ce.response['Error']['Code']) 127 | logger.error(ce.response['Error']['Message']) 128 | -------------------------------------------------------------------------------- /exercise4/function.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise4/function.zip -------------------------------------------------------------------------------- /exercise4/images/APIGW_Test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise4/images/APIGW_Test.png -------------------------------------------------------------------------------- /exercise4/images/APIGW_TestGet.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise4/images/APIGW_TestGet.png -------------------------------------------------------------------------------- /exercise4/images/APIGW_TestGetResults.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise4/images/APIGW_TestGetResults.png -------------------------------------------------------------------------------- /exercise4/images/APIGW_endpoint.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise4/images/APIGW_endpoint.png -------------------------------------------------------------------------------- /exercise4/images/Architecture-Exercise4-Part1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise4/images/Architecture-Exercise4-Part1.png -------------------------------------------------------------------------------- /exercise4/images/Architecture-Exercise4-Part2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise4/images/Architecture-Exercise4-Part2.png -------------------------------------------------------------------------------- /exercise4/images/Architecture-Exercise4-Part3.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise4/images/Architecture-Exercise4-Part3.png -------------------------------------------------------------------------------- /exercise4/images/IAM_ExecuteRoleARN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise4/images/IAM_ExecuteRoleARN.png -------------------------------------------------------------------------------- /exercise4/images/IAM_FindExecuteRole.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise4/images/IAM_FindExecuteRole.png -------------------------------------------------------------------------------- /exercise4/images/LambdaConfigFunction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise4/images/LambdaConfigFunction.png -------------------------------------------------------------------------------- /exercise4/images/LambdaCreateFunction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise4/images/LambdaCreateFunction.png -------------------------------------------------------------------------------- /exercise4/images/LambdaFunctionArn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise4/images/LambdaFunctionArn.png 
-------------------------------------------------------------------------------- /exercise4/images/LambdaRecAPIGW_Config.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise4/images/LambdaRecAPIGW_Config.png -------------------------------------------------------------------------------- /exercise4/images/LambdaRecAPIGW_Trigger.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise4/images/LambdaRecAPIGW_Trigger.png -------------------------------------------------------------------------------- /exercise4/images/LambdaRecCampaignArn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise4/images/LambdaRecCampaignArn.png -------------------------------------------------------------------------------- /exercise4/images/LambdaRecEndpointCreate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise4/images/LambdaRecEndpointCreate.png -------------------------------------------------------------------------------- /exercise4/images/LambdaRecFunctionSave.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise4/images/LambdaRecFunctionSave.png -------------------------------------------------------------------------------- /exercise4/images/LambdaRecFunctionSource.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise4/images/LambdaRecFunctionSource.png -------------------------------------------------------------------------------- /exercise4/images/LambdaSelectFunction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise4/images/LambdaSelectFunction.png -------------------------------------------------------------------------------- /exercise4/images/LambdaUploadFunctionZip.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise4/images/LambdaUploadFunctionZip.png -------------------------------------------------------------------------------- /exercise4/images/LambdaUploadFunctionZip2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise4/images/LambdaUploadFunctionZip2.png -------------------------------------------------------------------------------- /exercise4/images/PersonalizeCampaignArn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise4/images/PersonalizeCampaignArn.png -------------------------------------------------------------------------------- /exercise4/images/PersonalizeConfigTracker.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise4/images/PersonalizeConfigTracker.png -------------------------------------------------------------------------------- /exercise4/images/PersonalizeCreateTracker.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise4/images/PersonalizeCreateTracker.png -------------------------------------------------------------------------------- /exercise4/images/PersonalizeTrackerId.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/exercise4/images/PersonalizeTrackerId.png -------------------------------------------------------------------------------- /images/PersonalizeDataIngestion.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/images/PersonalizeDataIngestion.png -------------------------------------------------------------------------------- /images/PersonalizeDataIngestionWithSegment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/images/PersonalizeDataIngestionWithSegment.png -------------------------------------------------------------------------------- /images/SegmentPersonalizeArchitecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/james-jory/segment-personalize-workshop/07c8a343051a46d6c96f4a59faaad4e9e86853b3/images/SegmentPersonalizeArchitecture.png 
--------------------------------------------------------------------------------