├── .gitignore ├── CF-template ├── README.md └── cf-template-sfdc-to-s3.json ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── appflow-terraform-samples ├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── salesforce-appflow-amazon-s3 │ ├── CONTRIBUTING.md │ ├── LICENSE │ ├── README.md │ ├── architecture-salesforce-appflow-s3.png │ └── terraform │ │ ├── appflow.tf │ │ ├── iam.tf │ │ ├── outputs.tf │ │ ├── providers.tf │ │ ├── s3.tf │ │ └── variables.tf └── salesforce-private-connect-appflow-amazon-s3 │ ├── CONTRIBUTING.md │ ├── LICENSE │ ├── README.md │ ├── architecture-salesforce-pc-appflow-s3.png │ └── terraform │ ├── appflow.tf │ ├── iam.tf │ ├── outputs.tf │ ├── providers.tf │ ├── s3.tf │ └── variables.tf ├── appflow-time-automation ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── appflow-relative-date-architecture.png ├── appflow.yaml └── lambda-trigger.py ├── sagemaker_appflow_demo ├── README.md └── sagemaker_ml │ ├── build_case_classifier_using_blazingtext.ipynb │ ├── customer_support_classification - data preparation.ipynb │ └── dataset │ └── all_tickets.csv ├── sf-appflow-upsert-redshift-lambda ├── AppFlow_extract_from_Salesforce.md ├── Create_Lambda_Layers_Triggers_Destination.md ├── Create_Redshift_Private.md ├── Create_VPC_S3_Glue.md ├── README.md ├── images │ ├── Add_layer.png │ ├── Add_layer_to_lambda.png │ ├── AppFlow_Connection.png │ ├── AppFlow_Create_SF_Cconnection.png │ ├── AppFlow_Dest_Flow_Trigger.png │ ├── AppFlow_Mapping.png │ ├── AppFlow_SF_Allow.png │ ├── AppFlow_SF_Conn_Created.png │ ├── AppFlow_SF_Conn_Details.png │ ├── AppFlow_Source_Details.png │ ├── AppFlow_Specify_Flow_Details.png │ ├── AppFlow_click_CF.png │ ├── AppFlow_flow_create.png │ ├── Glue_Crawler.png │ ├── Glue_S3_Connection.png │ ├── Lambda_Admin_Role.png │ ├── Lambda_Layer.png │ ├── Lambda_Upload_Zip.png │ ├── Lambda_s3_trigger.png │ ├── Lambda_with_layer.png │ ├── Lamnda_crawl_destination.png │ ├── Redshift_Cluster.png │ ├── Redshift_Cluster_DB_NW.png │ ├── Redshift_Cluster_Subnet_Group.png │ ├── Redshift_IAM_Role.png │ ├── Redshift_Secret.png │ ├── Redshift_Security_Group.png │ ├── Run_appflow.png │ ├── SF_AppFlow_Upsert_Redshift.jpg │ ├── appflow_sfdev_data_lake.png │ ├── stage_lambda_destination.png │ └── vpc_public_private_subnet.png ├── lambda │ ├── functions │ │ ├── f_lambda_crawl_sfdev_appflow_account.zip │ │ ├── f_lambda_move_to_stage_account.zip │ │ └── f_lambda_upsert_sfdev_appflow_account.zip │ └── layers │ │ └── f_lambda_call_redshift_data_api.zip └── sqls │ ├── create_schema_ext_sfdev_appflow.sql │ ├── create_stage_target_schema.sql │ └── create_stage_target_table.sql └── slack-appflow-sentiment ├── README.md └── notebooks └── slack-sentiment.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | .DS_Store 3 | -------------------------------------------------------------------------------- /CF-template/README.md: -------------------------------------------------------------------------------- 1 | ### CloudFormation Template for Amazon AppFlow 2 | 3 | Amazon AppFlow supports AWS CloudFormation for creating and configuring Amazon AppFlow resources such as Connector profile and Amazon AppFlow Flow along with the rest of your AWS infrastructure—in a secure, efficient, and repeatable way. 
The Amazon AppFlow APIs and SDK give developers programmatic access to Amazon AppFlow functionality, enabling developers to set up flows between source and destinations supported by Amazon AppFlow, create connector profiles and execute flows programmatically. 4 | 5 | AWS CloudFormation provides a common language for you to model and provision AWS and third party application resources in your cloud environment. AWS CloudFormation allows you to use programming languages or a simple text file to model and provision, in an automated and secure manner, all the resources needed for your applications across all regions and accounts. This gives you a single source of truth for your AWS and third party resources. AWS CloudFormation support for Amazon AppFlow is available in all regions where Amazon AppFlow is available. 6 | 7 | To learn more about how to use AWS CloudFormation to provision and manage Amazon AppFlow resources, visit our **[documentation](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/AWS_AppFlow.html)**. 8 | 9 | **Note:** We have added comments in the template below for the ease of understanding on how CF Template works for AppFlow. You will not be able to run the template code if it has comments on it. To use the code as it is, you can use this **[clean file](https://github.com/aws-samples/amazon-appflow/blob/master/CF-template/cf-template-sfdc-to-s3.json)** which has the code without comments. 10 | 11 | **About CF Template:** This template helps build a flow from Salesforce to S3. 12 | 13 | **Code:** 14 | 15 | ```js 16 | { 17 | "AWSTemplateFormatVersion": "2010-09-09", 18 | "Description": "Sample CloudFormation Template for AppFlow: Sample template shows how to create a flow", 19 | "Metadata": { 20 | "AWS::CloudFormation::Interface": { 21 | "ParameterGroups": [ 22 | { 23 | "Label": { 24 | "default": "Parameters" 25 | }, 26 | "Parameters": [ 27 | "Connection", 28 | "S3Bucket", 29 | "Prefix" 30 | ] 31 | } 32 | ], 33 | "ParameterLabels": { 34 | "Connection": { 35 | "default": "SFDC Connection Name" 36 | }, 37 | "S3Bucket": { 38 | "default": "S3 Bucket Name to write data to" 39 | }, 40 | "Prefix": { 41 | "default": "S3 prefix to be used to write the data - something like SFDCData" 42 | } 43 | } 44 | } 45 | }, 46 | "Parameters": { 47 | "Connection": { 48 | "Type": "String" 49 | }, 50 | "S3Bucket": { 51 | "Type": "String" 52 | }, 53 | "Prefix": { 54 | "Type": "String" 55 | } 56 | }, 57 | "Resources": { 58 | "S3bucketpolicy": { 59 | "Type": "AWS::S3::BucketPolicy", 60 | "Properties": { 61 | "Bucket": { 62 | "Ref": "S3Bucket" 63 | }, 64 | "PolicyDocument": { 65 | "Version": "2008-10-17", 66 | "Statement": [ 67 | { 68 | "Effect": "Allow", 69 | "Principal": { 70 | "Service": "appflow.amazonaws.com" 71 | }, 72 | "Action": [ 73 | "s3:PutObject", 74 | "s3:AbortMultipartUpload", 75 | "s3:ListMultipartUploadParts", 76 | "s3:ListBucketMultipartUploads", 77 | "s3:GetBucketAcl", 78 | "s3:PutObjectAcl" 79 | ], 80 | "Resource": [ 81 | { 82 | "Fn::Join": [ 83 | "", 84 | [ 85 | "arn:aws:s3:::", 86 | { 87 | "Ref": "S3Bucket" 88 | } 89 | ] 90 | ] 91 | }, 92 | { 93 | "Fn::Join": [ 94 | "", 95 | [ 96 | "arn:aws:s3:::", 97 | { 98 | "Ref": "S3Bucket" 99 | }, 100 | "/*" 101 | ] 102 | ] 103 | } 104 | ] 105 | } 106 | ] 107 | } 108 | } 109 | }, 110 | "SFDCFlow": { 111 | "Type": "AWS::AppFlow::Flow", 112 | "Properties": { 113 | "Description": "AppFlow Flow integrating SFDC Account Data into the Data Lake", 114 | // Properties related to Destination connector. 
115 | // Note: many AWS connectors like AmazonS3 don't require a connector profile. 116 | // AppFlow has access to the S3 bucket through a BucketResourcePolicy, therefore a connectorprofile isn't needed. 117 | "DestinationFlowConfigList": [ 118 | { 119 | "ConnectorType": "S3", 120 | "DestinationConnectorProperties": { 121 | "S3": { 122 | "BucketName": { 123 | "Ref": "S3Bucket" 124 | }, 125 | "BucketPrefix": { 126 | "Ref": "Prefix" 127 | }, 128 | //the configuration that determine show Amazon AppFlow should format the flow output data when AmazonS3 is used as the destination. 129 | "S3OutputFormatConfig": { 130 | //the aggregation settings that you can use to customize the output format of your flowdata. Allowed values: None|SingleFile 131 | "AggregationConfig": { 132 | "AggregationType": "None" 133 | }, 134 | //indicates the file type that AmazonAppFlow places in the AmazonS3 bucket.Allowed values: CSV|JSON|PARQUET 135 | "FileType": "PARQUET" 136 | } 137 | } 138 | } 139 | } 140 | ], 141 | "FlowName": "SFDCAccount", 142 | // Properties related to Source connector 143 | "SourceFlowConfig": { 144 | // To create a flow,you must first create a connector profile that contains information about connecting to Salesforce. 145 | // ConnectorProfileName is the name for the connector profile created through console or ref to the resource if created through CFN template. 146 | "ConnectorProfileName": { 147 | "Ref": "Connection" 148 | }, 149 | "ConnectorType": "Salesforce", 150 | "SourceConnectorProperties": { 151 | "Salesforce": { 152 | // The flag that enables dynamic fetching of new(recently added) fields in the Salesforce objects while running a flow. 153 | "EnableDynamicFieldUpdate": false, 154 | // Indicates whether AmazonAppFlow includes deleted files in the flow run. 155 | "IncludeDeletedRecords": false, 156 | // The object specified in the flow source (here, Salesforce). 157 | "Object": "Account" 158 | } 159 | } 160 | }, 161 | // "Tasks" describe what to do with the data once it has been retrieved, but before it is sent to the destination. 162 | // Most connectors require a projection task, a projection task describes what fields should be retrieved from the source object. 163 | "Tasks": [ 164 | { 165 | // Specifies the particular task implementation that AmazonAppFlow performs. Allowed values: Arithmetic|Filter|Map|Mask|Merge|Truncate|Validate 166 | // For projection tasks, selected task type has to be filter 167 | "TaskType": "Filter", 168 | "SourceFields": [ 169 | "Id", 170 | "Name", 171 | "Type", 172 | "BillingAddress", 173 | "ShippingAddress", 174 | "Phone", 175 | "Sic", 176 | "Industry", 177 | "AnnualRevenue" 178 | ], 179 | // Define the operation to be performed on the provided source fields.Allowed values can be found at https: //docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-appflow-flow-connectoroperator.html 180 | "ConnectorOperator": { 181 | "Salesforce": "PROJECTION" 182 | } 183 | }, 184 | { 185 | // Most flows also require atleast one mapping task. mapping tasks map a source field to a destination field (here, mapping Id to Id). 186 | // Note: projected fields will only showup in the destination if they have a mapping task. 187 | "TaskType": "Map", 188 | "SourceFields": [ 189 | "Id" 190 | ], 191 | // A map used to store task-related information. 
More info at https: //docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-appflow-flow-taskpropertiesobject.html 192 | "TaskProperties": [ 193 | { 194 | "Key": "SOURCE_DATA_TYPE", 195 | "Value": "id" 196 | }, 197 | { 198 | "Key": "DESTINATION_DATA_TYPE", 199 | "Value": "id" 200 | } 201 | ], 202 | "DestinationField": "Id", 203 | "ConnectorOperator": { 204 | "Salesforce": "NO_OP" 205 | } 206 | }, 207 | { 208 | "TaskType": "Map", 209 | "SourceFields": [ 210 | "Name" 211 | ], 212 | "TaskProperties": [ 213 | { 214 | "Key": "SOURCE_DATA_TYPE", 215 | "Value": "string" 216 | }, 217 | { 218 | "Key": "DESTINATION_DATA_TYPE", 219 | "Value": "string" 220 | } 221 | ], 222 | "DestinationField": "Name", 223 | "ConnectorOperator": { 224 | "Salesforce": "NO_OP" 225 | } 226 | }, 227 | { 228 | "TaskType": "Map", 229 | "SourceFields": [ 230 | "Type" 231 | ], 232 | "TaskProperties": [ 233 | { 234 | "Key": "SOURCE_DATA_TYPE", 235 | "Value": "picklist" 236 | }, 237 | { 238 | "Key": "DESTINATION_DATA_TYPE", 239 | "Value": "picklist" 240 | } 241 | ], 242 | "DestinationField": "Type", 243 | "ConnectorOperator": { 244 | "Salesforce": "NO_OP" 245 | } 246 | }, 247 | { 248 | "TaskType": "Map", 249 | "SourceFields": [ 250 | "BillingAddress" 251 | ], 252 | "TaskProperties": [ 253 | { 254 | "Key": "SOURCE_DATA_TYPE", 255 | "Value": "address" 256 | }, 257 | { 258 | "Key": "DESTINATION_DATA_TYPE", 259 | "Value": "address" 260 | } 261 | ], 262 | "DestinationField": "BillingAddress", 263 | "ConnectorOperator": { 264 | "Salesforce": "NO_OP" 265 | } 266 | }, 267 | { 268 | "TaskType": "Map", 269 | "SourceFields": [ 270 | "ShippingAddress" 271 | ], 272 | "TaskProperties": [ 273 | { 274 | "Key": "SOURCE_DATA_TYPE", 275 | "Value": "address" 276 | }, 277 | { 278 | "Key": "DESTINATION_DATA_TYPE", 279 | "Value": "address" 280 | } 281 | ], 282 | "DestinationField": "ShippingAddress", 283 | "ConnectorOperator": { 284 | "Salesforce": "NO_OP" 285 | } 286 | }, 287 | { 288 | "TaskType": "Map", 289 | "SourceFields": [ 290 | "Phone" 291 | ], 292 | "TaskProperties": [ 293 | { 294 | "Key": "SOURCE_DATA_TYPE", 295 | "Value": "phone" 296 | }, 297 | { 298 | "Key": "DESTINATION_DATA_TYPE", 299 | "Value": "phone" 300 | } 301 | ], 302 | "DestinationField": "Phone", 303 | "ConnectorOperator": { 304 | "Salesforce": "NO_OP" 305 | } 306 | }, 307 | { 308 | "TaskType": "Map", 309 | "SourceFields": [ 310 | "Sic" 311 | ], 312 | "TaskProperties": [ 313 | { 314 | "Key": "SOURCE_DATA_TYPE", 315 | "Value": "string" 316 | }, 317 | { 318 | "Key": "DESTINATION_DATA_TYPE", 319 | "Value": "string" 320 | } 321 | ], 322 | "DestinationField": "Sic", 323 | "ConnectorOperator": { 324 | "Salesforce": "NO_OP" 325 | } 326 | }, 327 | { 328 | "TaskType": "Map", 329 | "SourceFields": [ 330 | "Industry" 331 | ], 332 | "TaskProperties": [ 333 | { 334 | "Key": "SOURCE_DATA_TYPE", 335 | "Value": "picklist" 336 | }, 337 | { 338 | "Key": "DESTINATION_DATA_TYPE", 339 | "Value": "picklist" 340 | } 341 | ], 342 | "DestinationField": "Industry", 343 | "ConnectorOperator": { 344 | "Salesforce": "NO_OP" 345 | } 346 | }, 347 | { 348 | "TaskType": "Map", 349 | "SourceFields": [ 350 | "AnnualRevenue" 351 | ], 352 | "TaskProperties": [ 353 | { 354 | "Key": "SOURCE_DATA_TYPE", 355 | "Value": "currency" 356 | }, 357 | { 358 | "Key": "DESTINATION_DATA_TYPE", 359 | "Value": "currency" 360 | } 361 | ], 362 | "DestinationField": "AnnualRevenue", 363 | "ConnectorOperator": { 364 | "Salesforce": "NO_OP" 365 | } 366 | }, 367 | { 368 | "TaskType": "Validate", 369 | "SourceFields": [ 
370 | "Id" 371 | ], 372 | "TaskProperties": [ 373 | { 374 | "Key": "VALIDATION_ACTION", 375 | "Value": "DropRecord" 376 | } 377 | ], 378 | "ConnectorOperator": { 379 | "Salesforce": "VALIDATE_NON_NULL" 380 | } 381 | }, 382 | { 383 | "taskType": "Mask", 384 | "sourceFields": [ 385 | "Phone" 386 | ], 387 | "TaskProperties": [ 388 | { 389 | "Key": "MASK_LENGTH", 390 | "Value": "5" 391 | }, 392 | { 393 | "Key": "MASK_VALUE", 394 | "Value": "*" 395 | } 396 | ], 397 | "connectorOperator": { 398 | "Salesforce": "MASK_LAST_N" 399 | } 400 | } 401 | ], 402 | "TriggerConfig": { 403 | // Configuration related to trigger type: OnDemand, Scheduled, Event 404 | "TriggerType": "OnDemand" 405 | } 406 | }, 407 | "DependsOn": "S3bucketpolicy" 408 | } 409 | } 410 | } 411 | ``` 412 | -------------------------------------------------------------------------------- /CF-template/cf-template-sfdc-to-s3.json: -------------------------------------------------------------------------------- 1 | { 2 | "AWSTemplateFormatVersion": "2010-09-09", 3 | "Description": "Sample CloudFormation Template for AppFlow SFDC integration", 4 | "Metadata": { 5 | "AWS::CloudFormation::Interface": { 6 | "ParameterGroups": [ 7 | { 8 | "Label": { 9 | "default": "Parameters" 10 | }, 11 | "Parameters": [ 12 | "Connection", 13 | "S3Bucket", 14 | "Prefix" 15 | ] 16 | } 17 | ], 18 | "ParameterLabels": { 19 | "Connection": { 20 | "default": "SFDC Connection Name" 21 | }, 22 | "S3Bucket": { 23 | "default": "S3 Bucket Name to write data to" 24 | }, 25 | "Prefix": { 26 | "default": "S3 prefix to be used to write the data - something like SFDCData" 27 | } 28 | } 29 | } 30 | }, 31 | "Parameters": { 32 | "Connection": { 33 | "Type": "String" 34 | }, 35 | "S3Bucket": { 36 | "Type": "String" 37 | }, 38 | "Prefix": { 39 | "Type": "String" 40 | } 41 | }, 42 | "Resources": { 43 | "S3bucketpolicy": { 44 | "Type": "AWS::S3::BucketPolicy", 45 | "Properties": { 46 | "Bucket": { 47 | "Ref": "S3Bucket" 48 | }, 49 | "PolicyDocument": { 50 | "Version": "2008-10-17", 51 | "Statement": [ 52 | { 53 | "Effect": "Allow", 54 | "Principal": { 55 | "Service": "appflow.amazonaws.com" 56 | }, 57 | "Action": [ 58 | "s3:PutObject", 59 | "s3:AbortMultipartUpload", 60 | "s3:ListMultipartUploadParts", 61 | "s3:ListBucketMultipartUploads", 62 | "s3:GetBucketAcl", 63 | "s3:PutObjectAcl" 64 | ], 65 | "Resource": [ 66 | { 67 | "Fn::Join": [ 68 | "", 69 | [ 70 | "arn:aws:s3:::", 71 | { 72 | "Ref": "S3Bucket" 73 | } 74 | ] 75 | ] 76 | }, 77 | { 78 | "Fn::Join": [ 79 | "", 80 | [ 81 | "arn:aws:s3:::", 82 | { 83 | "Ref": "S3Bucket" 84 | }, 85 | "/*" 86 | ] 87 | ] 88 | } 89 | ] 90 | } 91 | ] 92 | } 93 | } 94 | }, 95 | "SFDCFlow": { 96 | "Type": "AWS::AppFlow::Flow", 97 | "Properties": { 98 | "Description": "AppFlow Flow integrating SFDC Account Data into the Data Lake", 99 | "DestinationFlowConfigList": [ 100 | { 101 | "ConnectorType": "S3", 102 | "DestinationConnectorProperties": { 103 | "S3": { 104 | "BucketName": { 105 | "Ref": "S3Bucket" 106 | }, 107 | "BucketPrefix": { 108 | "Ref": "Prefix" 109 | }, 110 | "S3OutputFormatConfig": { 111 | "AggregationConfig": { 112 | "AggregationType": "None" 113 | }, 114 | "FileType": "PARQUET" 115 | } 116 | } 117 | } 118 | } 119 | ], 120 | "FlowName": "SFDCAccount", 121 | "SourceFlowConfig": { 122 | "ConnectorProfileName": { 123 | "Ref": "Connection" 124 | }, 125 | "ConnectorType": "Salesforce", 126 | "SourceConnectorProperties": { 127 | "Salesforce": { 128 | "EnableDynamicFieldUpdate": false, 129 | "IncludeDeletedRecords": false, 130 | "Object": 
"Account" 131 | } 132 | } 133 | }, 134 | "Tasks": [ 135 | { 136 | "TaskType": "Filter", 137 | "SourceFields": [ 138 | "Id", 139 | "Name", 140 | "Type", 141 | "BillingAddress", 142 | "ShippingAddress", 143 | "Phone", 144 | "Sic", 145 | "Industry", 146 | "AnnualRevenue" 147 | ], 148 | "ConnectorOperator": { 149 | "Salesforce": "PROJECTION" 150 | } 151 | }, 152 | { 153 | "TaskType": "Map", 154 | "SourceFields": [ 155 | "Id" 156 | ], 157 | "TaskProperties": [ 158 | { 159 | "Key": "SOURCE_DATA_TYPE", 160 | "Value": "id" 161 | }, 162 | { 163 | "Key": "DESTINATION_DATA_TYPE", 164 | "Value": "id" 165 | } 166 | ], 167 | "DestinationField": "Id", 168 | "ConnectorOperator": { 169 | "Salesforce": "NO_OP" 170 | } 171 | }, 172 | { 173 | "TaskType": "Map", 174 | "SourceFields": [ 175 | "Name" 176 | ], 177 | "TaskProperties": [ 178 | { 179 | "Key": "SOURCE_DATA_TYPE", 180 | "Value": "string" 181 | }, 182 | { 183 | "Key": "DESTINATION_DATA_TYPE", 184 | "Value": "string" 185 | } 186 | ], 187 | "DestinationField": "Name", 188 | "ConnectorOperator": { 189 | "Salesforce": "NO_OP" 190 | } 191 | }, 192 | { 193 | "TaskType": "Map", 194 | "SourceFields": [ 195 | "Type" 196 | ], 197 | "TaskProperties": [ 198 | { 199 | "Key": "SOURCE_DATA_TYPE", 200 | "Value": "picklist" 201 | }, 202 | { 203 | "Key": "DESTINATION_DATA_TYPE", 204 | "Value": "picklist" 205 | } 206 | ], 207 | "DestinationField": "Type", 208 | "ConnectorOperator": { 209 | "Salesforce": "NO_OP" 210 | } 211 | }, 212 | { 213 | "TaskType": "Map", 214 | "SourceFields": [ 215 | "BillingAddress" 216 | ], 217 | "TaskProperties": [ 218 | { 219 | "Key": "SOURCE_DATA_TYPE", 220 | "Value": "address" 221 | }, 222 | { 223 | "Key": "DESTINATION_DATA_TYPE", 224 | "Value": "address" 225 | } 226 | ], 227 | "DestinationField": "BillingAddress", 228 | "ConnectorOperator": { 229 | "Salesforce": "NO_OP" 230 | } 231 | }, 232 | { 233 | "TaskType": "Map", 234 | "SourceFields": [ 235 | "ShippingAddress" 236 | ], 237 | "TaskProperties": [ 238 | { 239 | "Key": "SOURCE_DATA_TYPE", 240 | "Value": "address" 241 | }, 242 | { 243 | "Key": "DESTINATION_DATA_TYPE", 244 | "Value": "address" 245 | } 246 | ], 247 | "DestinationField": "ShippingAddress", 248 | "ConnectorOperator": { 249 | "Salesforce": "NO_OP" 250 | } 251 | }, 252 | { 253 | "TaskType": "Map", 254 | "SourceFields": [ 255 | "Phone" 256 | ], 257 | "TaskProperties": [ 258 | { 259 | "Key": "SOURCE_DATA_TYPE", 260 | "Value": "phone" 261 | }, 262 | { 263 | "Key": "DESTINATION_DATA_TYPE", 264 | "Value": "phone" 265 | } 266 | ], 267 | "DestinationField": "Phone", 268 | "ConnectorOperator": { 269 | "Salesforce": "NO_OP" 270 | } 271 | }, 272 | { 273 | "TaskType": "Map", 274 | "SourceFields": [ 275 | "Sic" 276 | ], 277 | "TaskProperties": [ 278 | { 279 | "Key": "SOURCE_DATA_TYPE", 280 | "Value": "string" 281 | }, 282 | { 283 | "Key": "DESTINATION_DATA_TYPE", 284 | "Value": "string" 285 | } 286 | ], 287 | "DestinationField": "Sic", 288 | "ConnectorOperator": { 289 | "Salesforce": "NO_OP" 290 | } 291 | }, 292 | { 293 | "TaskType": "Map", 294 | "SourceFields": [ 295 | "Industry" 296 | ], 297 | "TaskProperties": [ 298 | { 299 | "Key": "SOURCE_DATA_TYPE", 300 | "Value": "picklist" 301 | }, 302 | { 303 | "Key": "DESTINATION_DATA_TYPE", 304 | "Value": "picklist" 305 | } 306 | ], 307 | "DestinationField": "Industry", 308 | "ConnectorOperator": { 309 | "Salesforce": "NO_OP" 310 | } 311 | }, 312 | { 313 | "TaskType": "Map", 314 | "SourceFields": [ 315 | "AnnualRevenue" 316 | ], 317 | "TaskProperties": [ 318 | { 319 | "Key": "SOURCE_DATA_TYPE", 
320 | "Value": "currency" 321 | }, 322 | { 323 | "Key": "DESTINATION_DATA_TYPE", 324 | "Value": "currency" 325 | } 326 | ], 327 | "DestinationField": "AnnualRevenue", 328 | "ConnectorOperator": { 329 | "Salesforce": "NO_OP" 330 | } 331 | }, 332 | { 333 | "TaskType": "Validate", 334 | "SourceFields": [ 335 | "Id" 336 | ], 337 | "TaskProperties": [ 338 | { 339 | "Key": "VALIDATION_ACTION", 340 | "Value": "DropRecord" 341 | } 342 | ], 343 | "ConnectorOperator": { 344 | "Salesforce": "VALIDATE_NON_NULL" 345 | } 346 | }, 347 | { 348 | "taskType": "Mask", 349 | "sourceFields": [ 350 | "Phone" 351 | ], 352 | "TaskProperties": [ 353 | { 354 | "Key": "MASK_LENGTH", 355 | "Value": "5" 356 | }, 357 | { 358 | "Key": "MASK_VALUE", 359 | "Value": "*" 360 | } 361 | ], 362 | "connectorOperator": { 363 | "Salesforce": "MASK_LAST_N" 364 | } 365 | } 366 | ], 367 | "TriggerConfig": { 368 | "TriggerType": "OnDemand" 369 | } 370 | }, 371 | "DependsOn": "S3bucketpolicy" 372 | } 373 | } 374 | } 375 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *master* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 
36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | 61 | We may ask you to sign a [Contributor License Agreement (CLA)](http://en.wikipedia.org/wiki/Contributor_License_Agreement) for larger changes. 62 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 10 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 11 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 12 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 13 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 15 | 16 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Amazon AppFlow Samples, Blogs, and Demos 2 | 3 | This repository contains example code snippets, blogs and demos for showing how Amazon AppFlow can be used to securely transfer data between SaaS applications (like Salesforce, Marketo, Slack, etc.) and AWS services (like Amazon S3 and Amazon Redshift). 
4 | 5 | To learn more about Amazon AppFlow visit: https://aws.amazon.com/appflow/ 6 | 7 | ## Samples/ Blogs 8 | 9 | | Topic | Description | 10 | | ----------------------------------------------------------- | ---------------------------------------------------------- | 11 | | [CloudFormation Template to build a flow from Salesforce to S3](https://github.com/aws-samples/amazon-appflow/tree/master/CF-template) | How to use AWS CloudFormation to setup a flow on AppFlow| 12 | | [AppFlow API Samples](https://docs.aws.amazon.com/appflow/1.0/APIReference/API_Operations.html) | How to use AppFlow APIs to setup flows and connector profiles| 13 | | [Upsert Salesforce data into Redshift (private subnet) with S3 work around](sf-appflow-upsert-redshift-lambda/README.md) | Extract salesforce data using Amazon AppFlow and upsert it to Redshift tables hosted on private subnet via Amazon S3| 14 | | [Setup EventBridge & route Salesforce Events to Lambda](https://aws.amazon.com/blogs/compute/building-salesforce-integrations-with-amazon-eventbridge/) | How to set up the integration, and route Salesforce events to an AWS Lambda function for processing| 15 | | [AppFlow Relative Time Frame Automation Example ](https://github.com/aws-samples/amazon-appflow/tree/master/appflow-time-automation) | How to calculate the time in # days with lambda to create a Flow filtering data from salesforce. | 16 | | [AppFlow Terraform Samples ](https://github.com/aws-samples/amazon-appflow/tree/master/appflow-terraform-samples) | Terraform Samples for automated AppFlow Flows deployment. | 17 | 18 | ## Demos 19 | 20 | | Topic | Description | 21 | | ----------------------------------------------------------- | ---------------------------------------------------------- | 22 | | [How to run sentiment analysis on slack data using AppFlow with Amazon Comprehend](slack-appflow-sentiment/README.md) | Extract conversations data from Slack to S3 using AppFlow and run sentiment analysis on it using Amazon Comprehend and Amazon SageMaker| 23 | 24 | 25 | ## Other Resources 26 | 27 | - [Product Information](https://aws.amazon.com/appflow/) 28 | - [Getting Started Content](https://aws.amazon.com/appflow/getting-started/) 29 | 30 | ## License Summary 31 | 32 | The sample code is made available under the MIT-0 license. See the LICENSE file. 33 | -------------------------------------------------------------------------------- /appflow-terraform-samples/.gitignore: -------------------------------------------------------------------------------- 1 | # Node artifact files 2 | node_modules/ 3 | dist/ 4 | 5 | # Compiled Java class files 6 | *.class 7 | 8 | # Compiled Python bytecode 9 | *.py[cod] 10 | 11 | # Log files 12 | *.log 13 | 14 | # Package files 15 | *.jar 16 | 17 | # Maven 18 | target/ 19 | dist/ 20 | 21 | # JetBrains IDE 22 | .idea/ 23 | 24 | # Unit test reports 25 | TEST*.xml 26 | 27 | # Generated by MacOS 28 | .DS_Store 29 | 30 | # Generated by Windows 31 | Thumbs.db 32 | 33 | # Applications 34 | *.app 35 | *.exe 36 | *.war 37 | 38 | # Large media files 39 | *.mp4 40 | *.tiff 41 | *.avi 42 | *.flv 43 | *.mov 44 | *.wmv 45 | 46 | 47 | # Local .terraform directories 48 | **/.terraform/* 49 | 50 | # .tfstate files 51 | *.tfstate 52 | *.tfstate.* 53 | 54 | # Crash log files 55 | crash.log 56 | crash.*.log 57 | 58 | # Exclude all .tfvars files, which are likely to contain sensitive data, such as 59 | # password, private keys, and other secrets. 
These should not be part of version 60 | # control as they are data points which are potentially sensitive and subject 61 | # to change depending on the environment. 62 | *.tfvars 63 | *.tfvars.json 64 | 65 | # Ignore override files as they are usually used to override resources locally and so 66 | # are not checked in 67 | override.tf 68 | override.tf.json 69 | *_override.tf 70 | *_override.tf.json 71 | 72 | # Include override files you do wish to add to version control using negated pattern 73 | # !example_override.tf 74 | 75 | # Include tfplan files to ignore the plan output of command: terraform plan -out=tfplan 76 | # example: *tfplan* 77 | 78 | # Ignore CLI configuration files 79 | .terraformrc 80 | terraform.rc 81 | 82 | ### Task List 83 | /terraform/tasks_list.txt 84 | 85 | ### 86 | .terraform.lock.hcl 87 | terraform.tf 88 | -------------------------------------------------------------------------------- /appflow-terraform-samples/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. 
As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. -------------------------------------------------------------------------------- /appflow-terraform-samples/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 10 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 11 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 12 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 13 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /appflow-terraform-samples/README.md: -------------------------------------------------------------------------------- 1 | # HashiCorp Terraform samples for Amazon AppFlow 2 | -------------------------------------- 3 | These samples demonstrate how you can use HashiCorp Terraform to automate the deployment of your data flows between AWS services and SaaS applications with [Amazon AppFlow](https://aws.amazon.com/appflow/). 4 | 5 | 6 | ## Prerequisites 7 | -------------------------------------- 8 | 9 | The prerequisites depend on your flow sources and destinations. Please review the [AWS Documentation](https://docs.aws.amazon.com/appflow/latest/userguide/app-specific.html) for the sources and destinations that Amazon AppFlow supports. 10 | 11 | Each folder covers a specific use case; please review the prerequisites in each of them. In general: 12 | 13 | - An AWS account that will be used to run the [Amazon AppFlow](https://aws.amazon.com/appflow/) flows 14 | - Access to your AWS environment and the specific resources 15 | - An Amazon AppFlow connection to your specific application 16 | - [Terraform v1.4.5](https://releases.hashicorp.com/terraform/1.4.5/) or later installed 17 | 18 | ## Considerations 19 | -------------------------------------- 20 | 1. 
Run your Terraform commands as per the best practices and recommendations depending on your use case. 21 | 2. Make sure to store and secure your Terraform State file accordingly. 22 | 3. Ingest Variables using your preferred method 23 | 24 | ## Use-cases 25 | 26 | Current directories in this Sample: 27 | 28 | - salesforce-appflow-amazon-s3: Describes API Connection from Amazon AppFlow to Salesforce, and exports data to Amazon S3. 29 | - salesforce-private-connect-appflow-amazon-s3: Some customers may need to connect privately to Salesforce using [Private Connect feature](https://help.salesforce.com/s/articleView?id=sf.private_connect_overview.htm&type=5) in an AWS Region that is [not supported by Salesforce](https://help.salesforce.com/s/articleView?id=sf.private_connect_considerations.htm&type=5). This sample describes a workaround to this limitation. It establishes Private connection from Amazon AppFlow to Salesforce on a supported AWS Region (Pivot Region) exporting Data on Amazon S3. Then, Data is going to be replicated to an Amazon S3 Bucket in the desired AWS Region. There are multiples ways to replicate Data from one Amazon S3 Bucket to another. For this sample We replicate Data using [S3 Cross-Region replication](https://docs.aws.amazon.com/AmazonS3/latest/userguide/replication.html#crr-scenario) 30 | 31 | ## Security 32 | -------------------------------------- 33 | 34 | See [CONTRIBUTING](CONTRIBUTING.md) for more information. 35 | 36 | ## License 37 | -------------------------------------- 38 | 39 | This library is licensed under the MIT-0 License. See the [LICENSE](LICENSE) file. 40 | 41 | -------------------------------------------------------------------------------- /appflow-terraform-samples/salesforce-appflow-amazon-s3/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. 
If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. -------------------------------------------------------------------------------- /appflow-terraform-samples/salesforce-appflow-amazon-s3/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 10 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 11 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 12 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 13 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /appflow-terraform-samples/salesforce-appflow-amazon-s3/README.md: -------------------------------------------------------------------------------- 1 | ## Use-case 2 | -------------------------------------- 3 | This Sample Describes API Connection from Amazon AppFlow to Salesforce, and exports data to Amazon S3. 
4 | 5 | ## Prerequisites 6 | -------------------------------------- 7 | 8 | - An AWS Account that will be used to run the [Amazon AppFlows](https://aws.amazon.com/appflow/) Flows, store the Data in [Amazon S3](https://aws.amazon.com/pm/serv-s3/?gclid=Cj0KCQjwgJyyBhCGARIsAK8LVLMxHJ_mpECi2NPYQ3rX9NZmMo9N5eiwI5iIOWRGtsUAwFreODqHx7saAjaFEALw_wcB&trk=fecf68c9-3874-4ae2-a7ed-72b6d19c8034&sc_channel=ps&ef_id=Cj0KCQjwgJyyBhCGARIsAK8LVLMxHJ_mpECi2NPYQ3rX9NZmMo9N5eiwI5iIOWRGtsUAwFreODqHx7saAjaFEALw_wcB:G:s&s_kwcid=AL!4422!3!536452728638!e!!g!!amazon%20s3!11204620052!112938567994) bucket 9 | - Amazon Appflow connection to [Salesforce] (https://www.salesforce.com/) with Change Data Capture feature enabled and properly configured 10 | - [Terraform v1.4.5](https://releases.hashicorp.com/terraform/1.4.5/) or later installed 11 | 12 | ## Architecture 13 | -------------------------------------- 14 | 15 | ![](./architecture-salesforce-appflow-s3.png) 16 | 17 | ## Terraform Resources 18 | 19 | ## Commands 20 | 21 | Run the the following Terraform Commands to deploy the resources 22 | 23 | ``` 24 | terraform init 25 | terraform plan 26 | terraform apply 27 | ``` 28 | 29 | ## Providers 30 | -------------------------------------- 31 | 32 | | Name | Version | 33 | |------|---------| 34 | | [aws](#provider\_aws) | 5.46.0 | 35 | | [aws.central](#provider\_aws.central) | 5.46.0 | 36 | 37 | ## Resources 38 | -------------------------------------- 39 | 40 | | Name | Type | 41 | |------|------| 42 | | [aws_appflow_flow.test_flow](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/appflow_flow) | resource | 43 | | [aws_s3_bucket.appflow_target_bucket](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_bucket) | resource | 44 | | [aws_s3_bucket_acl.source_bucket_acl](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_bucket_acl) | resource | 45 | | [aws_s3_bucket_ownership_controls.s3_bucket_acl_ownership](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_bucket_ownership_controls) | resource | 46 | | [aws_s3_bucket_policy.appflow_target_bucket_s3_policy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_bucket_policy) | resource | 47 | | [aws_s3_bucket_versioning.target_bucket_versioning](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_bucket_versioning) | resource | 48 | | [aws_iam_policy_document.s3_policy_target_bucket](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | 49 | 50 | ## Inputs 51 | -------------------------------------- 52 | 53 | | Name | Description | Type | Default | Required | 54 | |------|-------------|------|---------|:--------:| 55 | | [customer](#input\_customer) | Customer Target PoC | `string` | n/a | yes | 56 | | [sfdc\_connection\_name](#input\_sfdc\_connection\_name) | AppFlow connector name | `string` | n/a | yes | 57 | 58 | ## Outputs 59 | -------------------------------------- 60 | 61 | | Name | Description | 62 | |------|-------------| 63 | | [flow\_arn](#output\_flow\_arn) | n/a | 64 | 65 | ## Security 66 | -------------------------------------- 67 | 68 | See [CONTRIBUTING](CONTRIBUTING.md) for more information. 69 | 70 | ## License 71 | -------------------------------------- 72 | 73 | This library is licensed under the MIT-0 License. See the [LICENSE](LICENSE) file. 
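
## Example variables
--------------------------------------

One way to provide the two required inputs above is a `terraform.tfvars` file alongside the configuration, which Terraform loads automatically; note that the sample's `.gitignore` already excludes `*.tfvars` files, so they are not committed. The values below are placeholders for illustration only, not part of this sample:

```
# terraform.tfvars (illustrative values only; replace with your own)
customer             = "examplecorp"          # used to name the flow and the target S3 bucket
sfdc_connection_name = "my-salesforce-conn"   # name of an existing AppFlow Salesforce connection
```

Alternatively, pass the values on the command line, for example `terraform apply -var="customer=examplecorp" -var="sfdc_connection_name=my-salesforce-conn"`.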
74 | -------------------------------------------------------------------------------- /appflow-terraform-samples/salesforce-appflow-amazon-s3/architecture-salesforce-appflow-s3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-appflow/549f19c914aabdf3946737fea189cb1f0985e971/appflow-terraform-samples/salesforce-appflow-amazon-s3/architecture-salesforce-appflow-s3.png -------------------------------------------------------------------------------- /appflow-terraform-samples/salesforce-appflow-amazon-s3/terraform/appflow.tf: -------------------------------------------------------------------------------- 1 | ### AppFlow - Flow - Salesforce to S3 2 | resource "aws_appflow_flow" "test_flow" { 3 | provider = aws.central 4 | name = "${var.customer}_test_flow" 5 | 6 | destination_flow_config { 7 | connector_type = "S3" 8 | destination_connector_properties { 9 | s3 { 10 | bucket_name = aws_s3_bucket.appflow_target_bucket.id 11 | bucket_prefix = "salesforce-data" 12 | s3_output_format_config { 13 | file_type = "JSON" 14 | preserve_source_data_typing = false 15 | 16 | aggregation_config { 17 | aggregation_type = "None" 18 | target_file_size = 0 19 | } 20 | prefix_config { 21 | prefix_type = "PATH" 22 | } 23 | } 24 | } 25 | } 26 | } 27 | 28 | source_flow_config { 29 | connector_type = "Salesforce" 30 | connector_profile_name = var.sfdc_connection_name 31 | source_connector_properties { 32 | 33 | salesforce { 34 | enable_dynamic_field_update = false 35 | include_deleted_records = false 36 | object = "Account" 37 | } 38 | 39 | } 40 | } 41 | 42 | 43 | 44 | task { 45 | destination_field = "AccountNumber" 46 | source_fields = [ 47 | "AccountNumber", 48 | ] 49 | task_properties = { 50 | "DESTINATION_DATA_TYPE" = "string" 51 | "SOURCE_DATA_TYPE" = "string" 52 | } 53 | task_type = "Map" 54 | 55 | connector_operator { 56 | salesforce = "NO_OP" 57 | } 58 | } 59 | 60 | task { 61 | source_fields = [] 62 | task_properties = {} 63 | task_type = "Map_all" 64 | 65 | connector_operator { 66 | salesforce = "NO_OP" 67 | } 68 | } 69 | 70 | 71 | 72 | 73 | trigger_config { 74 | trigger_type = "Event" 75 | } 76 | } 77 | 78 | -------------------------------------------------------------------------------- /appflow-terraform-samples/salesforce-appflow-amazon-s3/terraform/iam.tf: -------------------------------------------------------------------------------- 1 | ## IAM Resources for Appflow 2 | data "aws_iam_policy_document" "s3_policy_target_bucket" { 3 | statement { 4 | sid = "AllowAppFlowDestinationActions" 5 | effect = "Allow" 6 | 7 | principals { 8 | type = "Service" 9 | identifiers = ["appflow.amazonaws.com"] 10 | } 11 | 12 | actions = [ 13 | "s3:PutObject", 14 | "s3:AbortMultipartUpload", 15 | "s3:ListMultipartUploadParts", 16 | "s3:ListBucketMultipartUploads", 17 | "s3:GetBucketAcl", 18 | "s3:PutObjectAcl", 19 | ] 20 | 21 | resources = [ 22 | aws_s3_bucket.appflow_target_bucket.arn, 23 | "${aws_s3_bucket.appflow_target_bucket.arn}/*", 24 | ] 25 | } 26 | } 27 | 28 | -------------------------------------------------------------------------------- /appflow-terraform-samples/salesforce-appflow-amazon-s3/terraform/outputs.tf: -------------------------------------------------------------------------------- 1 | output "flow_arn" { 2 | value = aws_appflow_flow.test_flow.arn 3 | } 4 | -------------------------------------------------------------------------------- 
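
Note that the flow defined in appflow.tf above is event-triggered, which is why the README lists Salesforce Change Data Capture as a prerequisite. If you wanted the flow to run on a schedule instead, the `trigger_config` block would change roughly as sketched below. This is only an illustration based on the `aws_appflow_flow` resource's scheduled-trigger arguments; the rate expression and pull mode shown are assumptions, so verify them against the Terraform AWS provider and Amazon AppFlow documentation before use.

```
trigger_config {
  trigger_type = "Scheduled"

  trigger_properties {
    scheduled {
      # AppFlow rate()/cron() syntax; the value here is illustrative
      schedule_expression = "rate(1hours)"
      # "Incremental" pulls only records changed since the last run; "Complete" pulls everything
      data_pull_mode      = "Incremental"
    }
  }
}
```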
/appflow-terraform-samples/salesforce-appflow-amazon-s3/terraform/providers.tf: -------------------------------------------------------------------------------- 1 | provider "aws" { 2 | region = "eu-central-1" 3 | alias = "central" 4 | } -------------------------------------------------------------------------------- /appflow-terraform-samples/salesforce-appflow-amazon-s3/terraform/s3.tf: -------------------------------------------------------------------------------- 1 | ##S3 Bucket configuration 2 | 3 | resource "aws_s3_bucket" "appflow_target_bucket" { 4 | provider = aws.central 5 | bucket = "${var.customer}-target-bucket" 6 | } 7 | 8 | resource "aws_s3_bucket_policy" "appflow_target_bucket_s3_policy" { 9 | provider = aws.central 10 | bucket = aws_s3_bucket.appflow_target_bucket.id 11 | policy = data.aws_iam_policy_document.s3_policy_target_bucket.json 12 | } 13 | 14 | resource "aws_s3_bucket_acl" "source_bucket_acl" { 15 | provider = aws.central 16 | bucket = aws_s3_bucket.appflow_target_bucket.id 17 | acl = "private" 18 | depends_on = [aws_s3_bucket_ownership_controls.s3_bucket_acl_ownership] 19 | } 20 | 21 | resource "aws_s3_bucket_ownership_controls" "s3_bucket_acl_ownership" { 22 | provider = aws.central 23 | bucket = aws_s3_bucket.appflow_target_bucket.id 24 | rule { 25 | object_ownership = "ObjectWriter" 26 | } 27 | } 28 | 29 | resource "aws_s3_bucket_versioning" "target_bucket_versioning" { 30 | provider = aws.central 31 | bucket = aws_s3_bucket.appflow_target_bucket.id 32 | versioning_configuration { 33 | status = "Enabled" 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /appflow-terraform-samples/salesforce-appflow-amazon-s3/terraform/variables.tf: -------------------------------------------------------------------------------- 1 | ## Generic variables 2 | 3 | variable "customer" { 4 | type = string 5 | description = "Customer Target PoC" 6 | } 7 | 8 | ## AppFlow Variables 9 | 10 | variable "sfdc_connection_name" { 11 | type = string 12 | description = "AppFlow connector name " 13 | } -------------------------------------------------------------------------------- /appflow-terraform-samples/salesforce-private-connect-appflow-amazon-s3/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. 
Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. -------------------------------------------------------------------------------- /appflow-terraform-samples/salesforce-private-connect-appflow-amazon-s3/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 10 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 11 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 12 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 13 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-------------------------------------------------------------------------------- /appflow-terraform-samples/salesforce-private-connect-appflow-amazon-s3/README.md: -------------------------------------------------------------------------------- 1 | ## Use-case 2 | -------------------------------------- 3 | Some customers may need to connect privately to Salesforce using the [Private Connect feature](https://help.salesforce.com/s/articleView?id=sf.private_connect_overview.htm&type=5) in an AWS Region that is [not supported by Salesforce](https://help.salesforce.com/s/articleView?id=sf.private_connect_considerations.htm&type=5). This sample describes a workaround for that limitation: it establishes a private connection from Amazon AppFlow to Salesforce in a supported AWS Region (the pivot Region) and exports the data to Amazon S3 there. The data is then replicated to an Amazon S3 bucket in the desired AWS Region. There are multiple ways to replicate data from one Amazon S3 bucket to another; this sample uses [S3 Cross-Region Replication](https://docs.aws.amazon.com/AmazonS3/latest/userguide/replication.html#crr-scenario). 4 | 5 | ## Prerequisites 6 | -------------------------------------- 7 | 8 | - An AWS account that will be used to run the [private Amazon AppFlow flows](https://docs.aws.amazon.com/appflow/latest/userguide/private-flows.html) and store the data in an [Amazon S3](https://aws.amazon.com/s3/) bucket in the pivot Region. 9 | - An Amazon AppFlow connection to [Salesforce](https://www.salesforce.com/) with the Change Data Capture feature enabled and properly configured. 10 | - AWS Regions to be used as the pivot and centralized Regions. 11 | - Additionally, check the [Salesforce Private Connect](https://help.salesforce.com/s/articleView?id=sf.private_connect_overview.htm&type=5) prerequisites.
Make sure that Private connect feature is supported and provisioned in your Pivot Region 12 | - [Terraform v1.4.5](https://releases.hashicorp.com/terraform/1.4.5/) or later installed 13 | 14 | ## Architecture 15 | -------------------------------------- 16 | 17 | ![](./architecture-salesforce-pc-appflow-s3.png) 18 | 19 | ## Terraform Resources 20 | 21 | ## Commands 22 | 23 | Run the the following Terraform Commands to deploy the resources 24 | 25 | ``` 26 | terraform init 27 | terraform plan 28 | terraform apply 29 | ``` 30 | 31 | ## Providers 32 | -------------------------------------- 33 | 34 | | Name | Version | 35 | |------|---------| 36 | | [aws](#provider\_aws) | 5.46.0 | 37 | | [aws.central](#provider\_aws.central) | 5.46.0 | 38 | | [aws.pivot](#provider\_aws.pivot) | 5.46.0 | 39 | 40 | ## Resources 41 | -------------------------------------- 42 | 43 | | Name | Type | 44 | |------|------| 45 | | [aws_appflow_flow.test_flow](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/appflow_flow) | resource | 46 | | [aws_iam_policy.iam_policy_replication](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource | 47 | | [aws_iam_role.iam_role_replication](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource | 48 | | [aws_iam_role_policy_attachment.iam_policy_attachment_replication](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | 49 | | [aws_s3_bucket.bucket_centralized](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_bucket) | resource | 50 | | [aws_s3_bucket.bucket_pivot](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_bucket) | resource | 51 | | [aws_s3_bucket_acl.source_bucket_acl](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_bucket_acl) | resource | 52 | | [aws_s3_bucket_ownership_controls.s3_bucket_acl_ownership](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_bucket_ownership_controls) | resource | 53 | | [aws_s3_bucket_policy.pivot_s3_policy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_bucket_policy) | resource | 54 | | [aws_s3_bucket_replication_configuration.replication](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_bucket_replication_configuration) | resource | 55 | | [aws_s3_bucket_versioning.bucket_centralized_versioning](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_bucket_versioning) | resource | 56 | | [aws_s3_bucket_versioning.bucket_pivot_versioning](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_bucket_versioning) | resource | 57 | | [aws_iam_policy_document.assume_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | 58 | | [aws_iam_policy_document.replication](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | 59 | | [aws_iam_policy_document.s3_policy_pivot_bucket](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | 60 | | [aws_kms_key.s3_key](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/kms_key) | data source | 61 | 62 | ## Inputs 63 | -------------------------------------- 64 | 65 | | Name | Description | Type | 
Default | Required | 66 | |------|-------------|------|---------|:--------:| 67 | | [customer](#input\_customer) | Customer Target PoC | `string` | n/a | yes | 68 | | [encryption\_key\_central\_region](#input\_encryption\_key\_central\_region) | Encryption key in central region | `string` | n/a | yes | 69 | | [metric\_replication\_minutes](#input\_metric\_replication\_minutes) | The time in minutes after which the replication status is published | `number` | `15` | no | 70 | | [replication\_time\_minutes](#input\_replication\_time\_minutes) | The time in minutes within which Amazon S3 must replicate objects | `number` | `15` | no | 71 | | [sfdc\_connection\_name](#input\_sfdc\_connection\_name) | AppFlow connector name | `string` | n/a | yes | 72 | 73 | ## Outputs 74 | -------------------------------------- 75 | 76 | | Name | Description | 77 | |------|-------------| 78 | | [flow\_arn](#output\_flow\_arn) | n/a | 79 | 80 | ## Security 81 | -------------------------------------- 82 | 83 | See [CONTRIBUTING](CONTRIBUTING.md) for more information. 84 | 85 | ## License 86 | -------------------------------------- 87 | 88 | This library is licensed under the MIT-0 License. See the [LICENSE](LICENSE) file. 89 | -------------------------------------------------------------------------------- /appflow-terraform-samples/salesforce-private-connect-appflow-amazon-s3/architecture-salesforce-pc-appflow-s3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-appflow/549f19c914aabdf3946737fea189cb1f0985e971/appflow-terraform-samples/salesforce-private-connect-appflow-amazon-s3/architecture-salesforce-pc-appflow-s3.png -------------------------------------------------------------------------------- /appflow-terraform-samples/salesforce-private-connect-appflow-amazon-s3/terraform/appflow.tf: -------------------------------------------------------------------------------- 1 | ### AppFlow - Flow - Salesforce to S3 2 | resource "aws_appflow_flow" "test_flow" { 3 | provider = aws.pivot 4 | name = "${var.customer}_test_flow" 5 | 6 | destination_flow_config { 7 | connector_type = "S3" 8 | destination_connector_properties { 9 | s3 { 10 | bucket_name = aws_s3_bucket.bucket_pivot.id 11 | bucket_prefix = "salesforce-data" 12 | s3_output_format_config { 13 | file_type = "JSON" 14 | preserve_source_data_typing = false 15 | 16 | aggregation_config { 17 | aggregation_type = "None" 18 | target_file_size = 0 19 | } 20 | prefix_config { 21 | prefix_type = "PATH" 22 | } 23 | } 24 | } 25 | } 26 | } 27 | 28 | source_flow_config { 29 | connector_type = "Salesforce" 30 | connector_profile_name = var.sfdc_connection_name 31 | source_connector_properties { 32 | 33 | salesforce { 34 | enable_dynamic_field_update = false 35 | include_deleted_records = false 36 | object = "Account" 37 | } 38 | 39 | } 40 | } 41 | 42 | 43 | 44 | task { 45 | destination_field = "Id" 46 | source_fields = [ 47 | "Id", 48 | ] 49 | task_properties = { 50 | "DESTINATION_DATA_TYPE" = "string" 51 | "SOURCE_DATA_TYPE" = "string" 52 | } 53 | task_type = "Map" 54 | 55 | connector_operator { 56 | salesforce = "NO_OP" 57 | } 58 | } 59 | 60 | task { 61 | source_fields = [] 62 | task_properties = {} 63 | task_type = "Map_all" 64 | 65 | connector_operator { 66 | salesforce = "NO_OP" 67 | } 68 | } 69 | 70 | trigger_config { 71 | trigger_type = "Event" 72 | } 73 | } 74 | 75 | -------------------------------------------------------------------------------- 
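The flow defined in appflow.tf above uses `trigger_type = "Event"`, so it relies on Salesforce Change Data Capture events. A flow created this way is not necessarily active right after `terraform apply`; the AppFlow `StartFlow` API is what activates event-triggered flows. Below is a minimal boto3 sketch (not part of the sample) that checks the flow status and activates it if needed. The flow name and Region are assumptions: the name follows the `"${var.customer}_test_flow"` pattern from appflow.tf with a hypothetical customer value, and the Region matches the pivot provider in providers.tf.

```python
# Minimal sketch (not part of the sample): inspect and activate the event-triggered
# flow created by appflow.tf. FLOW_NAME and PIVOT_REGION are assumptions.
import boto3

PIVOT_REGION = "eu-central-1"      # pivot provider Region (see providers.tf)
FLOW_NAME = "acme_test_flow"       # hypothetical: var.customer = "acme"

appflow = boto3.client("appflow", region_name=PIVOT_REGION)

# Inspect the flow that Terraform created.
flow = appflow.describe_flow(flowName=FLOW_NAME)
print("Trigger type:", flow["triggerConfig"]["triggerType"])
print("Flow status :", flow["flowStatus"])

# For event-triggered flows, start_flow activates the flow so that Salesforce
# Change Data Capture events begin landing in the pivot bucket.
if flow["flowStatus"] != "Active":
    appflow.start_flow(flowName=FLOW_NAME)
    print("Flow status :", appflow.describe_flow(flowName=FLOW_NAME)["flowStatus"])
```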
/appflow-terraform-samples/salesforce-private-connect-appflow-amazon-s3/terraform/iam.tf: -------------------------------------------------------------------------------- 1 | ## IAM Resources for Appflow 2 | data "aws_iam_policy_document" "s3_policy_pivot_bucket" { 3 | statement { 4 | sid = "AllowAppFlowDestinationActions" 5 | effect = "Allow" 6 | 7 | principals { 8 | type = "Service" 9 | identifiers = ["appflow.amazonaws.com"] 10 | } 11 | 12 | actions = [ 13 | "s3:PutObject", 14 | "s3:AbortMultipartUpload", 15 | "s3:ListMultipartUploadParts", 16 | "s3:ListBucketMultipartUploads", 17 | "s3:GetBucketAcl", 18 | "s3:PutObjectAcl", 19 | ] 20 | 21 | resources = [ 22 | "arn:aws:s3:::${var.customer}-b-pivot", 23 | "arn:aws:s3:::${var.customer}-b-pivot/*", 24 | ] 25 | } 26 | } 27 | 28 | 29 | ## IAM Resources for S3 CRR 30 | 31 | data "aws_kms_key" "s3_key" { 32 | provider = aws.central 33 | key_id = var.encryption_key_central_region 34 | } 35 | 36 | data "aws_iam_policy_document" "assume_role" { 37 | statement { 38 | effect = "Allow" 39 | 40 | principals { 41 | type = "Service" 42 | identifiers = ["s3.amazonaws.com", "batchoperations.s3.amazonaws.com"] 43 | } 44 | 45 | actions = ["sts:AssumeRole"] 46 | } 47 | } 48 | 49 | resource "aws_iam_role" "iam_role_replication" { 50 | name = "${var.customer}-iam-role-s3-crr" 51 | assume_role_policy = data.aws_iam_policy_document.assume_role.json 52 | } 53 | 54 | resource "aws_iam_policy" "iam_policy_replication" { 55 | name = "${var.customer}-iam-policy-s3-crr" 56 | policy = data.aws_iam_policy_document.replication.json 57 | } 58 | 59 | resource "aws_iam_role_policy_attachment" "iam_policy_attachment_replication" { 60 | role = aws_iam_role.iam_role_replication.name 61 | policy_arn = aws_iam_policy.iam_policy_replication.arn 62 | } 63 | 64 | data "aws_iam_policy_document" "replication" { 65 | statement { 66 | effect = "Allow" 67 | 68 | actions = [ 69 | "s3:GetReplicationConfiguration", 70 | "s3:ListBucket", 71 | "s3:GetObjectVersionForReplication", 72 | "s3:GetObjectVersionAcl", 73 | "s3:GetObjectVersionTagging", 74 | "s3:GetObjectRetention", 75 | "s3:GetObjectLegalHold", 76 | "s3:PutInventoryConfiguration" 77 | ] 78 | 79 | resources = ["${aws_s3_bucket.bucket_pivot.arn}", "${aws_s3_bucket.bucket_centralized.arn}"] 80 | } 81 | 82 | statement { 83 | effect = "Allow" 84 | 85 | actions = [ 86 | "s3:GetObjectVersionForReplication", 87 | "s3:GetObjectVersionAcl", 88 | "s3:GetObjectVersionTagging", 89 | "s3:GetReplicationConfiguration", 90 | "s3:ListBucket", 91 | "s3:GetObjectRetention", 92 | "s3:GetObjectLegalHold" 93 | ] 94 | 95 | resources = ["${aws_s3_bucket.bucket_pivot.arn}/*", "${aws_s3_bucket.bucket_centralized.arn}/*"] 96 | } 97 | 98 | statement { 99 | effect = "Allow" 100 | 101 | actions = [ 102 | "s3:ReplicateObject", 103 | "s3:ReplicateDelete", 104 | "s3:ReplicateTags", 105 | "s3:ObjectOwnerOverrideToBucketOwner" 106 | ] 107 | 108 | resources = ["${aws_s3_bucket.bucket_pivot.arn}/*", "${aws_s3_bucket.bucket_centralized.arn}/*"] 109 | } 110 | } 111 | 112 | -------------------------------------------------------------------------------- /appflow-terraform-samples/salesforce-private-connect-appflow-amazon-s3/terraform/outputs.tf: -------------------------------------------------------------------------------- 1 | output "flow_arn" { 2 | value = aws_appflow_flow.test_flow.arn 3 | } 4 | -------------------------------------------------------------------------------- 
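The iam.tf file above creates the role and policy that S3 Cross-Region Replication assumes, and s3.tf in this sample attaches that role to a replication rule on the pivot bucket. As a quick spot-check that objects delivered by the flow are actually being copied to the centralized bucket, the following minimal boto3 sketch (not part of the sample) lists a few objects under the flow's `salesforce-data` prefix, prints their `ReplicationStatus`, and looks for the replica. The customer value is hypothetical; the bucket names follow the `<customer>-b-pivot` / `<customer>-b-centralized` pattern from iam.tf and s3.tf, and the Regions match providers.tf.

```python
# Minimal sketch (not part of the sample): verify Cross-Region Replication of the
# objects AppFlow writes to the pivot bucket. CUSTOMER is a hypothetical value.
import boto3
from botocore.exceptions import ClientError

CUSTOMER = "acme"                                        # hypothetical var.customer
pivot = boto3.client("s3", region_name="eu-central-1")   # pivot Region (providers.tf)
central = boto3.client("s3", region_name="eu-west-1")    # central Region (providers.tf)

pivot_bucket = f"{CUSTOMER}-b-pivot"
central_bucket = f"{CUSTOMER}-b-centralized"

# Look at a handful of objects written under the flow's "salesforce-data" prefix.
listed = pivot.list_objects_v2(Bucket=pivot_bucket, Prefix="salesforce-data/", MaxKeys=5)
for obj in listed.get("Contents", []):
    key = obj["Key"]
    # Source objects covered by a replication rule carry a ReplicationStatus value.
    status = pivot.head_object(Bucket=pivot_bucket, Key=key).get("ReplicationStatus")
    print(f"{key}: source replication status = {status}")
    try:
        replica = central.head_object(Bucket=central_bucket, Key=key)
        print(f"  replica found in {central_bucket} ({replica['ContentLength']} bytes)")
    except ClientError:
        print("  replica not present yet")
```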
/appflow-terraform-samples/salesforce-private-connect-appflow-amazon-s3/terraform/providers.tf: -------------------------------------------------------------------------------- 1 | provider "aws" { 2 | region = "eu-central-1" 3 | alias = "pivot" 4 | } 5 | 6 | provider "aws" { 7 | alias = "central" 8 | region = "eu-west-1" 9 | } -------------------------------------------------------------------------------- /appflow-terraform-samples/salesforce-private-connect-appflow-amazon-s3/terraform/s3.tf: -------------------------------------------------------------------------------- 1 | ## Pivot S3 Bucket configuration 2 | 3 | resource "aws_s3_bucket" "bucket_pivot" { 4 | provider = aws.pivot 5 | bucket = "${var.customer}-b-pivot" 6 | } 7 | 8 | resource "aws_s3_bucket_policy" "pivot_s3_policy" { 9 | provider = aws.pivot 10 | bucket = aws_s3_bucket.bucket_pivot.id 11 | policy = data.aws_iam_policy_document.s3_policy_pivot_bucket.json 12 | } 13 | 14 | resource "aws_s3_bucket_acl" "source_bucket_acl" { 15 | provider = aws.pivot 16 | bucket = aws_s3_bucket.bucket_pivot.id 17 | acl = "private" 18 | depends_on = [aws_s3_bucket_ownership_controls.s3_bucket_acl_ownership] 19 | } 20 | 21 | resource "aws_s3_bucket_ownership_controls" "s3_bucket_acl_ownership" { 22 | provider = aws.pivot 23 | bucket = aws_s3_bucket.bucket_pivot.id 24 | rule { 25 | object_ownership = "ObjectWriter" 26 | } 27 | } 28 | 29 | resource "aws_s3_bucket_versioning" "bucket_pivot_versioning" { 30 | provider = aws.pivot 31 | bucket = aws_s3_bucket.bucket_pivot.id 32 | versioning_configuration { 33 | status = "Enabled" 34 | } 35 | } 36 | 37 | ## Centralized S3 Bucket configuration 38 | resource "aws_s3_bucket" "bucket_centralized" { 39 | provider = aws.central 40 | bucket = "${var.customer}-b-centralized" 41 | } 42 | 43 | resource "aws_s3_bucket_versioning" "bucket_centralized_versioning" { 44 | provider = aws.central 45 | bucket = aws_s3_bucket.bucket_centralized.id 46 | versioning_configuration { 47 | status = "Enabled" 48 | } 49 | } 50 | 51 | ## CRR Configuration 52 | resource "aws_s3_bucket_replication_configuration" "replication" { 53 | provider = aws.pivot 54 | # Must have bucket versioning enabled first 55 | depends_on = [aws_s3_bucket_versioning.bucket_pivot_versioning] 56 | 57 | role = aws_iam_role.iam_role_replication.arn 58 | bucket = aws_s3_bucket.bucket_pivot.id 59 | 60 | rule { 61 | id = "${var.customer}-b-crr-centralized" 62 | filter { 63 | 64 | } 65 | status = "Enabled" 66 | delete_marker_replication { 67 | status = "Disabled" 68 | } 69 | 70 | source_selection_criteria { 71 | replica_modifications { 72 | status = "Enabled" 73 | } 74 | sse_kms_encrypted_objects { 75 | status = "Enabled" 76 | } 77 | } 78 | 79 | 80 | destination { 81 | bucket = aws_s3_bucket.bucket_centralized.arn 82 | replication_time { 83 | status = "Enabled" 84 | time { 85 | minutes = var.replication_time_minutes 86 | } 87 | } 88 | metrics { 89 | status = "Enabled" 90 | event_threshold { 91 | minutes = var.metric_replication_minutes 92 | } 93 | } 94 | encryption_configuration { 95 | replica_kms_key_id = data.aws_kms_key.s3_key.arn 96 | } 97 | } 98 | } 99 | } -------------------------------------------------------------------------------- /appflow-terraform-samples/salesforce-private-connect-appflow-amazon-s3/terraform/variables.tf: -------------------------------------------------------------------------------- 1 | ## Generic variables 2 | 3 | variable "customer" { 4 | type = string 5 | description = "Customer Target PoC" 6 | } 7 | 8 | ## AppFlow 
Variables 9 | 10 | variable "sfdc_connection_name" { 11 | type = string 12 | description = "AppFlow connector name " 13 | } 14 | 15 | 16 | 17 | ### Replication configuration 18 | 19 | variable "encryption_key_central_region" { 20 | type = string 21 | description = "Encryption key in central region " 22 | } 23 | 24 | variable "replication_time_minutes" { 25 | description = "The time in minutes within which Amazon S3 must replicate objects" 26 | type = number 27 | default = 15 28 | } 29 | 30 | variable "metric_replication_minutes" { 31 | description = "The time in minutes after which the replication status is published" 32 | type = number 33 | default = 15 34 | } -------------------------------------------------------------------------------- /appflow-time-automation/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. -------------------------------------------------------------------------------- /appflow-time-automation/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check [existing open](https://github.com/aws-samples/aws-serverless-airline-booking/issues), or [recently closed](https://github.com/aws-samples/aws-serverless-airline-booking/issues?utf8=%E2%9C%93&q=is%3Aissue%20is%3Aclosed%20), issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *master* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 
37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any ['help wanted'](https://github.com/aws-samples/aws-serverless-airline-booking/labels/help%20wanted) issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](https://github.com/aws-samples/aws-serverless-airline-booking/blob/master/LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | 61 | We may ask you to sign a [Contributor License Agreement (CLA)](http://en.wikipedia.org/wiki/Contributor_License_Agreement) for larger changes. -------------------------------------------------------------------------------- /appflow-time-automation/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | SPDX-License-Identifier: MIT-0 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this 5 | software and associated documentation files (the "Software"), to deal in the Software 6 | without restriction, including without limitation the rights to use, copy, modify, 7 | merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 11 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 12 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 13 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 14 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 15 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 16 | -------------------------------------------------------------------------------- /appflow-time-automation/README.md: -------------------------------------------------------------------------------- 1 | # Amazon AppFlow Relative Time Frame Automation Example 2 | 3 | This repository contains an example of project for the [Amazon AppFlow Service](https://github.com/aws-samples/amazon-appflow). 4 | 5 | ## Table of Contents 6 | 1. [About this Project](#About) 7 | 2. [Arquitecture](#Architecture) 8 | 3. [Pre Requirements](#Prereq) 9 | 4. 
[How to Use It](#Howto) 10 | 5. [License](#License) 11 | 12 | ## About this project 13 | 14 | Running an AWS CloudFormation template that creates an Amazon AppFlow flow with a specific time field for time filtering requires passing a start date and an end date in Unix epoch format. This project shows how to do that calculation dynamically: a Python (AWS Lambda) code example computes a date range expressed in a relative way (a number of days) and passes it to AWS CloudFormation, which creates a full-load flow in Amazon AppFlow with the calculated time frame in epoch format to get data from Salesforce and put it into Amazon S3. 15 | 16 | "Relative time frame" means expressing a range of time in days, weeks, or months instead of using a fixed start date and end date. 17 | 18 | # Architecture 19 | 20 | ![Architecture diagram](appflow-relative-date-architecture.png) 21 | 22 | # Pre Requirements 23 | 24 | The following prerequisites must be in place to use this example. 25 | 26 | 1. Create two Amazon S3 buckets, one to store the CloudFormation template and another as the raw layer of your data lake. 27 | 2. Create a Salesforce Amazon AppFlow connection. 28 | 3. Add a bucket policy to the raw-layer bucket that allows the Amazon AppFlow service to write to it. 29 | 4. Edit the AWS CloudFormation template to set the table and the fields that you want to retrieve from Salesforce. By default, the sample AWS CloudFormation template retrieves the "OrderItem" table and the following fields: 30 | - Id 31 | - Product2Id 32 | - IsDeleted 33 | - OrderId 34 | - PricebookEntryId 35 | - OriginalOrderItemId 36 | - AvailableQuantity 37 | - Quantity 38 | - UnitPrice 39 | - TotalPrice 40 | - ListPrice 41 | - ServiceDate 42 | - EndDate 43 | - Description 44 | - LastModifiedDate 45 | - CreatedById 46 | - CreatedDate 47 | 48 | 49 | 50 | # How to Use It 51 | 52 | 1. Step One: Upload the AWS CloudFormation template appflow.yaml to an S3 bucket. 53 | 54 | 2. Step Two: Create a Lambda IAM role with the Amazon S3 Read Only Access and AWS CloudFormation Full Access permissions so that the Lambda function can read the AWS CloudFormation template located in Amazon S3 and launch the AWS CloudFormation stack. 55 | 56 | 3. Step Three: Create an AWS Lambda function with the following configuration: 57 | - Runtime: Python 3.6 58 | - Handler: index.handler 59 | - Memory: 128 MB 60 | - Timeout: 1 min 61 | 62 | 4. Step Four: Configure the following five environment variables in the AWS Lambda function: 63 | 64 | - connName : The Amazon AppFlow Salesforce connection name. (Example: aws95-dev-ed) 65 | - templateUrl : The Amazon S3 URL of the AWS CloudFormation template. (Example: https://s3-external-1.amazonaws.com/cf-templates-105b6q2gor9b3-us-east-1/2021052SSZ-new.template22efnvt9sqah) 66 | - timeField : The field in the Salesforce schema used to filter the data. (Example: CreatedDate) 67 | - bucketName : The bucket that Amazon AppFlow writes the ingested data to. (Example: my-bucket-name) 68 | - numPastDays: The number of days of data to retrieve. For example, a value of three means the AWS Lambda function sets the start date to 00:00 of the third day in the past, and the range includes the data of the day on which you launch the process; see the short Python sketch just after this README for one way to compute these values. (Example: 3) 69 | 70 | 5. Step Five: Schedule the flow using Amazon CloudWatch Events, or run it manually, to have the data ingested into Amazon S3. 71 | 72 | # License 73 | 74 | This sample is licensed under the MIT-0 License.
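To make the date arithmetic described above concrete, here is a minimal Python sketch. It is not the bundled lambda-trigger.py; the use of UTC and a midnight start boundary are assumptions you may want to adapt. Note that Lambda environment variables arrive as strings, so numPastDays must be cast with int() before doing date math with it.

```python
# Minimal sketch (assumptions: UTC, midnight start boundary) of turning numPastDays
# into the startdate/enddate parameters that appflow.yaml expects, in epoch milliseconds.
import os
from datetime import datetime, timedelta, timezone

num_past_days = int(os.environ.get("numPastDays", "3"))   # env vars are strings, e.g. "3"

# Start at 00:00 of the (numPastDays - 1)th day before today, so the range covers
# today plus the previous numPastDays - 1 full days.
now = datetime.now(timezone.utc)
start = now.replace(hour=0, minute=0, second=0, microsecond=0) - timedelta(days=num_past_days - 1)

start_ms = int(start.timestamp() * 1000)   # value for the "startdate" parameter
end_ms = int(now.timestamp() * 1000)       # value for the "enddate" parameter
print(start_ms, end_ms)
```

These two numbers are what the Lambda function passes to AWS CloudFormation as the startdate and enddate parameters when it calls create_stack.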
75 | -------------------------------------------------------------------------------- /appflow-time-automation/appflow-relative-date-architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-appflow/549f19c914aabdf3946737fea189cb1f0985e971/appflow-time-automation/appflow-relative-date-architecture.png -------------------------------------------------------------------------------- /appflow-time-automation/appflow.yaml: -------------------------------------------------------------------------------- 1 | AWSTemplateFormatVersion: '2010-09-09' 2 | Description: 'Sample CloudFormation Template for AppFlow: Sample template shows how 3 | to create a flow with parameters' 4 | Parameters: 5 | flowname: 6 | Description: "Name of the flow" 7 | Type: String 8 | Default: myflow 9 | connname: 10 | Description: "Salesforce Connection Name" 11 | Type: String 12 | Default: aws95-dev-ed 13 | bucketname: 14 | Description: "Bucket Name" 15 | Type: String 16 | Default: my-aws-bucket 17 | timefield: 18 | Description: "Field base to calculate the desired time" 19 | Type: String 20 | Default: CreatedDate 21 | startdate: 22 | Description: "Start Date in miliseconds epoch format" 23 | Type: Number 24 | Default: 1612148400000 25 | enddate: 26 | Description: "End Date in miliseconds epoch format" 27 | Type: Number 28 | Default: 1614481200000 29 | Resources: 30 | SFDCFlow: 31 | Type: AWS::AppFlow::Flow 32 | Properties: 33 | Description: AppFlow Flow integrating SFDC Account Data into the Data Lake 34 | DestinationFlowConfigList: 35 | - ConnectorType: S3 36 | DestinationConnectorProperties: 37 | S3: 38 | BucketName: !Ref bucketname 39 | S3OutputFormatConfig: 40 | AggregationConfig: 41 | AggregationType: None 42 | FileType: CSV 43 | FlowName: !Ref flowname 44 | SourceFlowConfig: 45 | ConnectorProfileName: !Ref connname 46 | ConnectorType: Salesforce 47 | SourceConnectorProperties: 48 | Salesforce: 49 | EnableDynamicFieldUpdate: false 50 | IncludeDeletedRecords: false 51 | Object: OrderItem 52 | Tasks: 53 | - TaskType: Filter 54 | SourceFields: 55 | - !Ref timefield 56 | ConnectorOperator: 57 | Salesforce: BETWEEN 58 | TaskProperties: 59 | - Key: DATA_TYPE 60 | Value: datetime 61 | - Key: LOWER_BOUND 62 | Value: !Ref startdate 63 | - Key: UPPER_BOUND 64 | Value: !Ref enddate 65 | - TaskType: Filter 66 | SourceFields: 67 | - Id 68 | - Product2Id 69 | - IsDeleted 70 | - OrderId 71 | - PricebookEntryId 72 | - OriginalOrderItemId 73 | - AvailableQuantity 74 | - Quantity 75 | - UnitPrice 76 | - TotalPrice 77 | - ListPrice 78 | - ServiceDate 79 | - EndDate 80 | - Description 81 | - LastModifiedDate 82 | - CreatedById 83 | - CreatedDate 84 | ConnectorOperator: 85 | Salesforce: PROJECTION 86 | - TaskType: Map 87 | SourceFields: 88 | - Id 89 | ConnectorOperator: 90 | Salesforce: NO_OP 91 | DestinationField: Id 92 | TaskProperties: 93 | - Key: DESTINATION_DATA_TYPE 94 | Value: id 95 | - Key: SOURCE_DATA_TYPE 96 | Value: id 97 | - TaskType: Map 98 | SourceFields: 99 | - Product2Id 100 | ConnectorOperator: 101 | Salesforce: NO_OP 102 | DestinationField: Product2Id 103 | TaskProperties: 104 | - Key: DESTINATION_DATA_TYPE 105 | Value : reference 106 | - Key: SOURCE_DATA_TYPE 107 | Value: reference 108 | - TaskType: Map 109 | SourceFields: 110 | - IsDeleted 111 | ConnectorOperator: 112 | Salesforce: NO_OP 113 | DestinationField: IsDeleted 114 | TaskProperties: 115 | - Key: DESTINATION_DATA_TYPE 116 | Value: boolean 117 | - Key: 
SOURCE_DATA_TYPE 118 | Value: boolean 119 | - TaskType: Map 120 | SourceFields: 121 | - OrderId 122 | ConnectorOperator: 123 | Salesforce: NO_OP 124 | DestinationField: OrderId 125 | TaskProperties: 126 | - Key: DESTINATION_DATA_TYPE 127 | Value: reference 128 | - Key: SOURCE_DATA_TYPE 129 | Value: reference 130 | - TaskType: Map 131 | SourceFields: 132 | - PricebookEntryId 133 | ConnectorOperator: 134 | Salesforce: NO_OP 135 | DestinationField: PricebookEntryId 136 | TaskProperties: 137 | - Key: DESTINATION_DATA_TYPE 138 | Value: reference 139 | - Key: SOURCE_DATA_TYPE 140 | Value: reference 141 | - TaskType: Map 142 | SourceFields: 143 | - OriginalOrderItemId 144 | ConnectorOperator: 145 | Salesforce: NO_OP 146 | DestinationField: OriginalOrderItemId 147 | TaskProperties: 148 | - Key: DESTINATION_DATA_TYPE 149 | Value: reference 150 | - Key: SOURCE_DATA_TYPE 151 | Value: reference 152 | - TaskType: Map 153 | SourceFields: 154 | - AvailableQuantity 155 | ConnectorOperator: 156 | Salesforce: NO_OP 157 | DestinationField: AvailableQuantity 158 | TaskProperties: 159 | - Key: DESTINATION_DATA_TYPE 160 | Value: double 161 | - Key: SOURCE_DATA_TYPE 162 | Value: double 163 | - TaskType: Map 164 | SourceFields: 165 | - Quantity 166 | ConnectorOperator: 167 | Salesforce: NO_OP 168 | DestinationField: Quantity 169 | TaskProperties: 170 | - Key: DESTINATION_DATA_TYPE 171 | Value: double 172 | - Key: SOURCE_DATA_TYPE 173 | Value: double 174 | - TaskType: Map 175 | SourceFields: 176 | - UnitPrice 177 | ConnectorOperator: 178 | Salesforce: NO_OP 179 | DestinationField: UnitPrice 180 | TaskProperties: 181 | - Key: DESTINATION_DATA_TYPE 182 | Value: currency 183 | - Key: SOURCE_DATA_TYPE 184 | Value: currency 185 | - TaskType: Map 186 | SourceFields: 187 | - TotalPrice 188 | ConnectorOperator: 189 | Salesforce: NO_OP 190 | DestinationField: TotalPrice 191 | TaskProperties: 192 | - Key: DESTINATION_DATA_TYPE 193 | Value: currency 194 | - Key: SOURCE_DATA_TYPE 195 | Value: currency 196 | - TaskType: Map 197 | SourceFields: 198 | - ListPrice 199 | ConnectorOperator: 200 | Salesforce: NO_OP 201 | DestinationField: ListPrice 202 | TaskProperties: 203 | - Key: DESTINATION_DATA_TYPE 204 | Value: currency 205 | - Key: SOURCE_DATA_TYPE 206 | Value: currency 207 | - TaskType: Map 208 | SourceFields: 209 | - ServiceDate 210 | ConnectorOperator: 211 | Salesforce: NO_OP 212 | DestinationField: ServiceDate 213 | TaskProperties: 214 | - Key: DESTINATION_DATA_TYPE 215 | Value: date 216 | - Key: SOURCE_DATA_TYPE 217 | Value: date 218 | - TaskType: Map 219 | SourceFields: 220 | - EndDate 221 | ConnectorOperator: 222 | Salesforce: NO_OP 223 | DestinationField: EndDate 224 | TaskProperties: 225 | - Key: DESTINATION_DATA_TYPE 226 | Value: date 227 | - Key: SOURCE_DATA_TYPE 228 | Value: date 229 | - TaskType: Map 230 | SourceFields: 231 | - Description 232 | ConnectorOperator: 233 | Salesforce: NO_OP 234 | DestinationField: Description 235 | TaskProperties: 236 | - Key: DESTINATION_DATA_TYPE 237 | Value: string 238 | - Key: SOURCE_DATA_TYPE 239 | Value: string 240 | - TaskType: Map 241 | SourceFields: 242 | - LastModifiedDate 243 | ConnectorOperator: 244 | Salesforce: NO_OP 245 | DestinationField: LastModifiedDate 246 | TaskProperties: 247 | - Key: DESTINATION_DATA_TYPE 248 | Value: datetime 249 | - Key: SOURCE_DATA_TYPE 250 | Value: datetime 251 | - TaskType: Map 252 | SourceFields: 253 | - CreatedById 254 | ConnectorOperator: 255 | Salesforce: NO_OP 256 | DestinationField: CreatedById 257 | TaskProperties: 258 | - Key: 
DESTINATION_DATA_TYPE 259 | Value: reference 260 | - Key: SOURCE_DATA_TYPE 261 | Value: reference 262 | - TaskType: Map 263 | SourceFields: 264 | - CreatedDate 265 | ConnectorOperator: 266 | Salesforce: NO_OP 267 | DestinationField: CreatedDate 268 | TaskProperties: 269 | - Key: DESTINATION_DATA_TYPE 270 | Value: datetime 271 | - Key: SOURCE_DATA_TYPE 272 | Value: datetime 273 | TriggerConfig: 274 | TriggerType: OnDemand -------------------------------------------------------------------------------- /appflow-time-automation/lambda-trigger.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | # SPDX-License-Identifier: MIT-0 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this 6 | # software and associated documentation files (the "Software"), to deal in the Software 7 | # without restriction, including without limitation the rights to use, copy, modify, 8 | # merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 9 | # permit persons to whom the Software is furnished to do so. 10 | # 11 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 12 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 13 | # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 14 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 15 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 16 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 17 | # 18 | 19 | import json 20 | import boto3 21 | import os 22 | from datetime import timedelta 23 | from datetime import datetime 24 | from urllib.parse import urlparse 25 | 26 | template_url = os.environ['templateUrl'] 27 | conn_name = os.environ['connName'] 28 | time_field = os.environ['timeField'] 29 | bucket_name = os.environ['bucketName'] 30 | num_past_days = os.environ['numPastDays'] 31 | 32 | def parse_params(): 33 | start_date = datetime.today() - timedelta(days=num_past_days-1) 34 | print (start_date) 35 | start_date = start_date.date().strftime('%s') + '000' 36 | end_date = datetime.today() 37 | print (end_date) 38 | end_date = end_date.date().strftime('%s') + '000' 39 | current_ts = datetime.now().isoformat().split('.')[0].replace(':','-') 40 | flow_name = 'ajedailyflow' + current_ts 41 | 42 | template_params = [ 43 | { 44 | 'ParameterKey': 'flowname', 45 | 'ParameterValue': flow_name, 46 | }, 47 | { 48 | 'ParameterKey': 'connname', 49 | 'ParameterValue': conn_name, 50 | }, 51 | { 52 | 'ParameterKey': 'timefield', 53 | 'ParameterValue': time_field, 54 | }, 55 | { 56 | 'ParameterKey': 'startdate', 57 | 'ParameterValue': start_date, 58 | }, 59 | { 60 | 'ParameterKey': 'enddate', 61 | 'ParameterValue': end_date, 62 | }, 63 | { 64 | 'ParameterKey': 'bucketname', 65 | 'ParameterValue': bucket_name, 66 | }, 67 | ] 68 | print (template_params) 69 | return template_params 70 | 71 | def launch_stack(): 72 | cfn = boto3.client('cloudformation') 73 | current_ts = datetime.now().isoformat().split('.')[0].replace(':','-') 74 | stackname = 'mystackflow' + current_ts 75 | capabilities = ['CAPABILITY_IAM', 'CAPABILITY_AUTO_EXPAND'] 76 | try: 77 | template_params = parse_params() 78 | stackdata = cfn.create_stack( 79 | StackName=stackname, 80 | DisableRollback=True, 81 | TemplateURL=template_url, 82 | Parameters=template_params, 83 | 
Capabilities=capabilities) 84 | except Exception as e: 85 | print(str(e)) 86 | return stackdata 87 | 88 | def handler(event, context): 89 | print("Received event:") 90 | stack_result=launch_stack() 91 | print(stack_result) -------------------------------------------------------------------------------- /sagemaker_appflow_demo/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-appflow/549f19c914aabdf3946737fea189cb1f0985e971/sagemaker_appflow_demo/README.md -------------------------------------------------------------------------------- /sagemaker_appflow_demo/sagemaker_ml/build_case_classifier_using_blazingtext.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import boto3\n", 10 | "import sagemaker\n", 11 | "print(boto3.__version__)\n", 12 | "print(sagemaker.__version__)" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "session = sagemaker.Session()\n", 22 | "bucket = session.default_bucket()\n", 23 | "print(\"Default bucket is {}\".format(bucket))" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "### REPLACE INPUT FROM PREVIOUS SAGEMAKER PROCESSING CONFIG OUTPUT #####\n", 33 | "prefix=\"customer_support_classification\"\n", 34 | "s3_training_path=\"https://sagemaker-us-east-1-123412341234.s3.amazonaws.com/sagemaker-scikit-learn-2020-11-16-19-27-42-281/output/training/training.txt\"\n", 35 | "s3_validation_path=\"https://sagemaker-us-east-1-123412341234.s3.amazonaws.com/sagemaker-scikit-learn-2020-11-16-19-27-42-281/output/validation/validation.txt\"\n", 36 | "s3_output_path=\"s3://{}/{}\".format(bucket, prefix)" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "from sagemaker import image_uris\n", 46 | "region_name = boto3.Session().region_name\n", 47 | "print(\"Training the model in {} region\".format(region_name))" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "# Estimator" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "container = image_uris.retrieve('blazingtext', region=region_name)\n", 64 | "print(\"The algo container is {}\".format(container))" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "blazing_text = sagemaker.estimator.Estimator(\n", 74 | " container,\n", 75 | " role=sagemaker.get_execution_role(),\n", 76 | " instance_count=1,\n", 77 | " instance_type='ml.c4.4xlarge',\n", 78 | " output_path=s3_output_path\n", 79 | ")" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "# set the hyperparameters" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "blazing_text.set_hyperparameters(mode='supervised')" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "from sagemaker import 
TrainingInput" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "train_data = TrainingInput(s3_training_path, \n", 114 | " distribution='FullyReplicated',\n", 115 | " content_type='text/plain',\n", 116 | " s3_data_type='S3Prefix'\n", 117 | " )\n", 118 | "validation_data = TrainingInput(\n", 119 | " s3_validation_path,\n", 120 | " distribution='FullyReplicated',\n", 121 | " content_type='text/plain',\n", 122 | " s3_data_type='S3Prefix'\n", 123 | ")\n", 124 | "\n", 125 | "s3_channels = {'train': train_data,\n", 126 | " 'validation': validation_data\n", 127 | " }\n", 128 | "blazing_text.fit(inputs=s3_channels)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [ 137 | "blazing_text.latest_training_job.job_name" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [ 146 | "blazing_text_predictor = blazing_text.deploy(\n", 147 | " initial_instance_count=1,\n", 148 | " instance_type='ml.t2.medium'\n", 149 | ")" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [ 158 | "import json\n", 159 | "import nltk\n", 160 | "\n", 161 | "nltk.download('punkt')\n" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [ 170 | "\n", 171 | "\"\"\"\n", 172 | "Hi my outlook app seems to misbehave a lot lately, I cannot sync my emails and it often crashes and asks for\n", 173 | "credentials. Could you help me out?\n", 174 | "\"\"\"\n", 175 | "sentences = [\"Hi my outlook app seems to misbehave a lot lately, I cannot sync my emails and it often crashes and asks for credentials.\", \"Could you help me out?\"]\n", 176 | "tokenized_sentences = [' '.join(nltk.word_tokenize(sent)) for sent in sentences]\n", 177 | "payload = {\"instances\" : tokenized_sentences,\n", 178 | " \"configuration\": {\"k\": 1}}" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": null, 184 | "metadata": {}, 185 | "outputs": [], 186 | "source": [ 187 | "payload" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": {}, 194 | "outputs": [], 195 | "source": [ 196 | "from sagemaker.predictor import Predictor\n", 197 | "from sagemaker.serializers import JSONSerializer\n", 198 | "from sagemaker.deserializers import JSONDeserializer\n", 199 | "\n", 200 | "case_classifier = Predictor(\n", 201 | " endpoint_name=\"blazingtext-2020-11-18-15-13-52-229\", # Replace with sagemaker endpoint deployed in the previous step\n", 202 | " serializer=JSONSerializer()\n", 203 | ")\n", 204 | "response = case_classifier.predict(payload)\n", 205 | "\n", 206 | "predictions = json.loads(response)\n", 207 | "print(predictions)" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": null, 213 | "metadata": {}, 214 | "outputs": [], 215 | "source": [ 216 | "predictions = sorted(predictions, key=lambda i: i['prob'], reverse=True)" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "metadata": {}, 223 | "outputs": [], 224 | "source": [ 225 | "print(json.dumps(predictions[0], indent=2))" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": null, 231 | "metadata": {}, 232 | 
"outputs": [], 233 | "source": [] 234 | } 235 | ], 236 | "metadata": { 237 | "kernelspec": { 238 | "display_name": "conda_python3", 239 | "language": "python", 240 | "name": "conda_python3" 241 | }, 242 | "language_info": { 243 | "codemirror_mode": { 244 | "name": "ipython", 245 | "version": 3 246 | }, 247 | "file_extension": ".py", 248 | "mimetype": "text/x-python", 249 | "name": "python", 250 | "nbconvert_exporter": "python", 251 | "pygments_lexer": "ipython3", 252 | "version": "3.6.10" 253 | } 254 | }, 255 | "nbformat": 4, 256 | "nbformat_minor": 4 257 | } 258 | -------------------------------------------------------------------------------- /sagemaker_appflow_demo/sagemaker_ml/customer_support_classification - data preparation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import sagemaker\n", 11 | "print(pd.__version__)\n", 12 | "print(sagemaker.__version__)\n", 13 | "pd.set_option('display.max_colwidth', None)" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "session = sagemaker.Session()\n", 23 | "role = sagemaker.get_execution_role()\n", 24 | "bucket = session.default_bucket()\n", 25 | "prefix = \"dataset/appflow-sagemakerdemo\"" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "#upload dataset\n", 35 | "input_data = session.upload_data(path='dataset/all_tickets.csv', key_prefix=prefix)" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "# Processing script" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "%%writefile preprocessing.py\n", 52 | "import argparse, os, subprocess, sys\n", 53 | "import pandas as pd\n", 54 | "import numpy as np\n", 55 | "import re\n", 56 | "import string\n", 57 | "from sklearn.model_selection import train_test_split\n", 58 | "\n", 59 | "\n", 60 | "def install(package):\n", 61 | " subprocess.call([\n", 62 | " sys.executable, \"-m\",\"pip\", \"install\", package\n", 63 | " ])\n", 64 | "\n", 65 | "def remove_non_alphanumeric(row):\n", 66 | " row = [word for word in row if word.isalpha()]\n", 67 | " return row\n", 68 | "\n", 69 | "\n", 70 | "\n", 71 | "if __name__ == '__main__':\n", 72 | " install('nltk')\n", 73 | " import nltk\n", 74 | " from nltk.corpus import stopwords\n", 75 | " nltk.download('punkt')\n", 76 | " nltk.download('stopwords')\n", 77 | " parser = argparse.ArgumentParser()\n", 78 | " parser.add_argument('--filename', type=str)\n", 79 | " parser.add_argument('--num-cases', type=int, default=20000)\n", 80 | " parser.add_argument('--split-ratio', type=float, default=0.1)\n", 81 | "\n", 82 | " args, _ = parser.parse_known_args()\n", 83 | "\n", 84 | " print(\"Recieved arguments {}\".format(args))\n", 85 | "\n", 86 | " filename = args.filename\n", 87 | " num_cases = args.num_cases\n", 88 | " split_ratio = args.split_ratio\n", 89 | " \n", 90 | " #load dataset\n", 91 | "\n", 92 | " input_data_path = os.path.join('/opt/ml/processing/input', filename)\n", 93 | " print(\"Reading input data from {}\".format(input_data_path))\n", 94 | "\n", 95 | " data = pd.read_csv(input_data_path)\n", 96 | "\n", 97 | " #remove lines with missing 
values\n", 98 | " data.dropna(inplace=True)\n", 99 | "\n", 100 | " if num_cases is not None:\n", 101 | " data = data[:num_cases]\n", 102 | "\n", 103 | " #drop unwanted columns\n", 104 | " data = data[['category', 'body']]\n", 105 | "\n", 106 | " data['label'] = data.category.replace({\n", 107 | " 0: '__label__Category0__',\n", 108 | " 1: '__label__Category1__',\n", 109 | " 2: '__label__Category2__',\n", 110 | " 3: '__label__Category3__',\n", 111 | " 4: '__label__Category4__',\n", 112 | " 5: '__label__Category5__',\n", 113 | " 6: '__label__Category6__',\n", 114 | " 7: '__label__Category7__',\n", 115 | " 8: '__label__Category8__',\n", 116 | " 9: '__label__Category9__',\n", 117 | " 10: '__label__Category10__',\n", 118 | " 11: '__label__Category11__',\n", 119 | " 12: '__label__Category12__',\n", 120 | " 13: '__label__Category12__'\n", 121 | " }\n", 122 | " ) \n", 123 | " data = data.drop(['category'], axis=1)\n", 124 | "\n", 125 | " #move the label column to the front\n", 126 | " data = data[['label', 'body']]\n", 127 | "\n", 128 | " #tokenize the data\n", 129 | " print(\"Tokenizing the reviews\")\n", 130 | "\n", 131 | " data['body'] = data['body'].apply(nltk.word_tokenize)\n", 132 | "\n", 133 | " #remove none alpanumeric chars\n", 134 | " data['body'] = data['body'].apply(remove_non_alphanumeric)\n", 135 | "\n", 136 | " #remove punctuation\n", 137 | " #data['body'] = data['body'].apply(remove_punctuation)\n", 138 | "\n", 139 | " #remove stop words\n", 140 | " def remove_stop_words(row):\n", 141 | " stop_words = set(stopwords.words('english'))\n", 142 | " words = [w for w in row if not w in stop_words]\n", 143 | " return words\n", 144 | " \n", 145 | " data['body'] = data['body'].apply(remove_stop_words)\n", 146 | "\n", 147 | " #convert all text to lowercase\n", 148 | " data['email_body'] = data.apply(lambda row: \" \".join(row['body']).lower(), axis=1)\n", 149 | "\n", 150 | " #drop unwanted columns\n", 151 | " data = data.drop(['body'], axis=1)\n", 152 | "\n", 153 | " # Process data\n", 154 | " print('Splitting data with ratio {}'.format(split_ratio))\n", 155 | " training, validation = train_test_split(data, test_size=split_ratio)\n", 156 | "\n", 157 | " training_output_path = os.path.join('/opt/ml/processing/train', 'training.txt')\n", 158 | " validation_output_path = os.path.join('/opt/ml/processing/validation', 'validation.txt')\n", 159 | "\n", 160 | " print('Saving training data to {}'.format(training_output_path))\n", 161 | " np.savetxt(training_output_path, training.values, fmt='%s')\n", 162 | "\n", 163 | " print('Saving validation data to {}'.format(validation_output_path))\n", 164 | " np.savetxt(validation_output_path, validation.values, fmt='%s')\n" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": null, 170 | "metadata": {}, 171 | "outputs": [], 172 | "source": [ 173 | "from sagemaker.sklearn.processing import SKLearnProcessor\n", 174 | "from sagemaker.processing import ProcessingInput, ProcessingOutput" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [ 183 | "sklearn_processor = SKLearnProcessor(\n", 184 | " framework_version='0.20.0',\n", 185 | " role=role,\n", 186 | " instance_type='ml.c5.2xlarge',\n", 187 | " instance_count=1\n", 188 | " \n", 189 | ")" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "metadata": {}, 196 | "outputs": [], 197 | "source": [ 198 | "%%time\n", 199 | "\n", 200 | "sklearn_processor.run(\n", 201 
| " code='preprocessing.py',\n", 202 | " inputs=[ProcessingInput(source=input_data, # Our data from s3\n", 203 | " destination='/opt/ml/processing/input')],\n", 204 | " outputs=[\n", 205 | " ProcessingOutput(output_name=\"training\", \n", 206 | " source='/opt/ml/processing/train'),\n", 207 | " ProcessingOutput(output_name=\"validation\", \n", 208 | " source='/opt/ml/processing/validation')\n", 209 | " ],\n", 210 | " arguments=[\n", 211 | " \"--filename\", \"all_tickets.csv\",\n", 212 | " \"--num-cases\", \"35000\",\n", 213 | " \"--split-ratio\", \"0.05\"\n", 214 | " ]\n", 215 | " \n", 216 | ")" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "metadata": {}, 223 | "outputs": [], 224 | "source": [ 225 | "sklearn_processor.latest_job.describe()['ProcessingOutputConfig']['Outputs'][0]['S3Output']['S3Uri']" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": null, 231 | "metadata": {}, 232 | "outputs": [], 233 | "source": [ 234 | "sklearn_processor.latest_job.describe()['ProcessingOutputConfig']['Outputs'][1]['S3Output']['S3Uri']" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": null, 240 | "metadata": {}, 241 | "outputs": [], 242 | "source": [ 243 | "### The output above will be used in the training notebook" 244 | ] 245 | } 246 | ], 247 | "metadata": { 248 | "kernelspec": { 249 | "display_name": "conda_python3", 250 | "language": "python", 251 | "name": "conda_python3" 252 | }, 253 | "language_info": { 254 | "codemirror_mode": { 255 | "name": "ipython", 256 | "version": 3 257 | }, 258 | "file_extension": ".py", 259 | "mimetype": "text/x-python", 260 | "name": "python", 261 | "nbconvert_exporter": "python", 262 | "pygments_lexer": "ipython3", 263 | "version": "3.6.10" 264 | } 265 | }, 266 | "nbformat": 4, 267 | "nbformat_minor": 4 268 | } 269 | -------------------------------------------------------------------------------- /sf-appflow-upsert-redshift-lambda/AppFlow_extract_from_Salesforce.md: -------------------------------------------------------------------------------- 1 | ## Create a Salesforce developer account, and export using AppFlow 2 | 3 | Amazon AppFlow is a fully managed integration service that enables customers to securely transfer data between AWS services and software-as-a-service (SaaS) applications in just a few clicks. [Amazon AppFlow now supports automatic import of newly created Salesforce fields into Amazon S3 without requiring the user to update their flow configurations](https://aws.amazon.com/about-aws/whats-new/2020/07/amazon-appflow-supports-new-salesforce-integrations/). Also, Amazon AppFlow now allows users to import deleted records from Salesforce to any supported destination. 4 | 5 | Following are the steps needed to create a connection to salesforce to extract the object, _**Account**_, and store in S3 bucket in JSON format. 6 | 7 | 1. Create a free developer account at [developer.salesforce.com](https://developer.salesforce.com) 8 | 9 | 1. If you have a salesforce account connected with your organization email, signed up with an email like username+sfdev@orgdomain.com 10 | 2. Even though you use a new email, oddly enough your email notifications for new signup still would go to org email 11 | 12 | 2. Login to the developer account using the [url](https://login.salesforce.com) using the newly created credential. 13 | 3. Log on to AWS Console, navigate to Amazon AppFlow Service, expand the left pane to click on _**Connections**_ as shown below. 
14 | ![Appflow_connection](images/AppFlow_Connection.png) 15 | 16 | 4. From the drop-down list of connectors, select _**Salesforce**_ and click on _**Create connection**_ to get started on creating a new connection. 17 | ![Create SF connections](images/AppFlow_Create_SF_Cconnection.png) 18 | 19 | 5. Choose the _**Salesforce environment**_. Since the target is S3, disable PrivateLink, enter a _**Connection name**_, and click _**Continue**_. 20 | ![Appflow_SF_Conn_Details](images/AppFlow_SF_Conn_Details.png) 21 | 22 | 6. Click _**Allow**_ to proceed further. 23 | ![AppFlow_SF_Allow](images/AppFlow_SF_Allow.png) 24 | 25 | 7. The Salesforce connection is created as shown below. 26 | ![AppFlow_SF_Conn_Created](images/AppFlow_SF_Conn_Created.png) 27 | 28 | 8. Create a private S3 bucket in the same region as the AppFlow flow; I created an S3 bucket named _**appflow-sfdev**_. 29 | 30 | 9. Go back to the AppFlow console and click on _**Create flow**_ to create a flow. 31 | ![AppFlow_click_CF](images/AppFlow_click_CF.png) 32 | 33 | 10. The first step is to specify the flow details, so enter a _**Flow name**_ and _**Flow description**_ and click _**Next**_. 34 | ![AppFlow_Specify_Flow_Details](images/AppFlow_Specify_Flow_Details.png) 35 | 36 | 11. Specify the _**Source name**_ as Salesforce, choose the connection created earlier, and pick the _**Salesforce objects**_ to import. 37 | ![AppFlow_Source_Details](images/AppFlow_Source_Details.png) 38 | 39 | 12. Enter the _**Destination details**_ by picking Amazon S3 as the _**Destination name**_, pick the bucket created earlier, set the _**Flow trigger**_ to _**Run on demand**_, and then click _**Next**_. 40 | ![AppFlow_Dest_Flow_Trigger](images/AppFlow_Dest_Flow_Trigger.png) 41 | 42 | 13. For the _**Mapping method**_ choose _**Manually map fields**_. For the _**Source to destination field mapping**_, choose _**Map all fields directly**_ under _**Bulk actions**_ and click _**Next**_. 43 | ![AppFlow_Mapping](images/AppFlow_Mapping.png) 44 | 45 | 14. Click _**Next**_ to get past the _**Add filters**_ section, then on the "Review and create" step scroll down and click on _**Create flow**_ to create the flow. 46 | ![AppFlow_flow_create](images/AppFlow_flow_create.png) 47 | 48 | -------------------------------------------------------------------------------- /sf-appflow-upsert-redshift-lambda/Create_Lambda_Layers_Triggers_Destination.md: -------------------------------------------------------------------------------- 1 | ## Create Lambda functions along with layers and destination 2 | 3 | [AWS Lambda](https://aws.amazon.com/lambda/) lets you run code without provisioning or managing servers. You pay only for the compute time you consume. You can configure your Lambda function to pull in additional code and content in the form of layers, also known as [AWS Lambda layers](https://docs.aws.amazon.com/lambda/latest/dg/configuration-layers.html). 4 | 5 | We will be using Lambda as an S3 event trigger to move the _**Account**_ data file to the data lake stage, to trigger the Glue crawler that updates the metadata in the Glue catalog, and finally to call the Redshift Data API to upsert into the Redshift target tables. 6 | 7 | 1. Create a Lambda layer to make the latest version of boto3 available in order to use the [redshift-data](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/redshift-data.html) API. 8 | 1. From the AWS console, navigate to the AWS Lambda console, and click on _**Layers**_ under _**Additional resources**_.
9 |     2. Name the layer _**f_lambda_call_redshift_data_api**_, add an appropriate description, upload [f_lambda_call_redshift_data_api.zip](lambda/layers/f_lambda_call_redshift_data_api.zip), and choose Python 3 and MIT as the _**Compatible runtimes**_ and _**License**_ respectively.
10 |     3. Click on _**Create**_ to create the layer, _**f_lambda_call_redshift_data_api**_, as shown below.
11 | ![Lambda_Layer](images/Lambda_Layer.png)
12 | 
13 | 2. Follow the guidance in the section _**To create an execution role**_ in [Building Lambda functions with Python](https://docs.aws.amazon.com/lambda/latest/dg/lambda-python.html) to create a Lambda execution role. In this example an all-permissive _**Lambda_Admin**_ role was created, as shown below.
14 | ![Lambda_Admin_Role](images/Lambda_Admin_Role.png)
15 | 
16 | 3. Follow the guidance in the section _**To create a Python function**_ in [Building Lambda functions with Python](https://docs.aws.amazon.com/lambda/latest/dg/lambda-python.html) to create the 3 Lambda functions below. _**Click on the Advanced settings to select the VPC we created earlier**_.
17 |     1. _**f_lambda_upsert_sfdev_appflow_account**_ - In the _**Function code**_ section click on _**Actions**_ to upload the zip file, [f_lambda_upsert_sfdev_appflow_account.zip](lambda/functions/f_lambda_upsert_sfdev_appflow_account.zip)
18 | ![Lambda_Upload_Zip](images/Lambda_Upload_Zip.png)
19 |     2. _**f_lambda_crawl_sfdev_appflow_account**_ - In the _**Function code**_ section click on _**Actions**_ to upload the zip file, [f_lambda_crawl_sfdev_appflow_account.zip](lambda/functions/f_lambda_crawl_sfdev_appflow_account.zip)
20 |     3. _**f_lambda_move_to_stage_account**_ - In the _**Function code**_ section click on _**Actions**_ to upload the zip file, [f_lambda_move_to_stage_account.zip](lambda/functions/f_lambda_move_to_stage_account.zip)
21 | 
22 | **In the Lambda function _**f_lambda_move_to_stage_account**_, change the _**new_bucket_name**_ variable to the data lake bucket name you created in step 3 of [Create_VPC_S3_Glue](Create_VPC_S3_Glue.md).**
23 | 
24 | 4. Select _**Layers**_ for the Lambda function _**f_lambda_upsert_sfdev_appflow_account**_ and click on _**Add a layer**_. In the next screen choose the latest version of the custom layer _**f_lambda_call_redshift_data_api**_ created earlier, as shown below.
25 | ![Add_layer_to_lambda](images/Add_layer_to_lambda.png)
26 | ![Add_layer](images/Add_layer.png)
27 | ![Lambda_with_layer](images/Lambda_with_layer.png)
28 | 
29 | 5. Add the Lambda function _**f_lambda_upsert_sfdev_appflow_account**_ as the destination for the Lambda function _**f_lambda_crawl_sfdev_appflow_account**_, selecting the _**Source**_ as _**Asynchronous invocation**_ and the _**Condition**_ as _**On success**_, as shown below.
30 | ![Lamnda_crawl_destination](images/Lamnda_crawl_destination.png)
31 | 
32 | 6. Add an S3 trigger to the Lambda function _**f_lambda_move_to_stage_account**_ for any object created in the bucket named _**appflow-sfdev**_ with the _**Prefix**_ _**sfdev-account/**_, as shown below.
33 | ![Lambda_s3_trigger](images/Lambda_s3_trigger.png)
34 | 
35 | 7. Add the Lambda function _**f_lambda_crawl_sfdev_appflow_account**_ as the destination for the Lambda function _**f_lambda_move_to_stage_account**_, selecting the _**Source**_ as _**Asynchronous invocation**_ and the _**Condition**_ as _**On success**_, as shown below.
36 | ![stage_lambda_destination](images/stage_lambda_destination.png)
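
The zipped Lambda functions above are provided as-is. For orientation only, below is a minimal sketch of the kind of call the upsert function makes through the layer's newer boto3; the cluster identifier, database, secret ARN, and SQL statements are assumptions based on the names used in this walkthrough, and the code inside the provided zip may differ.

```python
import boto3

# The custom layer ships a newer boto3 that includes the 'redshift-data' client.
client = boto3.client('redshift-data')

# Assumed identifiers from this walkthrough; the secret ARN is a placeholder.
CLUSTER_ID = 'sfdev-appflow-redshift-cluster'
DATABASE = 'sfdev-appflow'
SECRET_ARN = 'arn:aws:secretsmanager:region:account-id:secret:dev/sfdev-appflow/awsuser'

# Illustrative upsert: refresh the stage table from the Spectrum external schema,
# delete matching rows from the target, then insert the staged rows.
UPSERT_SQLS = [
    "delete from stg_sfdev_appflow.account;",
    "insert into stg_sfdev_appflow.account select * from ext_sfdev_appflow.account;",
    ("delete from tgt_sfdev_appflow.account "
     "using stg_sfdev_appflow.account s "
     "where tgt_sfdev_appflow.account.id = s.id;"),
    "insert into tgt_sfdev_appflow.account select * from stg_sfdev_appflow.account;",
]


def lambda_handler(event, context):
    # batch_execute_statement submits the statements as a single transaction and
    # returns immediately with a statement id; the Data API is asynchronous.
    response = client.batch_execute_statement(
        ClusterIdentifier=CLUSTER_ID,
        Database=DATABASE,
        SecretArn=SECRET_ARN,
        Sqls=UPSERT_SQLS,
        StatementName='upsert_sfdev_appflow_account',
    )
    return response['Id']
```

The move and crawl functions can be approximated the same way with the standard boto3 clients, for example `copy_object` on the S3 client to move the landed file into the data lake bucket and `start_crawler` on the Glue client to refresh the catalog.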
37 | 
--------------------------------------------------------------------------------
/sf-appflow-upsert-redshift-lambda/Create_Redshift_Private.md:
--------------------------------------------------------------------------------
1 | ## Create Redshift Cluster in private subnet along with all database objects
2 | 
3 | [Amazon Redshift](https://aws.amazon.com/redshift/) is the most popular and fastest cloud data warehouse. [Amazon AppFlow does support Amazon Redshift as a destination](https://docs.aws.amazon.com/appflow/latest/userguide/requirements.html#redshift), but AppFlow still cannot connect to a Redshift cluster in a private subnet. Customers almost always create a data warehouse in the private subnet for security reasons.
4 | 
5 | The Redshift cluster will be created in the private subnet of the VPC we created earlier. AWS Glue also needs access to the Redshift cluster, and an IAM role is required for Redshift and Redshift Spectrum.
6 | 
7 | 1. Create an [Amazon Redshift cluster subnet group](https://docs.aws.amazon.com/redshift/latest/mgmt/working-with-cluster-subnet-groups.html). I created a subnet group named _**sfdev-appflow-subnet-group**_ choosing the VPC and the private subnet created earlier, as shown below.
8 | ![Redshift_Cluster_Subnet_Group](images/Redshift_Cluster_Subnet_Group.png)
9 | 
10 | 2. Navigate to the VPC console and create a security group for the Redshift cluster in the VPC we created earlier. [Make sure to add a self-referencing inbound rule for ALL TCP so that Glue can access the Redshift cluster](https://docs.aws.amazon.com/glue/latest/dg/connection-JDBC-VPC.html). I created a security group named _**sfdev-appflow-redshift-sg**_ as shown below.
11 | ![Redshift_Security_Group](images/Redshift_Security_Group.png)
12 | 
13 | 3. Create a Redshift service IAM role called _**sfdev-appflow-redshift**_ with managed policies such as AmazonAthenaFullAccess, AmazonS3ReadOnlyAccess, and AWSGlueConsoleFullAccess, as shown below. You can follow [Create IAM role for Redshift](https://docs.aws.amazon.com/redshift/latest/gsg/rs-gsg-create-an-iam-role.html) and [Create IAM role for Redshift Spectrum](https://docs.aws.amazon.com/redshift/latest/dg/c-getting-started-using-spectrum-create-role.html) for guidance.
14 | ![Redshift_IAM_Role](images/Redshift_IAM_Role.png)
15 | 
16 | 4. Create a Redshift cluster in the private subnet of the VPC created earlier using the subnet group, security group, and IAM role created above. Follow the link to a [Sample Amazon Redshift Cluster](https://docs.aws.amazon.com/redshift/latest/gsg/rs-gsg-launch-sample-cluster.html). I created a cluster with the id _**sfdev-appflow-redshift-cluster**_, DB name _**sfdev-appflow**_, and user name _**awsuser**_.
17 | ![Redshift_Cluster](images/Redshift_Cluster.png)
18 | ![Redshift_Cluster_DB_NW](images/Redshift_Cluster_DB_NW.png)
19 | 
20 | 5. Log on to the AWS Console, navigate to Redshift, and use the built-in query editor to run the following SQLs to create the necessary schemas and tables. (These scripts can also be run through the Redshift Data API; a sketch appears at the end of this page.)
21 |     1. Run [create_schema_ext_sfdev_appflow.sql](sqls/create_schema_ext_sfdev_appflow.sql) to create an external schema _**ext_sfdev_appflow**_ using the AWS Glue catalog DB, _**sfdev-appflow**_.
22 |     2. Run [create_stage_target_schema.sql](sqls/create_stage_target_schema.sql) to create the stage and target schemas.
23 |     3. Run [create_stage_target_table.sql](sqls/create_stage_target_table.sql) to create the stage and target tables.
24 | 
25 | 6. Navigate to [AWS Secrets Manager](https://aws.amazon.com/secrets-manager/), click on _**Store a new secret**_, set the _**Select secret type**_ to _**Credentials for Redshift cluster**_, enter the _**User name**_ and _**Password**_, select the cluster id created above, and click _**Next**_. Enter a _**Secret name**_ and click _**Next**_. _**Disable automatic rotation**_ and click _**Next**_ to review. Finally click on _**Store**_ to save the secret. In this example the _**Secret name**_ is _**dev/sfdev-appflow/awsuser**_, as shown below.
26 | ![Redshift_Secret](images/Redshift_Secret.png)
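
Because the cluster lives in a private subnet it is not reachable over JDBC from outside the VPC, but the [redshift-data](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/redshift-data.html) API goes through the AWS service endpoint and only needs the cluster id and the secret created in step 6. For reference only, the following is a minimal sketch of running one of the provided SQL scripts (or any other statement) that way; the identifiers are the ones assumed in this walkthrough and the secret ARN is a placeholder.

```python
import time
import boto3

client = boto3.client('redshift-data')

# Assumed identifiers from this walkthrough; the secret ARN is a placeholder.
CLUSTER_ID = 'sfdev-appflow-redshift-cluster'
DATABASE = 'sfdev-appflow'
SECRET_ARN = 'arn:aws:secretsmanager:region:account-id:secret:dev/sfdev-appflow/awsuser'


def run_sql(sql):
    """Submit a single statement and wait for it to finish (the Data API is asynchronous)."""
    stmt = client.execute_statement(
        ClusterIdentifier=CLUSTER_ID,
        Database=DATABASE,
        SecretArn=SECRET_ARN,
        Sql=sql,
    )
    while True:
        desc = client.describe_statement(Id=stmt['Id'])
        if desc['Status'] in ('FINISHED', 'FAILED', 'ABORTED'):
            return desc
        time.sleep(1)


# Example: create the Spectrum external schema from the provided script.
with open('sqls/create_schema_ext_sfdev_appflow.sql') as f:
    print(run_sql(f.read())['Status'])
```

Note that `execute_statement` takes a single statement, so scripts such as `create_stage_target_schema.sql` that contain more than one statement need to be split up (or submitted with `batch_execute_statement`).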
--------------------------------------------------------------------------------
/sf-appflow-upsert-redshift-lambda/Create_VPC_S3_Glue.md:
--------------------------------------------------------------------------------
1 | ## Create VPC, S3, Glue
2 | 
3 | In order to secure all the resources created, create a VPC with a private subnet and a VPC endpoint for S3 so that traffic does not travel over the public internet. Finally, the AWS Glue crawler will crawl the _**Account**_ data extract in the data lake to update the Glue catalog.
4 | 
5 | 1. [Amazon Virtual Private Cloud (Amazon VPC)](https://aws.amazon.com/vpc/) lets you provision a logically isolated section of the AWS Cloud where you can launch AWS resources in a virtual network that you define. You have complete control over your virtual networking environment, including selection of your own IP address range, creation of subnets, and configuration of route tables and network gateways. In order to create a secure data platform customers almost always create it in a private subnet. Follow the tutorial [Creating a VPC with Public and Private Subnets for Your Compute Environments](https://docs.aws.amazon.com/batch/latest/userguide/create-public-private-vpc.html) to create a VPC with public and private subnets.
6 | ![vpc_public_private_subnet](images/vpc_public_private_subnet.png)
7 | 
8 | 2. A [VPC endpoint](https://docs.aws.amazon.com/vpc/latest/userguide/endpoint-services-overview.html) enables you to privately connect your VPC to supported AWS services and VPC endpoint services powered by AWS PrivateLink without requiring an internet gateway, NAT device, VPN connection, or AWS Direct Connect connection. Instances in your VPC do not require public IP addresses to communicate with resources in the service. Traffic between your VPC and the other service does not leave the Amazon network. [Create Amazon VPC Endpoints for Amazon S3](https://docs.aws.amazon.com/glue/latest/dg/vpc-endpoints-s3.html) in order to make sure that the data does not travel over the public internet. You can also follow the [blog to create a VPC Endpoint for Amazon S3](https://aws.amazon.com/blogs/aws/new-vpc-endpoint-for-amazon-s3/).
9 | 
10 | 3. Create an S3 bucket to serve as your data lake in the same region as the AppFlow resources created in the previous section, for example the S3 bucket named _**appflow-sfdev-data-lake**_ shown below.
11 | ![appflow_sfdev_data_lake](images/appflow_sfdev_data_lake.png)
12 | 
13 | 4. Navigate to the AWS Glue console, click on _**Databases**_ from the left pane, then click on _**Add database**_ and enter a name to create an [AWS Glue database](https://docs.aws.amazon.com/glue/latest/dg/define-database.html). In my case I named it _**sfdev-appflow**_.
14 | 
15 | 5. Click on _**Connections**_ from the left pane in the AWS Glue console and then click on _**Add connection**_ to create a connection for S3 using the VPC, private subnet, default subnet, and S3 endpoint created earlier. You can also follow [Crawling an Amazon S3 Data Store using a VPC Endpoint](https://docs.aws.amazon.com/glue/latest/dg/connection-S3-VPC.html) for reference. I named the connection _**sfdev-appflow-s3**_ as shown below.
16 | ![Glue_S3_Connection](images/Glue_S3_Connection.png)
17 | 
18 | 6. Click on _**Crawlers**_ from the left pane in the AWS Glue console and then click on _**Add crawler**_ to create a crawler using _**sfdev-appflow-s3**_ as the connection, _**sfdev-appflow**_ as the database, and _**s3://unique-bucket-name/account**_ as the S3 path. Replace _**unique-bucket-name**_ with the bucket name created in step 3. Also create an IAM role as needed. You can also follow [Crawling an Amazon S3 Data Store using a VPC Endpoint](https://docs.aws.amazon.com/glue/latest/dg/connection-S3-VPC.html) for reference. I named the crawler _**sfdev-appflow-account**_ as shown below. A scripted equivalent of the database and crawler setup is sketched at the end of this page.
19 | ![Glue_Crawler](images/Glue_Crawler.png)
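
If you prefer to script steps 4 and 6 rather than click through the console, a minimal boto3 sketch is shown below. It targets the S3 path with the connection created in step 5; the crawler role and bucket name are placeholders for the ones you create, so treat this as a starting point rather than an exact equivalent of the console walkthrough.

```python
import boto3

glue = boto3.client('glue')

# Placeholders; substitute the bucket and IAM role you created.
DATA_LAKE_BUCKET = 'appflow-sfdev-data-lake'
CRAWLER_ROLE = 'arn:aws:iam::123456789012:role/your-glue-crawler-role'  # hypothetical role

# Step 4: the Glue catalog database.
glue.create_database(DatabaseInput={'Name': 'sfdev-appflow'})

# Step 6: a crawler over the account prefix, reusing the VPC connection from step 5.
glue.create_crawler(
    Name='sfdev-appflow-account',
    Role=CRAWLER_ROLE,
    DatabaseName='sfdev-appflow',
    Targets={'S3Targets': [{
        'Path': 's3://{}/account'.format(DATA_LAKE_BUCKET),
        'ConnectionName': 'sfdev-appflow-s3',
    }]},
)

# Run it once AppFlow and the stage Lambda have landed data in the data lake.
glue.start_crawler(Name='sfdev-appflow-account')
```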
20 | 
--------------------------------------------------------------------------------
/sf-appflow-upsert-redshift-lambda/README.md:
--------------------------------------------------------------------------------
1 | ## Extract Salesforce data using Amazon AppFlow and upsert it to Redshift tables hosted in a private subnet using the Data API
2 | 
3 | [Amazon AppFlow](https://aws.amazon.com/appflow/) is a fully managed integration service that enables you to securely transfer data between Software-as-a-Service (SaaS) applications like Salesforce, Marketo, Slack, and ServiceNow, and AWS services like Amazon S3 and Amazon Redshift, in just a few clicks. Most customers run their Redshift cluster in a private subnet, so AppFlow cannot load data into it directly. Additionally, you may want to upsert data into your data warehouse or Redshift cluster as opposed to just inserting it.
4 | 
5 | Here we show you how to extract Salesforce objects using Amazon AppFlow, store them in a data lake (S3), crawl the dataset using an AWS Glue crawler to update the catalog, and call the [redshift-data](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/redshift-data.html#client) API to upsert (merge) the data into Redshift tables.
6 | 
7 | ## Scope
8 | This demonstration pulls in the _**Account**_ object from Salesforce.
9 | 
10 | ## Solution Overview
11 | 
12 | In order to build this solution we first need to create:
13 | 
14 | * [Salesforce developer account](https://developer.salesforce.com)
15 | * [AppFlow connection and flow](https://aws.amazon.com/appflow/getting-started/)
16 | * [Create VPC with public and private subnets](https://docs.aws.amazon.com/batch/latest/userguide/create-public-private-vpc.html) to create our own network where we can create the resources.
17 | * [Create Amazon Redshift Cluster in private subnet](https://docs.aws.amazon.com/redshift/latest/gsg/rs-gsg-launch-sample-cluster.html) in the private subnet of the VPC to make sure it is not reachable from the internet.
18 | * [Define AWS Glue Crawler](https://docs.aws.amazon.com/glue/latest/dg/add-crawler.html) to crawl the data and update the catalog
19 | * [AWS Lambda functions invoked by an S3 event](https://docs.aws.amazon.com/lambda/latest/dg/with-s3.html) to call the AWS Glue crawler and the [data API for Amazon Redshift](https://aws.amazon.com/about-aws/whats-new/2020/09/announcing-data-api-for-amazon-redshift/) to upsert data into Redshift
20 | 
21 | ## Solution Architecture
22 | 
23 | The following architecture diagram shows how AppFlow consumes Salesforce data and lands it on S3. The S3 event triggers a Lambda function that moves the data into the data lake, where the AWS Glue crawler crawls it to update the metadata in the Glue catalog. The next Lambda function then uses the data API for Redshift to read the data from the data lake through Redshift Spectrum and upsert it into the Redshift tables.
24 | 
25 | ![architecture](images/SF_AppFlow_Upsert_Redshift.jpg)
26 | 
27 | *Please note that although [Amazon AppFlow does support Amazon Redshift as a destination](https://docs.aws.amazon.com/appflow/latest/userguide/requirements.html#redshift), it still cannot connect to a Redshift cluster in a private subnet. Customers almost always create a data warehouse in a private subnet for security reasons.*
28 | 
29 | ## Pre-requisites
30 | - [Create AWS Account](https://aws.amazon.com/premiumsupport/knowledge-center/create-and-activate-aws-account/)
31 | - Familiarity with [Amazon AppFlow](https://aws.amazon.com/appflow/), [Amazon S3](https://aws.amazon.com/s3/), [AWS Glue Crawler](https://docs.aws.amazon.com/glue/latest/dg/add-crawler.html), [AWS Lambda](https://aws.amazon.com/lambda/), [Amazon Redshift](https://aws.amazon.com/redshift/)
32 | 
33 | ----
34 | 
35 | #### 1. [Create a Salesforce developer account and extract data using AppFlow](AppFlow_extract_from_Salesforce.md)
36 | 
37 | ----
38 | 
39 | #### 2. [Create VPC, S3, Glue](Create_VPC_S3_Glue.md)
40 | 
41 | ----
42 | 
43 | #### 3. [Set up the Redshift Cluster in private subnet](Create_Redshift_Private.md)
44 | 
45 | ----
46 | 
47 | #### 4. [Create Lambda, its layers, triggers, destination](Create_Lambda_Layers_Triggers_Destination.md)
48 | 
49 | ----
50 | 
51 | #### 5. Run the flow, sfdev-account.
52 | 
53 | Before running the flow named _**sfdev-account**_:
54 | 1. Change the _**new_bucket_name**_ variable to the appropriate data lake bucket name in the Lambda function _**f_lambda_move_to_stage_account**_.
55 | 2. Log on to the [url](https://login.salesforce.com) with the credentials created earlier, and from the _**Apps**_ search for _**Accounts**_ under _**Items**_. Click _**New**_ to create a new account or edit an existing one.
56 | 
57 | Go back to the Amazon AppFlow console, select the flow named _**sfdev-account**_, and click _**Run flow**_. It will show the run metrics after completion, as shown below. (A scripted way to trigger the flow and check its run status is sketched at the end of this page.)
58 | ![Run_appflow](images/Run_appflow.png)
59 | 
60 | Go back to the Redshift console and select the _**EDITOR**_ to run the SQL below to verify that the records got loaded into the Redshift table.
61 | ```sql
62 | select * from tgt_sfdev_appflow.account;
63 | ```
64 | 
65 | ----
66 | 
67 | ## Clean Up
68 | 
69 | After testing, clean up all resources created to avoid incurring charges while they are not in use.
70 | 
71 | ----
72 | 
73 | ## Conclusion
74 | 
75 | We showed you how to extract Salesforce objects into a data lake and upsert the data into Redshift tables running in a private subnet.
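
For reference only, the run in step 5 can also be driven from a script. The sketch below triggers the on-demand flow through the AppFlow API and polls its execution records for a final status; the flow name is the one assumed throughout this walkthrough. Once the Lambda chain has finished, the target table can be checked with the Redshift Data API in the same way as the sketch at the end of the Redshift setup page.

```python
import time
import boto3

appflow = boto3.client('appflow')
FLOW_NAME = 'sfdev-account'  # the flow created earlier in this walkthrough

# Trigger the on-demand flow (equivalent to clicking "Run flow" in the console).
appflow.start_flow(flowName=FLOW_NAME)

# Poll the most recent execution record until it reports a final status.
# (Assumes the latest record is the run that was just started.)
while True:
    runs = appflow.describe_flow_execution_records(flowName=FLOW_NAME, maxResults=1)
    records = runs.get('flowExecutions', [])
    if records and records[0].get('executionStatus') in ('Successful', 'Error'):
        break
    time.sleep(10)

print(records[0].get('executionStatus'), records[0].get('executionResult', {}))
```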
76 | -------------------------------------------------------------------------------- /sf-appflow-upsert-redshift-lambda/images/Add_layer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-appflow/549f19c914aabdf3946737fea189cb1f0985e971/sf-appflow-upsert-redshift-lambda/images/Add_layer.png -------------------------------------------------------------------------------- /sf-appflow-upsert-redshift-lambda/images/Add_layer_to_lambda.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-appflow/549f19c914aabdf3946737fea189cb1f0985e971/sf-appflow-upsert-redshift-lambda/images/Add_layer_to_lambda.png -------------------------------------------------------------------------------- /sf-appflow-upsert-redshift-lambda/images/AppFlow_Connection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-appflow/549f19c914aabdf3946737fea189cb1f0985e971/sf-appflow-upsert-redshift-lambda/images/AppFlow_Connection.png -------------------------------------------------------------------------------- /sf-appflow-upsert-redshift-lambda/images/AppFlow_Create_SF_Cconnection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-appflow/549f19c914aabdf3946737fea189cb1f0985e971/sf-appflow-upsert-redshift-lambda/images/AppFlow_Create_SF_Cconnection.png -------------------------------------------------------------------------------- /sf-appflow-upsert-redshift-lambda/images/AppFlow_Dest_Flow_Trigger.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-appflow/549f19c914aabdf3946737fea189cb1f0985e971/sf-appflow-upsert-redshift-lambda/images/AppFlow_Dest_Flow_Trigger.png -------------------------------------------------------------------------------- /sf-appflow-upsert-redshift-lambda/images/AppFlow_Mapping.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-appflow/549f19c914aabdf3946737fea189cb1f0985e971/sf-appflow-upsert-redshift-lambda/images/AppFlow_Mapping.png -------------------------------------------------------------------------------- /sf-appflow-upsert-redshift-lambda/images/AppFlow_SF_Allow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-appflow/549f19c914aabdf3946737fea189cb1f0985e971/sf-appflow-upsert-redshift-lambda/images/AppFlow_SF_Allow.png -------------------------------------------------------------------------------- /sf-appflow-upsert-redshift-lambda/images/AppFlow_SF_Conn_Created.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-appflow/549f19c914aabdf3946737fea189cb1f0985e971/sf-appflow-upsert-redshift-lambda/images/AppFlow_SF_Conn_Created.png -------------------------------------------------------------------------------- /sf-appflow-upsert-redshift-lambda/images/AppFlow_SF_Conn_Details.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/amazon-appflow/549f19c914aabdf3946737fea189cb1f0985e971/sf-appflow-upsert-redshift-lambda/images/AppFlow_SF_Conn_Details.png -------------------------------------------------------------------------------- /sf-appflow-upsert-redshift-lambda/images/AppFlow_Source_Details.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-appflow/549f19c914aabdf3946737fea189cb1f0985e971/sf-appflow-upsert-redshift-lambda/images/AppFlow_Source_Details.png -------------------------------------------------------------------------------- /sf-appflow-upsert-redshift-lambda/images/AppFlow_Specify_Flow_Details.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-appflow/549f19c914aabdf3946737fea189cb1f0985e971/sf-appflow-upsert-redshift-lambda/images/AppFlow_Specify_Flow_Details.png -------------------------------------------------------------------------------- /sf-appflow-upsert-redshift-lambda/images/AppFlow_click_CF.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-appflow/549f19c914aabdf3946737fea189cb1f0985e971/sf-appflow-upsert-redshift-lambda/images/AppFlow_click_CF.png -------------------------------------------------------------------------------- /sf-appflow-upsert-redshift-lambda/images/AppFlow_flow_create.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-appflow/549f19c914aabdf3946737fea189cb1f0985e971/sf-appflow-upsert-redshift-lambda/images/AppFlow_flow_create.png -------------------------------------------------------------------------------- /sf-appflow-upsert-redshift-lambda/images/Glue_Crawler.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-appflow/549f19c914aabdf3946737fea189cb1f0985e971/sf-appflow-upsert-redshift-lambda/images/Glue_Crawler.png -------------------------------------------------------------------------------- /sf-appflow-upsert-redshift-lambda/images/Glue_S3_Connection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-appflow/549f19c914aabdf3946737fea189cb1f0985e971/sf-appflow-upsert-redshift-lambda/images/Glue_S3_Connection.png -------------------------------------------------------------------------------- /sf-appflow-upsert-redshift-lambda/images/Lambda_Admin_Role.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-appflow/549f19c914aabdf3946737fea189cb1f0985e971/sf-appflow-upsert-redshift-lambda/images/Lambda_Admin_Role.png -------------------------------------------------------------------------------- /sf-appflow-upsert-redshift-lambda/images/Lambda_Layer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-appflow/549f19c914aabdf3946737fea189cb1f0985e971/sf-appflow-upsert-redshift-lambda/images/Lambda_Layer.png -------------------------------------------------------------------------------- /sf-appflow-upsert-redshift-lambda/images/Lambda_Upload_Zip.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/amazon-appflow/549f19c914aabdf3946737fea189cb1f0985e971/sf-appflow-upsert-redshift-lambda/images/Lambda_Upload_Zip.png -------------------------------------------------------------------------------- /sf-appflow-upsert-redshift-lambda/images/Lambda_s3_trigger.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-appflow/549f19c914aabdf3946737fea189cb1f0985e971/sf-appflow-upsert-redshift-lambda/images/Lambda_s3_trigger.png -------------------------------------------------------------------------------- /sf-appflow-upsert-redshift-lambda/images/Lambda_with_layer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-appflow/549f19c914aabdf3946737fea189cb1f0985e971/sf-appflow-upsert-redshift-lambda/images/Lambda_with_layer.png -------------------------------------------------------------------------------- /sf-appflow-upsert-redshift-lambda/images/Lamnda_crawl_destination.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-appflow/549f19c914aabdf3946737fea189cb1f0985e971/sf-appflow-upsert-redshift-lambda/images/Lamnda_crawl_destination.png -------------------------------------------------------------------------------- /sf-appflow-upsert-redshift-lambda/images/Redshift_Cluster.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-appflow/549f19c914aabdf3946737fea189cb1f0985e971/sf-appflow-upsert-redshift-lambda/images/Redshift_Cluster.png -------------------------------------------------------------------------------- /sf-appflow-upsert-redshift-lambda/images/Redshift_Cluster_DB_NW.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-appflow/549f19c914aabdf3946737fea189cb1f0985e971/sf-appflow-upsert-redshift-lambda/images/Redshift_Cluster_DB_NW.png -------------------------------------------------------------------------------- /sf-appflow-upsert-redshift-lambda/images/Redshift_Cluster_Subnet_Group.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-appflow/549f19c914aabdf3946737fea189cb1f0985e971/sf-appflow-upsert-redshift-lambda/images/Redshift_Cluster_Subnet_Group.png -------------------------------------------------------------------------------- /sf-appflow-upsert-redshift-lambda/images/Redshift_IAM_Role.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-appflow/549f19c914aabdf3946737fea189cb1f0985e971/sf-appflow-upsert-redshift-lambda/images/Redshift_IAM_Role.png -------------------------------------------------------------------------------- /sf-appflow-upsert-redshift-lambda/images/Redshift_Secret.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-appflow/549f19c914aabdf3946737fea189cb1f0985e971/sf-appflow-upsert-redshift-lambda/images/Redshift_Secret.png -------------------------------------------------------------------------------- /sf-appflow-upsert-redshift-lambda/images/Redshift_Security_Group.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-appflow/549f19c914aabdf3946737fea189cb1f0985e971/sf-appflow-upsert-redshift-lambda/images/Redshift_Security_Group.png -------------------------------------------------------------------------------- /sf-appflow-upsert-redshift-lambda/images/Run_appflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-appflow/549f19c914aabdf3946737fea189cb1f0985e971/sf-appflow-upsert-redshift-lambda/images/Run_appflow.png -------------------------------------------------------------------------------- /sf-appflow-upsert-redshift-lambda/images/SF_AppFlow_Upsert_Redshift.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-appflow/549f19c914aabdf3946737fea189cb1f0985e971/sf-appflow-upsert-redshift-lambda/images/SF_AppFlow_Upsert_Redshift.jpg -------------------------------------------------------------------------------- /sf-appflow-upsert-redshift-lambda/images/appflow_sfdev_data_lake.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-appflow/549f19c914aabdf3946737fea189cb1f0985e971/sf-appflow-upsert-redshift-lambda/images/appflow_sfdev_data_lake.png -------------------------------------------------------------------------------- /sf-appflow-upsert-redshift-lambda/images/stage_lambda_destination.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-appflow/549f19c914aabdf3946737fea189cb1f0985e971/sf-appflow-upsert-redshift-lambda/images/stage_lambda_destination.png -------------------------------------------------------------------------------- /sf-appflow-upsert-redshift-lambda/images/vpc_public_private_subnet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-appflow/549f19c914aabdf3946737fea189cb1f0985e971/sf-appflow-upsert-redshift-lambda/images/vpc_public_private_subnet.png -------------------------------------------------------------------------------- /sf-appflow-upsert-redshift-lambda/lambda/functions/f_lambda_crawl_sfdev_appflow_account.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-appflow/549f19c914aabdf3946737fea189cb1f0985e971/sf-appflow-upsert-redshift-lambda/lambda/functions/f_lambda_crawl_sfdev_appflow_account.zip -------------------------------------------------------------------------------- /sf-appflow-upsert-redshift-lambda/lambda/functions/f_lambda_move_to_stage_account.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-appflow/549f19c914aabdf3946737fea189cb1f0985e971/sf-appflow-upsert-redshift-lambda/lambda/functions/f_lambda_move_to_stage_account.zip -------------------------------------------------------------------------------- /sf-appflow-upsert-redshift-lambda/lambda/functions/f_lambda_upsert_sfdev_appflow_account.zip: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/amazon-appflow/549f19c914aabdf3946737fea189cb1f0985e971/sf-appflow-upsert-redshift-lambda/lambda/functions/f_lambda_upsert_sfdev_appflow_account.zip -------------------------------------------------------------------------------- /sf-appflow-upsert-redshift-lambda/lambda/layers/f_lambda_call_redshift_data_api.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-appflow/549f19c914aabdf3946737fea189cb1f0985e971/sf-appflow-upsert-redshift-lambda/lambda/layers/f_lambda_call_redshift_data_api.zip -------------------------------------------------------------------------------- /sf-appflow-upsert-redshift-lambda/sqls/create_schema_ext_sfdev_appflow.sql: -------------------------------------------------------------------------------- 1 | /* Create external schema using Spectrum from the pre-defined Glue Catalog */ 2 | /* Replace XXXXXXXXXXXX with an actual AWS account number */ 3 | create external schema ext_sfdev_appflow 4 | from data catalog 5 | database 'sfdev-appflow' 6 | iam_role 'arn:aws:iam::XXXXXXXXXXXX:role/sfdev-appflow-redshift' 7 | create external database if not exists; -------------------------------------------------------------------------------- /sf-appflow-upsert-redshift-lambda/sqls/create_stage_target_schema.sql: -------------------------------------------------------------------------------- 1 | /* Create target schema */ 2 | create schema if not exists tgt_sfdev_appflow; 3 | 4 | /* Create stage schema */ 5 | create schema if not exists stg_sfdev_appflow; -------------------------------------------------------------------------------- /sf-appflow-upsert-redshift-lambda/sqls/create_stage_target_table.sql: -------------------------------------------------------------------------------- 1 | /* Create target table */ 2 | create table tgt_sfdev_appflow.account( 3 | id varchar, 4 | isdeleted boolean, 5 | name varchar, 6 | type varchar, 7 | billingstreet varchar, 8 | billingcity varchar, 9 | billingstate varchar, 10 | billingpostalcode integer, 11 | billingcountry varchar, 12 | phone varchar, 13 | fax varchar, 14 | accountnumber varchar, 15 | website varchar, 16 | industry varchar, 17 | tickersymbol varchar, 18 | description varchar, 19 | rating varchar, 20 | createddate timestamp, 21 | lastmodifieddate timestamp 22 | ); 23 | 24 | create table stg_sfdev_appflow.account (like tgt_sfdev_appflow.account); -------------------------------------------------------------------------------- /slack-appflow-sentiment/README.md: -------------------------------------------------------------------------------- 1 | ## How to run sentiment analysis on slack data using Amazon AppFlow with Amazon Comprehend 2 | 3 | [Amazon AppFlow](https://aws.amazon.com/appflow/) is a fully managed integration service that enables you to securely transfer data between Software-as-a-Service (SaaS) applications like Salesforce, Marketo, Slack, and ServiceNow, and AWS services like Amazon S3 and Amazon Redshift, in just a few clicks. 4 | 5 | Here I am going to demonstrate how you can extract conversations from slack using Amazon AppFlow to Amazon S3. Then I will call the [Amazon Comprehend](https://aws.amazon.com/comprehend/) service which is a natural language processing (NLP) service that uses machine learning to find insights and relationships in text to detect sentiments of the text. 
I will be using the [Amazon SageMaker](https://aws.amazon.com/sagemaker/) which is a fully managed service that provides every developer and data scientist with the ability to build, train, and deploy machine learning (ML) models quickly, to read the data from Amazon S3, call the Amazon Comprehend API to detect sentiments, and visualize it in Amazon SageMaker Notebook. 6 | 7 | 8 | ## Demonstration Video 9 | [How to run sentiment analysis on slack data using Amazon AppFlow with Amazon Comprehend](https://youtu.be/fCHkIwbcRtg) 10 | 11 | ## SageMaker Notebook 12 | * [Jupyter Notebook Viewer](https://nbviewer.jupyter.org/github/aws-samples/amazon-appflow/blob/master/slack-appflow-sentiment/notebooks/slack-sentiment.ipynb) 13 | * Notebook Link: https://github.com/aws-samples/amazon-appflow/blob/master/slack-appflow-sentiment/notebooks/slack-sentiment.ipynb 14 | -------------------------------------------------------------------------------- /slack-appflow-sentiment/notebooks/slack-sentiment.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Import libraries" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import boto3\n", 17 | "import json\n", 18 | "import pandas as pd" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "Installing file system interface for s3" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "!pip install s3fs" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "Set up the bucket name and key by entering the\n", 42 | "bucket_name=*enter your bucket name*,\n", 43 | "and *in_key_name=prefix/path/file*" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "bucket_name=''\n", 53 | "in_key_name='//'" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "Reading the file content from s3" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "s3 = boto3.resource('s3')\n", 70 | "obj = s3.Object(bucket_name,in_key_name)\n", 71 | "text=obj.get()['Body'].read().decode('utf-8')" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "Calling Comprehend API detect_sentiment for sentiment analysis" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "client = boto3.client('comprehend')\n", 88 | "lst=[]\n", 89 | "for line in text.splitlines():\n", 90 | " line_dict=json.loads(line)\n", 91 | " line_dict['Sentiment']=client.detect_sentiment(Text=line_dict['text'],LanguageCode='en')['Sentiment']\n", 92 | " lst.append(json.dumps(line_dict))\n", 93 | "joined_lines='\\n'.join(lst)" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [ 102 | "slack_df = pd.read_json(joined_lines,lines=True)" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "slack_df.head()" 112 | ] 113 | }, 114 | 
{ 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "slack_df[['text','Sentiment']]" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "slack_df[['text','Sentiment']].groupby(['Sentiment']).agg(['count'])" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "slack_df[['text','Sentiment']].groupby(['Sentiment']).agg(['count']).plot(kind='pie',subplots=True)" 139 | ] 140 | } 141 | ], 142 | "metadata": { 143 | "instance_type": "ml.t3.medium", 144 | "kernelspec": { 145 | "display_name": "Python 3 (Data Science)", 146 | "language": "python", 147 | "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-west-2:236514542706:image/datascience-1.0" 148 | }, 149 | "language_info": { 150 | "codemirror_mode": { 151 | "name": "ipython", 152 | "version": 3 153 | }, 154 | "file_extension": ".py", 155 | "mimetype": "text/x-python", 156 | "name": "python", 157 | "nbconvert_exporter": "python", 158 | "pygments_lexer": "ipython3", 159 | "version": "3.7.6" 160 | } 161 | }, 162 | "nbformat": 4, 163 | "nbformat_minor": 4 164 | } 165 | --------------------------------------------------------------------------------