├── .gitignore ├── terraform ├── .gitignore ├── plan.out ├── outputs.tf ├── terraform_backend.tf.template ├── terraform.tfvars.template ├── main.tf ├── template │ └── sagemaker_instance_init.sh ├── steps.txt ├── kinesis.tf ├── s3_kinesis.tf ├── cloudwatch_event.tf ├── lambda.tf ├── sagemaker.tf ├── variables.tf ├── s3_sagemaker.tf ├── s3_function.tf ├── s3_lambda.tf ├── iam_kinesis.tf ├── iam_lambda.tf ├── iam_sagemaker.tf ├── plan.txt └── terraform.tfstate.backup ├── .github └── PULL_REQUEST_TEMPLATE.md ├── cloudformation ├── run-unit-tests.sh ├── resources.txt ├── build-s3-dist.sh └── fraud-detection-using-machine-learning.template ├── README.md ├── source ├── fraud_detection │ └── index.py └── notebooks │ └── sagemaker_fraud_detection.ipynb └── LICENSE /.gitignore: -------------------------------------------------------------------------------- 1 | dist/ 2 | -------------------------------------------------------------------------------- /terraform/.gitignore: -------------------------------------------------------------------------------- 1 | .terraform/ 2 | terraform.tfvars 3 | tfplan 4 | terraform.tfstate 5 | terraform_backend.tf -------------------------------------------------------------------------------- /terraform/plan.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qtangs/tf-fraud-detection-using-machine-learning/HEAD/terraform/plan.out -------------------------------------------------------------------------------- /terraform/outputs.tf: -------------------------------------------------------------------------------- 1 | output "basic_notebook_instance_id" { 2 | value = "${aws_sagemaker_notebook_instance.basic.id}" 3 | } 4 | 5 | -------------------------------------------------------------------------------- /terraform/terraform_backend.tf.template: -------------------------------------------------------------------------------- 1 | terraform { 2 | backend "s3" { 3 | bucket = "" 4 
| key = "fraud-detection/terraform.tfstate" 5 | region = "" 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /terraform/terraform.tfvars.template: -------------------------------------------------------------------------------- 1 | aws_region="" 2 | aws_profile="" 3 | function_bucket_name="" 4 | function_version="" 5 | s3_bucket_name_1="" 6 | s3_bucket_name_2="" -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | *Issue #, if available:* 2 | 3 | *Description of changes:* 4 | 5 | 6 | By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice. 7 | -------------------------------------------------------------------------------- /terraform/main.tf: -------------------------------------------------------------------------------- 1 | provider "aws" { 2 | region = var.aws_region 3 | profile = var.aws_profile 4 | version = "~> 2.21" 5 | } 6 | 7 | provider "template" { 8 | version = "~> 2.1" 9 | } 10 | 11 | provider "archive" { 12 | version = "~> 1.2" 13 | } 14 | 15 | data "aws_caller_identity" "current" {} 16 | -------------------------------------------------------------------------------- /terraform/template/sagemaker_instance_init.sh: -------------------------------------------------------------------------------- 1 | cd /home/ec2-user/SageMaker 2 | aws s3 cp s3://${function_bucket_name}-${aws_region}/fraud-detection-using-machine-learning/${function_version}/notebooks/sagemaker_fraud_detection.ipynb . 
3 | sed -i 's/fraud-detection-end-to-end-demo/${s3_bucket_name_1}/g' sagemaker_fraud_detection.ipynb -------------------------------------------------------------------------------- /cloudformation/run-unit-tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script should be run from the repo's cloudformation directory (formerly named deployment) 4 | # cd cloudformation 5 | # ./run-unit-tests.sh 6 | 7 | # Run unit tests (placeholder: only changes into ../source and prints status messages) 8 | echo "Running unit tests" 9 | echo "cd ../source" 10 | cd ../source 11 | echo "No unit tests to run, so sad ..." 12 | echo "Completed unit tests" 13 | -------------------------------------------------------------------------------- /terraform/steps.txt: -------------------------------------------------------------------------------- 1 | 2 | terraform_backend.tf.template -> terraform_backend.tf 3 | terraform.tfvars.template -> terraform.tfvars 4 | 5 | export AWS_PROFILE= 6 | 7 | terraform init 8 | terraform validate 9 | terraform plan -out=tfplan 10 | terraform apply --auto-approve tfplan 11 | 12 | 13 | 14 | terraform plan -destroy -out=tfplan 15 | terraform apply tfplan 16 | -------------------------------------------------------------------------------- /terraform/kinesis.tf: -------------------------------------------------------------------------------- 1 | resource "aws_kinesis_firehose_delivery_stream" "fraud_detection_firehose_stream" { 2 | name = "fraud-detection-firehose-stream" 3 | destination = "s3" 4 | 5 | s3_configuration { 6 | bucket_arn = aws_s3_bucket.s3_bucket_2.arn 7 | prefix = var.kinesis_firehose_prefix 8 | buffer_interval = 60 9 | buffer_size = 100 10 | compression_format = "GZIP" 11 | role_arn = aws_iam_role.fraud_detection_firehose_role.arn 12 | } 13 | 14 | tags = { 15 | Group = var.default_resource_group 16 | CreatedBy = var.default_created_by 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /terraform/s3_kinesis.tf:
-------------------------------------------------------------------------------- 1 | resource "aws_s3_bucket" "s3_bucket_2" { 2 | bucket = "${var.s3_bucket_name_2}-${var.aws_region}" 3 | acl = "private" 4 | force_destroy = true # delete all data from this bucket before destroy 5 | 6 | server_side_encryption_configuration { 7 | rule { 8 | apply_server_side_encryption_by_default { 9 | sse_algorithm = "AES256" 10 | } 11 | } 12 | } 13 | 14 | tags = { 15 | Description = "Bucket for storing processed events for visualization features." 16 | Group = "${var.default_resource_group}" 17 | CreatedBy = "${var.default_created_by}" 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /terraform/cloudwatch_event.tf: -------------------------------------------------------------------------------- 1 | resource "aws_cloudwatch_event_rule" "fraud_detection_scheduled_rule" { 2 | name = "fraud-detection-scheduled-rule" 3 | description = "ScheduledRule" 4 | schedule_expression = "rate(1 minute)" 5 | is_enabled = false 6 | 7 | tags = { 8 | Group = "${var.default_resource_group}" 9 | CreatedBy = "${var.default_created_by}" 10 | } 11 | } 12 | 13 | resource "aws_cloudwatch_event_target" "fraud_detection" { 14 | target_id = "TargetFunctionV1" 15 | rule = aws_cloudwatch_event_rule.fraud_detection_scheduled_rule.name 16 | arn = aws_lambda_function.fraud_detection_event_processor.arn 17 | } 18 | 19 | resource "aws_lambda_permission" "fraud_detection" { 20 | action = "lambda:InvokeFunction" 21 | function_name = aws_lambda_function.fraud_detection_event_processor.function_name 22 | principal = "events.amazonaws.com" 23 | source_arn = aws_cloudwatch_event_rule.fraud_detection_scheduled_rule.arn 24 | } 25 | -------------------------------------------------------------------------------- /terraform/lambda.tf: -------------------------------------------------------------------------------- 1 | resource "aws_lambda_function" 
"fraud_detection_event_processor" { 2 | handler = "index.lambda_handler" 3 | function_name = "fraud-detection-event-processor" 4 | role = aws_iam_role.fraud_detection_lambda_role.arn 5 | s3_bucket = aws_s3_bucket.fraud_detection_function_bucket.id 6 | s3_key = "fraud-detection-using-machine-learning/${var.function_version}/fraud_detection.zip" 7 | 8 | # Take the hash from the archive_file data source instead of re-reading the zip 9 | # with filebase64sha256(): the file under dist/ is a git-ignored build artifact that 10 | # may not exist on disk when this expression is evaluated (e.g. applying a saved plan). 11 | source_code_hash = data.archive_file.fraud_detection_archive.output_base64sha256 12 | 13 | runtime = "python3.6" 14 | 15 | tags = { 16 | Group = var.default_resource_group 17 | CreatedBy = var.default_created_by 18 | } 19 | 20 | depends_on = [aws_s3_bucket_object.s3_fraud_detection_archive] 21 | } 22 | -------------------------------------------------------------------------------- /terraform/sagemaker.tf: -------------------------------------------------------------------------------- 1 | resource "aws_sagemaker_notebook_instance" "basic" { 2 | name = "FraudDetectionNotebookInstance" 3 | role_arn = aws_iam_role.sm_notebook_instance_role.arn 4 | instance_type = "ml.t2.medium" 5 | lifecycle_config_name = aws_sagemaker_notebook_instance_lifecycle_configuration.basic_lifecycle.name 6 | 7 | tags = { 8 | Group = var.default_resource_group 9 | CreatedBy = var.default_created_by 10 | } 11 | 12 | depends_on = [aws_s3_bucket_object.s3_fraud_detection_notebook ] 13 | } 14 | 15 | data "template_file" "instance_init" { 16 | template = file("${path.module}/template/sagemaker_instance_init.sh") 17 | 18 | vars = { 19 | s3_bucket_name_1 = aws_s3_bucket.s3_bucket_1.id 20 | aws_region = var.aws_region 21 | function_bucket_name = var.function_bucket_name 22 | function_version = var.function_version 23 | } 24 | } 25 | 26 | resource
"aws_sagemaker_notebook_instance_lifecycle_configuration" "basic_lifecycle" { 27 | name = "BasicNotebookInstanceLifecycleConfig" 28 | on_start = base64encode(data.template_file.instance_init.rendered) 29 | 30 | depends_on = [aws_s3_bucket.s3_bucket_1] 31 | } 32 | -------------------------------------------------------------------------------- /terraform/variables.tf: -------------------------------------------------------------------------------- 1 | # Default Tags 2 | variable "default_resource_group" { 3 | description = "Default value to be used in resources' Group tag." 4 | default = "fraud-detection" 5 | } 6 | 7 | variable "default_created_by" { 8 | description = "Default value to be used in resources' CreatedBy tag." 9 | default = "terraform" 10 | } 11 | 12 | # AWS Settings 13 | variable "aws_region" { 14 | default = "eu-west-2" 15 | } 16 | 17 | variable "aws_profile" { 18 | default = "default" 19 | } 20 | 21 | # Parameters 22 | variable "function_bucket_name" { 23 | description = "Name of the S3 bucket hosting the code for fraud_detection Lambda function." 24 | } 25 | 26 | variable "function_version" { 27 | description = "Version of the fraud_detection Lambda function to use." 28 | } 29 | 30 | variable "s3_bucket_name_1" { 31 | description = "New bucket for storing the Amazon SageMaker model and training data." 32 | } 33 | 34 | variable "s3_bucket_name_2" { 35 | description = "New bucket for storing processed events for visualization features." 36 | } 37 | 38 | variable "kinesis_firehose_prefix" { 39 | description = "Kinesis Firehose prefix for delivery of processed events." 
40 | default = "fraud-detection/firehose/" 41 | } 42 | -------------------------------------------------------------------------------- /terraform/s3_sagemaker.tf: -------------------------------------------------------------------------------- 1 | resource "aws_s3_bucket" "s3_bucket_1" { 2 | bucket = "${var.s3_bucket_name_1}-${var.aws_region}" 3 | acl = "private" 4 | force_destroy = true # delete all data from this bucket before destroy 5 | 6 | server_side_encryption_configuration { 7 | rule { 8 | apply_server_side_encryption_by_default { 9 | sse_algorithm = "AES256" 10 | } 11 | } 12 | } 13 | 14 | tags = { 15 | Description = "Bucket for storing the Amazon SageMaker model and training data." 16 | Group = "${var.default_resource_group}" 17 | CreatedBy = "${var.default_created_by}" 18 | } 19 | } 20 | 21 | resource "aws_s3_bucket_object" "s3_fraud_detection_notebook" { 22 | bucket = aws_s3_bucket.fraud_detection_function_bucket.id 23 | key = "fraud-detection-using-machine-learning/${var.function_version}/notebooks/sagemaker_fraud_detection.ipynb" 24 | source = "${path.module}/../source/notebooks/sagemaker_fraud_detection.ipynb" 25 | 26 | # The filemd5() function is available in Terraform 0.11.12 and later 27 | # For Terraform 0.11.11 and earlier, use the md5() function and the file() function: 28 | # etag = "${md5(file("path/to/file"))}" 29 | etag = filemd5("${path.module}/../source/notebooks/sagemaker_fraud_detection.ipynb") 30 | } 31 | -------------------------------------------------------------------------------- /terraform/s3_function.tf: -------------------------------------------------------------------------------- 1 | resource "aws_s3_bucket" "fraud_detection_function_bucket" { 2 | bucket = "${var.function_bucket_name}-${var.aws_region}" 3 | acl = "private" 4 | 5 | server_side_encryption_configuration { 6 | rule { 7 | apply_server_side_encryption_by_default { 8 | sse_algorithm = "AES256" 9 | } 10 | } 11 | } 12 | 13 | tags = { 14 | Description = "Bucket 
hosting the code for fraud_detection Lambda function." 15 | Group = var.default_resource_group 16 | CreatedBy = var.default_created_by 17 | } 18 | } 19 | 20 | data "archive_file" "fraud_detection_archive" { 21 | type = "zip" 22 | source_file = "${path.module}/../source/fraud_detection/index.py" 23 | output_path = "${path.module}/../dist/fraud_detection.zip" 24 | } 25 | 26 | resource "aws_s3_bucket_object" "s3_fraud_detection_archive" { 27 | bucket = aws_s3_bucket.fraud_detection_function_bucket.id 28 | key = "fraud-detection-using-machine-learning/${var.function_version}/fraud_detection.zip" 29 | source = data.archive_file.fraud_detection_archive.output_path 30 | 31 | # etag is taken from the archive_file data source (output_md5) rather than from 32 | # filemd5() on the zip: the zip under dist/ is a generated, git-ignored artifact 33 | # that may not exist on disk when the expression is evaluated. 34 | etag = data.archive_file.fraud_detection_archive.output_md5 # MD5 of the generated zip (not of index.py itself); re-uploads when the archive changes 35 | } 36 | -------------------------------------------------------------------------------- /terraform/s3_lambda.tf: -------------------------------------------------------------------------------- 1 | resource "aws_s3_bucket" "fraud_detection_function_bucket" { 2 | bucket = "${var.function_bucket_name}-${var.aws_region}" 3 | acl = "private" 4 | 5 | server_side_encryption_configuration { 6 | rule { 7 | apply_server_side_encryption_by_default { 8 | sse_algorithm = "AES256" 9 | } 10 | } 11 | } 12 | 13 | tags = { 14 | Description = "Bucket hosting the code for fraud_detection Lambda function."
15 | Group = var.default_resource_group 16 | CreatedBy = var.default_created_by 17 | } 18 | } 19 | 20 | data "archive_file" "fraud_detection_archive" { 21 | type = "zip" 22 | source_file = "${path.module}/../source/fraud_detection/index.py" 23 | output_path = "${path.module}/../dist/fraud_detection.zip" 24 | } 25 | 26 | resource "aws_s3_bucket_object" "s3_fraud_detection_archive" { 27 | bucket = aws_s3_bucket.fraud_detection_function_bucket.id 28 | key = "fraud-detection-using-machine-learning/${var.function_version}/fraud_detection.zip" 29 | source = data.archive_file.fraud_detection_archive.output_path 30 | 31 | # NOTE(review): this file repeats every declaration in s3_function.tf; Terraform loads 32 | # all .tf files of a directory as one module, so only one of the two files should be kept. 33 | # etag comes from the data source (output_md5) rather than filemd5() on the generated zip. 34 | etag = data.archive_file.fraud_detection_archive.output_md5 # MD5 of the generated zip (not of index.py itself); re-uploads when the archive changes 35 | } 36 | -------------------------------------------------------------------------------- /cloudformation/resources.txt: -------------------------------------------------------------------------------- 1 | Logical ID Physical ID Type Status Status reason 2 | BasicNotebookInstance arn:aws:sagemaker:us-east-1:989069432549:notebook-instance/frauddetectionnotebookinstance AWS::SageMaker::NotebookInstance CREATE_COMPLETE - 3 | BasicNotebookInstanceLifecycleConfig arn:aws:sagemaker:us-east-1:989069432549:notebook-instance-lifecycle-config/basicnotebookinstancelifecycleconfig-nbtnsfov4xei AWS::SageMaker::NotebookInstanceLifecycleConfig CREATE_COMPLETE - 4 | FirehoseDeliveryIAMPolicy aws-f-Fire-1OAVA6HNY7OCK AWS::IAM::Policy CREATE_COMPLETE - 5 | FirehoseDeliveryIAMRole aws-fraud-detection-using-FirehoseDeliveryIAMRole-AOTARKHYRERQ AWS::IAM::Role CREATE_COMPLETE - 6 | KinesisFirehoseDeliveryStream fraud-detection-firehose-stream AWS::KinesisFirehose::DeliveryStream CREATE_COMPLETE - 7 | LambdaExecutionRole
aws-fraud-detection-using-mach-LambdaExecutionRole-1GQPB8RU28LAN AWS::IAM::Role CREATE_COMPLETE - 8 | LambdaFunction fraud-detection-event-processor AWS::Lambda::Function CREATE_COMPLETE - 9 | NotebookInstanceExecutionRole aws-fraud-detection-using-NotebookInstanceExecutio-XYJKAT5HT62K AWS::IAM::Role CREATE_COMPLETE - 10 | NotebookInstanceIAMPolicy aws-f-Note-1XD0QCDLFYVMY AWS::IAM::Policy CREATE_COMPLETE - 11 | PermissionForEventsToInvokeLambda aws-fraud-detection-using-machine-learning-PermissionForEventsToInvokeLambda-1BYURKUXUSOPL AWS::Lambda::Permission CREATE_COMPLETE - 12 | S3Bucket1 aws-fraud-detection-model-data AWS::S3::Bucket CREATE_COMPLETE - 13 | S3Bucket2 aws-fraud-detection-processed-events AWS::S3::Bucket CREATE_COMPLETE - 14 | ScheduledRule aws-fraud-detection-using-machine-le-ScheduledRule-KMHPLMNKWI32 AWS::Events::Rule CREATE_COMPLETE - -------------------------------------------------------------------------------- /terraform/iam_kinesis.tf: -------------------------------------------------------------------------------- 1 | resource "aws_iam_role" "fraud_detection_firehose_role" { 2 | name = "fraud-detection-firehose-role" 3 | 4 | tags = { 5 | Group = "${var.default_resource_group}" 6 | CreatedBy = "${var.default_created_by}" 7 | } 8 | 9 | assume_role_policy = < 26 | 27 | terraform init 28 | terraform validate 29 | terraform plan -out=tfplan 30 | terraform apply --auto-approve tfplan 31 | ``` 32 | 33 | 34 | ## Clean up 35 | 36 | ``` 37 | terraform plan -destroy -out=tfplan 38 | terraform apply tfplan 39 | ``` 40 | 41 | ## Original source 42 | https://github.com/awslabs/fraud-detection-using-machine-learning 43 | 44 | Original CloudFormation script can be found at `cloudformation` folder (renamed from `deployment`). 45 | 46 | 47 | ## License 48 | 49 | This library is licensed under the Apache 2.0 License. 
50 | -------------------------------------------------------------------------------- /terraform/iam_lambda.tf: -------------------------------------------------------------------------------- 1 | resource "aws_iam_role" "fraud_detection_lambda_role" { 2 | name = "fraud-detection-lambda-role" 3 | 4 | tags = { 5 | Group = "${var.default_resource_group}" 6 | CreatedBy = "${var.default_created_by}" 7 | } 8 | 9 | assume_role_policy = <