├── .github └── PULL_REQUEST_TEMPLATE.md ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── lab1-serveless-cloudfront-log-analysis ├── README.md ├── assets │ ├── alb-access-optimized.png │ ├── amazon-s3-create-bucket.png │ ├── amazon-s3.png │ ├── architecture-diagram.png │ ├── architecture-overview-all.png │ ├── assets.txt │ ├── athena-database.png │ ├── athena-table.png │ ├── cf-access-optimized.png │ ├── combine-schema.png │ ├── combined-logs-all.png │ ├── device-form-factor-chart.png │ ├── device-form-factor-visualize-2.png │ ├── device-form-factor-visualize.png │ ├── edge-to-origin-chart.png │ ├── edge-to-origin-filter-summary.png │ ├── edge-to-origin-filter.png │ ├── edge-to-origin-visualize.png │ ├── edge-to-origin-x-axis.png │ ├── glue-job-complete.png │ ├── lambda-edge.png │ ├── le-combined-logs.png │ ├── log-collection.png │ ├── origin-request-optimized.png │ ├── product-category-chart.png │ ├── product-category-filter.png │ ├── product-category-sort.png │ ├── quicksight-account-create.png │ ├── quicksight-athena-ds.png │ ├── quicksight-datasource.png │ ├── quicksight-edition.png │ ├── quicksight-manage.png │ ├── quicksight-new-field.png │ ├── quicksight-permission.png │ ├── quicksight-region-selection.png │ ├── quicksight-s3-bucket-selection.png │ ├── quicksight-signup.png │ ├── quicksight-status-code-filter-summary.png │ ├── quicksight-status-code-pop-filter.png │ ├── quicksight-status-code-pop.png │ ├── quicksight-status-code-visualize-1.png │ ├── quicksight-status-code-visualize-2.png │ ├── quicksight-table-selection.png │ ├── quicksight-visualization-all.png │ ├── time-taken-chart.png │ ├── time-taken-filter-summary.png │ ├── time-taken-visualize-2.png │ ├── time-taken-visualize.png │ └── viewer-request-optimized.png ├── lelogconverter.py ├── log-combiner-glue-script.py ├── originRequest-Lambda │ └── index.js ├── sample-logs │ └── raw-logs │ │ ├── sample-alb-logs.gz │ │ ├── sample-cloudfront-access-logs.gz │ │ ├── 
sample-lambda-at-edge-origin-request-logs.gz │ │ └── sample-lambda-at-edge-viewer-request.gz └── viewerRequest-Lambda │ └── index.js └── lab2-elk-cloudfront-log-analysis ├── README.md ├── assets ├── Cf1.png ├── Cf2.png ├── Cf3.png ├── Cf4.png ├── Cf5png.png ├── architecture.png ├── asset.txt ├── cleanup1.png ├── esDomain1.png ├── esDomain2.png ├── esDomain3.png ├── esIndices1.png ├── esIndices2.png ├── esIndices3.png ├── keyPair1.png ├── keyPair2.png ├── keyPair3.png ├── kibana1.png ├── kibana10.png ├── kibana11.png ├── kibana12.png ├── kibana13.png ├── kibana14.png ├── kibana15.png ├── kibana16.png ├── kibana17.png ├── kibana18.png ├── kibana2.png ├── kibana3.png ├── kibana4.png ├── kibana5.png ├── kibana6.png ├── kibana7.png ├── kibana8.png ├── kibana9.png ├── s3bucket1.png └── s3bucket2.png ├── config ├── CloudFront-Analysis-ELK-Lab.json ├── cloudfront.conf ├── indextemplate.json └── lab2-nginx.conf ├── kibanageorequests.json └── kibanamaxlatencypercity.json /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | *Issue #, if available:* 2 | 3 | *Description of changes:* 4 | 5 | 6 | By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice. 7 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 
5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check [existing open](https://github.com/aws-samples/amazon-cloudfront-log-analysis/issues), or [recently closed](https://github.com/aws-samples/amazon-cloudfront-log-analysis/issues?utf8=%E2%9C%93&q=is%3Aissue%20is%3Aclosed%20), issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *master* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. 
Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any ['help wanted'](https://github.com/aws-samples/amazon-cloudfront-log-analysis/labels/help%20wanted) issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](https://github.com/aws-samples/amazon-cloudfront-log-analysis/blob/master/LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 
60 | 61 | We may ask you to sign a [Contributor License Agreement (CLA)](http://en.wikipedia.org/wiki/Contributor_License_Agreement) for larger changes. 62 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this 4 | software and associated documentation files (the "Software"), to deal in the Software 5 | without restriction, including without limitation the rights to use, copy, modify, 6 | merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 7 | permit persons to whom the Software is furnished to do so. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 10 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 11 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 12 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 13 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 14 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 15 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Analyze & Visualize Amazon CloudFront and Lambda@Edge Logs to Improve Customer Experience on your Website. 2 | 3 | ## Overview 4 | 5 | Nowadays, web servers are often fronted by a global content delivery network, such as Amazon CloudFront, to accelerate delivery of websites, APIs, media content, and other web assets. 
In this hands-on-workshop, learn to improve website availability, optimize content based on devices, browser and user demographics, identify and analyze CDN usage patterns, and perform end-to-end debugging by correlating logs from various points in a request-response pipeline. Build an end-to-end serverless solution to analyze Amazon CloudFront logs using AWS Glue and Amazon Athena, generate visualization to derive deeper insights using Amazon QuickSight, and correlate with other logs such Lambda@Edge logs, ALB logs to provide finer debugging experiences. You will also learn how to use the popular ELK(Elasticsearch,Logstash,Kibana) solution for geospatial visualization of CloudFront logs. Discuss how you can extend the pipeline you just built to generate deeper insights needed to improve the overall experience for your users. 6 | 7 | ## AWS Console 8 | 9 | ### Verifying your region in the AWS Management Console 10 | 11 | With Amazon Ec2, you can place instances in multiple locations. Amazon EC2 locations are composed of regions that contain more that one Availability Zones. Regions are dispersed and located in separate geographic areas (US, EU, etc.). Availability Zones are distinct locations within a region. They are are engineered to be isolated from failures in other Availability Zones and to provide inexpensive, low-latency network connectivity to other Availability Zones in the same region. 12 | 13 | By launching instances in separate regions, you can design your application to be closer to specific customers or to meet legal or other requirements. By launching instances in separate Availability Zones, you can protect your application from localized regional failures. 14 | 15 | ### Verify your Region 16 | 17 | The AWS region name is always listed in the upper-right corner of the AWS Management Console, in the navigation bar. 18 | 19 | * Make a note of the AWS region name, for example, for this lab you will need to choose the **EU West-1 (Ireland)** region. 
20 | * Use the chart below to determine the region code. Choose **eu-west-1 for this lab.** 21 | 22 | | Region Name |Region Code| 23 | |---|---| 24 | |US East (Northern Virginia) Region|us-east-1 | 25 | |US West (Oregon) Region|us-west-2| 26 | |Asia Pacific (Tokyo) Region|ap-northeast-1| 27 | |Asia Pacific (Seoul) Region|ap-northeast-2| 28 | |Asia Pacific (Singapore) Region|ap-southeast-1| 29 | |Asia Pacific (Sydney) Region|ap-southeast-2| 30 | |EU (Ireland) Region|eu-west-1| 31 | |EU (Frankfurt) Region|eu-central-1| 32 | 33 | --- 34 | ## Labs 35 | 36 | ### Pre-requisites 37 | You should have active AWS account with Administrator IAM role 38 | 39 | |Lab|Name| 40 | |---|----| 41 | |Lab 1|[Serverless Amazon CloudFront Log Analysis Pipeline](./lab1-serveless-cloudfront-log-analysis)| 42 | |Lab 2|[Amazon CloudFront Log Analysis using ELK](./lab2-elk-cloudfront-log-analysis)| 43 | 44 | ## Deploy Solution 45 | We recommend to deploy the solution for Lab2 using CloudFormation template while we go through the presentation. This is to save your time for Lab #2. Please complete the following 2 steps to deploy solution. The CloudFormation will take about 10 minutes to complete. 46 | - [Create a Key Pair for EC2 Instances](https://github.com/aws-samples/amazon-cloudfront-log-analysis/tree/master/lab2-elk-cloudfront-log-analysis#create-a-key-pair-for-ec2-instances) 47 | - [Deploy Solution](https://github.com/aws-samples/amazon-cloudfront-log-analysis/tree/master/lab2-elk-cloudfront-log-analysis#deploy-solution) 48 | 49 | ## License Summary 50 | 51 | This sample code is made available under a modified MIT license. See the LICENSE file. 
52 | -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/README.md: -------------------------------------------------------------------------------- 1 | # LAB 1: Serverless Amazon CloudFront Log Analysis Pipeline 2 | 3 | 4 | * [Overview](#overview) 5 | * [Log collection](#log-collection) 6 | * [Lab Overview](#lab-overview) 7 | * [Lambda @ Edge](#lambda--edge) 8 | * [Pre-requisites](#pre-requisites) 9 | * [Create Amazon S3 Bucket](#create-amazon-s3-bucket) 10 | * [Creating Glue Data Catalog Database and Table using Amazon Athena](#creating-glue-data-catalog-database-and-table-using-amazon-athena) 11 | * [Create Glue Data Catalog Database using Amazon Athena](#create-glue-data-catalog-database-using-amazon-athena) 12 | * [Create Glue Data Catalog for CloudFront Access Logs in optimized Parquet Format](#create-glue-data-catalog-for-cloudfront-access-logs-in-optimized-parquet-format) 13 | * [Create Glue Data Catalog for Application Load Balancer(ALB) Access Logs in optimized Parquet Format](#create-glue-data-catalog-for-application-load-balanceralb-access-logs-in-optimized-parquet-format) 14 | * [Create Glue Data Catalog for Lambda@Edge Logs - Viewer Request in optimized Parquet Format](#create-glue-data-catalog-for-lambdaedge-logs---viewer-request-in-optimized-parquet-format) 15 | * [Create Glue Data Catalog for Lambda@Edge Logs - Origin Request in optimized Parquet Format](#create-glue-data-catalog-for-lambdaedge-logs---origin-request-in-optimized-parquet-format) 16 | * [Combine the logs using an AWS Glue ETL Job](#combine-the-logs-using-an-aws-glue-elt-job) 17 | * [Create AWS IAM Role](#create-aws-iam-role) 18 | * [Create AWS Glue ETL Job](#create-aws-glue-etl-job) 19 | * [Combine the logs using an AWS Glue ETL Job](#combine-the-logs-using-an-aws-glue-etl-job) 20 | * [(Optional)Create AWS Glue Data Catalog for the combined Lamabda@Eddge logs using Amazon 
Athena](#optional-create-aws-glue-data-catalog-for-the-combined-lamabdaeddge-logs-using-amazon-athena) 21 | * [Create AWS Glue Data Catalog for the combined logs using Amazon Athena](#create-aws-glue-data-catalog-for-the-combined-logs-using-amazon-athena) 22 | * [Visualization using Amazon QuickSight](#visualization-using-amazon-quicksight) 23 | * [Signing Up for Amazon QuickSight Standard Edition](#signing-up-for-amazon-quicksight-standard-edition) 24 | * [Configure Amazon S3 bucket Permission](#configure-amazon-s3-bucket-permission) 25 | * [Configuring Amazon QuickSight to use Amazon Athena as data source](#configuring-amazon-quicksight-to-use-amazon-athena-as-data-source) 26 | * [Generating new calculated fields in Amazon QuickSight](#generating-new-calculated-fields-in-amazon-quickSight) 27 | * [Create new calculated fields “EdgeToOriginTimeTaken” in Amazon QuickSight](#create-new-calculated-fields-edgetoorigintimetaken-in-amazon-quicksight) 28 | * [Create new calculated fields "HourOfDay" in Amazon QuickSight](#create-new-calculated-fields-hourofday-in-amazon-quicksight) 29 | * [Create new calculated fields "TotalTimeTakenAtALB" in Amazon QuickSight](#create-new-calculated-fields-totaltimetakenatalb-in-amazon-quicksight) 30 | * [Generate Visualization using Amazon QuickSight](#visualization-using-amazon-quicksight) 31 | * [Generate visualization to status code by edge location](#generate-visualization-to-status-code-by-edge-location) 32 | * [(Optional)Generate visualization to status code by URI](#optional-generate-visualization-to-status-code-by-uri) 33 | * [Generate visualization to show hourly average time taken between edge and origin by country where the end user request originated from](#generate-visualization-to-show-hourly-average-time-taken-between-edge-and-origin-by-country-where-the-end-user-request-originated-from) 34 | * [Generate visualization to show hourly average time taken (total Vs. edge to origin Vs. 
server-side processing) by country where the end user request originated from](#generate-visualization-to-show-hourly-average-time-taken-total-vs-edge-to-origin-vs-server-side-processing-by-country-where-the-end-user-request-originated-from) 35 | * [(Optional)Generate visualization to show hourly average time taken (total Vs. edge to origin V.s server-side processing) by country where the end user request originated from for a different viewer country](#optional-generate-visualization-to-show-hourly-average-time-taken-total-vs-edge-to-origin-vs-server-side-processing-by-country-where-the-end-user-request-originated-from-for-a-different-viewer-country) 36 | * [Generate Visualization to show product category request by country](#generate-visualization-to-show-product-category-request-by-country) 37 | * [(Optional)Generate visualization to show device form factor ratio](#optional-generate-visualization-to-show-device-form-factor-ratio) 38 | * [(Optional)Generate visualization to show device form factor ration by viewer country](#optional-generate-visualization-to-show-device-form-factor-ration-by-viewer-country) 39 | 40 | --- 41 | --- 42 | 43 | ## Overview 44 | 45 | ### Log collection 46 | 47 | ![log-collection.png](./assets/log-collection.png) 48 | 49 | As part of the log data generation generation, the following four different logs have been collected 50 | 51 | |Log Name|Raw Log Location|Format|Log Entries| 52 | |---|----|---|-----| 53 | |Viewer request triggered Lambda@Edge logs|aws s3 ls s3://eu-west-1.data-analytics/raw/lelogs/viewer-request/|JSON|{executionregion, requestid, distributionid, distributionname, eventtype, requestdata, customtraceid, useragentstring}| 54 | |Amazon CloudFront access logs|aws s3 ls s3://eu-west-1.data-analytics/raw/cf-accesslogs/|CSV|[Web Distribution Log File Format](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/AccessLogs.html#BasicDistributionFileFormat)| 55 | |Origin request triggered Lambda@Edge logs|aws s3 
ls s3://eu-west-1.data-analytics/raw/lelogs/origin-request/|JSON|{executionregion, requestid, distributionid, distributionname, eventtype, requestdata, customtraceid, viewercountry, deviceformfactor}| 56 | |Application Load Balancer(ALB) logs|aws s3 ls s3://eu-west-1.data-analytics/raw/lblogs/|JSON|[Access Log Entries](https://docs.aws.amazon.com/elasticloadbalancing/latest/application/load-balancer-access-logs.html#access-log-entry-format)| 57 | 58 | --- 59 | 60 | ### Lab Overview 61 | 62 | ![architecture-overview-all.png](./assets/architecture-overview-all.png) 63 | 64 | In this lab, you are going to build a serverless architecture to combine all the four logs - 1) Viewer request triggered Lambda@Edge logs, 2) Origin request triggered Lambda@Edge logs, 3) Amazon CloudFront access logs and 4) Application Load Balancer(ALB) logs using AWS Glue and then analyze the combined logs using Amazon Athena and visualize in Amazon QuickSight. The logs you are going to use is already converter from raw logs in CSV or JSON format to optimized logs into partition and compresses parquet format. 
65 | 66 | |Log Name|Partition|Conversion Script|Github|Optimized Log Location| 67 | |---|----|---|---|---| 68 | |Viewer request triggered Lambda@Edge logs|year, month, day, hour|[lelogconverter.py](./lelogconverter.py)|-|aws s3 ls s3://eu-west-1.data-analytics/cflogworkshop/optimized/lelogs/viewer-request/| 69 | |Amazon CloudFront access logs|year, month, day|[sample_cloudfront_job.py](https://github.com/awslabs/athena-glue-service-logs/blob/master/scripts/sample_cloudfront_job.py)|[Link](https://github.com/awslabs/athena-glue-service-logs)|aws s3 ls s3://us-east-1.data-analytics/cflogworkshop/optimized/cf-accesslogs/| 70 | |Origin request triggered Lambda@Edge logs|year, month, day, hour|[lelogconverter.py](./lelogconverter.py)|-|aws s3 ls s3://eu-west-1.data-analytics/cflogworkshop/optimized/lelogs/origin-request/| 71 | |Application Load Balancer(ALB) logs|region, year, month, day|[sample_alb_job.py](https://github.com/awslabs/athena-glue-service-logs/blob/master/scripts/sample_alb_job.py)|[Link](https://github.com/awslabs/athena-glue-service-logs)|aws s3 ls s3://eu-west-1.data-analytics/cflogworkshop/optimized/lblogs/| 72 | 73 | --- 74 | 75 | ### Lambda @ Edge 76 | 77 | ![lambda-edge.png](./assets/lambda-edge.png) 78 | 79 | |EventType|Script| 80 | |---|----| 81 | |Viewer Request|[index.js](./viewerRequest-Lambda/index.js)| 82 | |Origin Request|[index.js](./originRequest-Lambda/index.js)| 83 | 84 | --- 85 | --- 86 | 87 | ## Pre-requisites 88 | This module requires: 89 | - You should have active AWS account with Administrator IAM role. 90 | 91 | --- 92 | --- 93 | 94 | ## Create Amazon S3 Bucket 95 | 96 | In this section you will be creating an Amazon S3 bucket to store the combined (by joining Viewer request triggered Lambda@Edge logs, Origin request triggered Lambda@Edge logs, Amazon CloudFront access logs and Application Load Balancer(ALB) logs) and optimized logs written by the AWS Glue ETL job that you create and execute as part of this workshop. 
97 | 98 | - Open the AWS Management console for Amazon S3 from [here](https://s3.console.aws.amazon.com/s3/home?region=eu-west-1) 99 | - On the S3 Dashboard, Click on **Create Bucket.** 100 | 101 | ![amazon-s3.png](./assets/amazon-s3.png) 102 | 103 | - In the **Create Bucket** pop-up page, input a unique **Bucket name**. Choose a large bucket name with many random characters and numbers (no spaces). You will need this Bucket name later in this exercise. 104 | - Select the region as **EU (Ireland)** 105 | - Click **Next** to navigate to next tab 106 | - In the **Configure Options** tab, leave all options as default 107 | - Click **Next** to navigate to next tab 108 | - In the **Set permissions** tag, leave all options as default 109 | - Click **Next** to navigate to next tab 110 | - In the **Review** tab, click on **Create Bucket** 111 | 112 | ![amazon-s3-create-bucket.png](./assets/amazon-s3-create-bucket.png) 113 | 114 | --- 115 | --- 116 | 117 | ## Creating Glue Data Catalog Database and Table using Amazon Athena 118 | 119 | In this section you will be creating an AWS Data Catalog Database along with the tables pointing to the optimized logs. These logs have been pre-generated as part of the workshop. You will be creating the following tables, loading the partitions into each of these tables, and previewing the fields. 120 | 121 | |Table Name|Log Name|Partition| 122 | |---|---|----| 123 | |lambdaedge_logs_viewer_request_optimized|Viewer request triggered Lambda@Edge logs|year, month, day, hour| 124 | |cf_access_optimized|Amazon CloudFront access logs|year, month, day| 125 | |lambdaedge_logs_origin_request_optimized|Origin request triggered Lambda@Edge logs|year, month, day, hour| 126 | |alb_access_optimized|Application Load Balancer(ALB) logs|region, year, month, day| 127 | 128 | The AWS Glue ETL job that will combine all the four logs will refer to metadata in AWS Glue data catalog to read the logs from Amazon S3. 
129 | 130 | ### Create Glue Data Catalog Database using Amazon Athena 131 | 132 | - Open the AWS Management Console for Athena from [here](https://console.aws.amazon.com/athena/home). 133 | - If this is your first time visiting the AWS Management Console for Athena, you will get a Getting Started page. Choose Get Started to open the Query Editor. If this isn't your first time, the Athena Query Editor opens. 134 | - Make a note of the AWS region name, for example, for this lab you will need to choose the *EU (Ireland)* region. 135 | - In the *Athena Query Editor*, you will see a query pane with an example query. Now you can start entering your query in the query pane. 136 | - To create a database named reInvent2018_aws_service_logs, copy the following statement, and then choose Run Query: 137 | 138 | ```sql 139 | CREATE DATABASE IF NOT EXISTS reInvent2018_aws_service_logs 140 | ``` 141 | 142 | ![athena-database.png](./assets/athena-database.png) 143 | 144 | - Ensure *reInvent2018_aws_service_logs* appears in the DATABASE list on the Catalog dashboard 145 | 146 | --- 147 | 148 | ### Create Glue Data Catalog for CloudFront Access Logs in optimized Parquet Format 149 | 150 | - Ensure that current AWS region is **EU (Ireland)** region 151 | - Ensure *reInvent2018_aws_service_logs* is selected from the DATABASE list and then choose New Query. 
In the query pane, copy the following statement to create the *cf_access_optimized* table
192 | 193 | Choose **New Query**, copy the following statement into the query pane, and then choose **Run Query** to add partition metadata 194 | 195 | ```sql 196 | MSCK REPAIR TABLE reInvent2018_aws_service_logs.cf_access_optimized 197 | ``` 198 | 199 | - Get the total number of CloudFront Access Log records: 200 | 201 | ```sql 202 | SELECT count(*) AS rowcount FROM reInvent2018_aws_service_logs.cf_access_optimized 203 | ``` 204 | 205 | > :warning: Ensure that the rowcount = **207535** 206 | 207 | - Get the first ten records: 208 | 209 | ```sql 210 | SELECT * FROM reInvent2018_aws_service_logs.cf_access_optimized LIMIT 10 211 | ``` 212 | 213 | *After a few seconds, Athena will display your query results as shown below:* 214 | 215 | ![cf-access-optimized.png](./assets/cf-access-optimized.png) 216 | 217 | 218 |
219 | 220 | Click to expand to review the values in the following fields/columns as you will be using them in this workshop 221 | 222 | |Field Name|Description|type 223 | |---|----|---| 224 | |requestid|An encrypted string that uniquely identifies a request. This field value is used to join the optimized CloudFront access logs with the optimized Lambda@Edge logs|string| 225 | |time|The time when the CloudFront server finished responding to the request (in UTC), for example, 01:42:39|timestamp| 226 | |location|The edge location that served the request. Each edge location is identified by a three-letter code and an arbitrarily assigned number, for example, DFW3. The three-letter code typically corresponds with the International Air Transport Association airport code for an airport near the edge location. (These abbreviations might change in the future.) For a list of edge locations, see the Amazon CloudFront detail page, [http://aws.amazon.com/cloudfront](http://aws.amazon.com/cloudfront)|string| 227 | |uri|The query string portion of the URI, if any. When a URI doesn't contain a query string, the value of cs-uri-query is a hyphen (-). For more information, see [Caching Content Based on Query String Parameters](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/QueryStringParameters.html).|string| 228 | |status| One of the following values:
237 | 238 | --- 239 | 240 | ### Create Glue Data Catalog for Application Load Balancer(ALB) Access Logs in optimized Parquet Format 241 | 242 | In the query pane, copy the following statement to create a the **alb_access_optimized** table, and then choose **Run Query**: 243 | 244 | ```sql 245 | CREATE EXTERNAL TABLE IF NOT EXISTS reInvent2018_aws_service_logs.alb_access_optimized( 246 | type string, 247 | time timestamp, 248 | elb string, 249 | client_ip_port string, 250 | target_ip_port string, 251 | request_processing_time double, 252 | target_processing_time double, 253 | response_processing_time double, 254 | elb_status_code string, 255 | target_status_code string, 256 | received_bytes bigint, 257 | sent_bytes bigint, 258 | request_verb string, 259 | request_url string, 260 | request_proto string, 261 | user_agent string, 262 | ssl_cipher string, 263 | ssl_protocol string, 264 | target_group_arn string, 265 | trace_id string, 266 | domain_name string, 267 | chosen_cert_arn string) 268 | PARTITIONED BY ( 269 | region string, 270 | year string, 271 | month string, 272 | day string) 273 | ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' 274 | STORED AS PARQUET 275 | LOCATION 's3://eu-west-1.data-analytics/cflogworkshop/optimized/lblogs' 276 | TBLPROPERTIES("parquet.compress"="SNAPPY") 277 | ``` 278 | 279 | Now that you have created the table you need to add the partition metadata to the AWS Glue Catalog. 280 | 281 | - Choose **New Query**, copy the following statement into the query pane, and then choose **Run Query** to add partition metadata. 
282 | 283 | ```sql 284 | MSCK REPAIR TABLE reInvent2018_aws_service_logs.alb_access_optimized 285 | ``` 286 | 287 | - Get the total number of ALB Access Log records: 288 | 289 | ```sql 290 | SELECT count(*) AS rowcount FROM reInvent2018_aws_service_logs.alb_access_optimized 291 | ``` 292 | 293 | > :warning: Ensure that the rowcount = **15355** 294 | 295 | - Get the first ten records: 296 | 297 | ```sql 298 | SELECT * FROM reInvent2018_aws_service_logs.alb_access_optimized LIMIT 10 299 | ``` 300 | 301 | After a few seconds, Athena will display your query results as shown below: 302 | 303 | ![alb-access-optimized.png](./assets/alb-access-optimized.png) 304 | 305 |
306 | 307 | Click to expand to review the values in the following fields/columns as you will be using them in this workshop 308 | 309 | |Field Name|Description|type 310 | |---|----|---| 311 | |trace_id|The contents of the X-Amzn-Trace-Id header, enclosed in double quotes. This field is used to join the optimized ALB logs with the optimized Lambda@Edge logs which in turn is used to correlate with the optimized CloudFront access logs using the requestId filed. For more information see [Request Tracing for Your Application Load Balancer](https://docs.aws.amazon.com/elasticloadbalancing/latest/application/load-balancer-request-tracing.html). Example value: ```X-Amzn-Trace-Id: Self=1-67891234-12456789abcdef012345678;Root=1-67891233-abcdef012345678912345678```|string| 312 | |request_processing_time|The total time elapsed (in seconds, with millisecond precision) from the time the load balancer received the request until the time it sent it to a target. This value is set to -1 if the load balancer can't dispatch the request to a target. This can happen if the target closes the connection before the idle timeout or if the client sends a malformed request. This value can also be set to -1 if the registered target does not respond before the idle timeout.| double| 313 | |target_processing_time|The total time elapsed (in seconds, with millisecond precision) from the time the load balancer sent the request to a target until the target started to send the response headers. This value is set to -1 if the load balancer can't dispatch the request to a target. This can happen if the target closes the connection before the idle timeout or if the client sends a malformed request. This value can also be set to -1 if the registered target does not respond before the idle timeout. 
|double| 314 | |response_processing_time|The total time elapsed (in seconds, with millisecond precision) from the time the load balancer received the response header from the target until it started to send the response to the client. This includes both the queuing time at the load balancer and the connection acquisition time from the load balancer to the client. This value is set to -1 if the load balancer can't send the request to a target. This can happen if the target closes the connection before the idle timeout or if the client sends a malformed request. |double| 315 | |region(partition)|The region of the load balancer and S3 bucket.|string| 316 | |year(partition)|The year the log was delivered.|string| 317 | |month(partition)|The month the log was delivered.|string| 318 | |day(partition)|The day the log was delivered.|string| 319 | 320 |
321 | 322 | --- 323 | 324 | ### Create Glue Data Catalog for Lambda@Edge Logs - Viewer Request in optimized Parquet Format 325 | 326 | In the query pane, copy the following statement to create a the *lambdaedge_logs_viewer_request_optimized* table, and then choose **Run Query**: 327 | 328 | ```sql 329 | CREATE EXTERNAL TABLE IF NOT EXISTS reInvent2018_aws_service_logs.lambdaedge_logs_viewer_request_optimized( 330 | executionregion string, 331 | requestid string, 332 | distributionid string, 333 | distributionname string, 334 | eventtype string, 335 | requestdata string, 336 | customtraceid string, 337 | useragentstring string) 338 | PARTITIONED BY ( 339 | year string, 340 | month string, 341 | date string, 342 | hour string) 343 | ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' 344 | STORED AS PARQUET 345 | LOCATION 's3://eu-west-1.data-analytics/cflogworkshop/optimized/lelogs/viewer-request' 346 | TBLPROPERTIES("parquet.compress"="SNAPPY") 347 | ``` 348 | 349 | Now that you have created the table you need to add the partition metadata to the AWS Glue Catalog. 350 | 351 | - Choose **New Query**, copy the following statement into the query pane, and then choose **Run Query** to add partition metadata. 
352 | 353 | ```sql 354 | MSCK REPAIR TABLE reInvent2018_aws_service_logs.lambdaedge_logs_viewer_request_optimized 355 | ``` 356 | 357 | - Get the total number of Lambda@Edge Log - Viewer Request records: 358 | 359 | ```sql 360 | SELECT count(*) AS rowcount FROM reInvent2018_aws_service_logs.lambdaedge_logs_viewer_request_optimized 361 | ``` 362 | 363 | > :warning: Ensure that the rowcount = **207837** 364 | 365 | - Get the first ten records: 366 | 367 | ```sql 368 | SELECT * FROM reInvent2018_aws_service_logs.lambdaedge_logs_viewer_request_optimized LIMIT 10 369 | ``` 370 | 371 | After a few seconds, Athena will display your query results as shown below: 372 | 373 | ![viewer-request-optimized.png](./assets/viewer-request-optimized.png) 374 | 375 |
376 | 377 | Click to expand to review the values in the following fields/columns 378 | 379 | |Field Name|Description|type 380 | |---|----|---| 381 | |requestid|An encrypted string that uniquely identifies a request. This field value is used to join the optimized CloudFront access logs with the optimized Lambda@Edge logs. The requestId value also appears in CloudFront access logs as x-edge-request-id. For more information, see [Configuring and Using Access Logs](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/AccessLogs.html) and [Web Distribution Log File Format](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/AccessLogs.html#BasicDistributionFileFormat).|string| 382 | |customtraceid|A uniquely generated value per request to join the ALB logs with Lambda@Edge logs. As part of client side instrumentation an unique value (Sample Value: ```Root=1-67891233-abcdef012345678912345678```) per request is generated and added two headers **x-my-trace-id** and **X-Amzn-Trace-Id**. The viewer-request triggered Lambda@Edge function extract the **x-my-trace-id** header and logs the value. For more details see [Viewer Request Trigger Lambda Function](./viewerRequest-Lambda/index.js). The **X-Amzn-Trace-Id** value is logged by the ALB. For more details refer, [Request Tracing for Your Application Load Balancer](https://docs.aws.amazon.com/elasticloadbalancing/latest/application/load-balancer-request-tracing.html). |string| 383 | |executionregion|The AWS region where the Lambda@Edge function was executed.|string| 384 | |eventtype|The type of trigger that's associated with the request. 
Value = "viewer-request"|string| 385 | |distributionid|The ID of the distribution that's associated with the request.|string| 386 | |distributionname|The domain name of the distribution that's associated with the request.|string| 387 | |year(partition)|The year on which the event occurred.|string| 388 | |month(partition)|The month on which the event occurred.|string| 389 | |day(partition)|The day on which the event occurred.|string| 390 | |hour(partition)|The hour on which the event occurred.|string| 391 | 392 |
393 | 394 | --- 395 | 396 | ### Create Glue Data Catalog for Lambda@Edge Logs - Origin Request in optimized Parquet Format 397 | 398 | In the query pane, copy the following statement to create a the *lambdaedge_logs_origin_request_optimized* table, and then choose **Run Query**: 399 | 400 | ```sql 401 | CREATE EXTERNAL TABLE IF NOT EXISTS reInvent2018_aws_service_logs.lambdaedge_logs_origin_request_optimized( 402 | executionregion string, 403 | requestid string, 404 | distributionid string, 405 | distributionname string, 406 | eventtype string, 407 | requestdata string, 408 | customtraceid string, 409 | viewercountry string, 410 | deviceformfactor string) 411 | PARTITIONED BY ( 412 | year string, 413 | month string, 414 | date string, 415 | hour string) 416 | ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' 417 | STORED AS PARQUET 418 | LOCATION 's3://eu-west-1.data-analytics/cflogworkshop/optimized/lelogs/origin-request' 419 | TBLPROPERTIES("parquet.compress"="SNAPPY") 420 | ``` 421 | 422 | Now that you have created the table you need to add the partition metadata to the AWS Glue Catalog. 423 | 424 | - Choose **New Query**, copy the following statement into the query pane, and then choose **Run Query** to add partition metadata. 
425 | 426 | ```sql 427 | MSCK REPAIR TABLE reInvent2018_aws_service_logs.lambdaedge_logs_origin_request_optimized 428 | ``` 429 | 430 | - Get the total number of Lambda@Edge Log - Origin Request records: 431 | 432 | ```sql 433 | SELECT count(*) AS rowcount FROM reInvent2018_aws_service_logs.lambdaedge_logs_origin_request_optimized 434 | ``` 435 | 436 | > :warning: Ensure that the rowcount = **14517** 437 | 438 | - Get the first ten records: 439 | 440 | ```sql 441 | SELECT * FROM reInvent2018_aws_service_logs.lambdaedge_logs_origin_request_optimized LIMIT 10 442 | ``` 443 | 444 | After a few seconds, Athena will display your query results as shown below: 445 | 446 | ![origin-request-optimized.png](./assets/origin-request-optimized.png) 447 | 448 |
449 | 450 | Click to expand to review the values in the following fields/columns 451 | 452 | |Field Name|Description|type 453 | |---|----|---| 454 | |requestid|An encrypted string that uniquely identifies a request. This field value is used to join the optimized CloudFront access logs with the optimized Lambda@Edge logs. The requestId value also appears in CloudFront access logs as x-edge-request-id. For more information, see [Configuring and Using Access Logs](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/AccessLogs.html) and [Web Distribution Log File Format](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/AccessLogs.html#BasicDistributionFileFormat).|string| 455 | |customtraceid|A uniquely generated value per request to join the ALB logs with Lambda@Edge logs. As part of client side instrumentation an unique value (Sample Value: ```Root=1-67891233-abcdef012345678912345678```) per request is generated and added two headers **x-my-trace-id** and **X-Amzn-Trace-Id**. The origin-request triggered Lambda@Edge function extract the **x-my-trace-id** header and logs the value. For more details see [Origin Request Trigger Lambda Function](./originRequest-Lambda/index.js). The **X-Amzn-Trace-Id** value is logged by the ALB. For more details refer, [Request Tracing for Your Application Load Balancer](https://docs.aws.amazon.com/elasticloadbalancing/latest/application/load-balancer-request-tracing.html). |string| 456 | |executionregion|The AWS region where the Lambda@Edge function was executed.|string| 457 | |eventtype|The type of trigger that's associated with the request. Value = "origin-request"|string| 458 | |distributionid|The ID of the distribution that's associated with the request.|string| 459 | |distributionname|The domain name of the distribution that's associated with the request.|string| 460 | |viewercountry|Two letter country code based on IP address where the request came from. 
For more details [Configuring Caching Based on the Location of the Viewer](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/header-caching.html#header-caching-web-location). For an easy-to-use list of country codes, sortable by code and by country name, see the Wikipedia entry [ISO 3166-1 alpha-2](http://en.wikipedia.org/wiki/ISO_3166-1_alpha-2).|string| 461 | |deviceformfactor|Category or form factor of the device based on the user agent associated with the request. For more details see [Configuring Caching Based on the Device Type](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/header-caching.html#header-caching-web-device). Possible values: |string| 462 | |year(partition)|The year on which the event occurred.|string| 463 | |month(partition)|The month on which the event occurred.|string| 464 | |day(partition)|The day on which the event occurred.|string| 465 | |hour(partition)|The hour on which the event occurred.|string| 466 | 467 |
468 | 469 | --- 470 | --- 471 | 472 | ## Combine the logs using an AWS Glue ETL Job 473 | 474 | Now that you have created all the AWS Glue data catalog tables for the optimized logs, in this section you will create an AWS Glue ETL job to join the four optimized logs - 1) Viewer request triggered Lambda@Edge logs, 2) Origin request triggered Lambda@Edge logs, 3)Amazon CloudFront access logs and 4) Application Load Balancer(ALB) logs. The output of the combined logs is written in optimized parquet format to the Amazon S3 bucket that you created at the beginning of this lab. The data is partition by year followed by month follow by day. You will also create an IAM role that grants AWS Glue service permission to read and write to Amazon S3 bucket and access the AWS Glue data catalog tables. 475 | 476 | ### Create AWS IAM Role 477 | 478 | Create an IAM role that has permission to your Amazon S3 sources, targets, temporary directory, scripts, AWSGlueServiceRole and any libraries used by the job. 479 | 480 | - Open the AWS Management console for AWS IAM from [here](https://console.aws.amazon.com/iam/home?region=us-west-2#/roles) 481 | - On the IAM **Role** page click on **Create role** 482 | - Choose **Glue** under **Choose the service that will use this role section** 483 | - Ensure that **Glue** is shown under the **Select your use case** section 484 | - Click on **Next:Permissions** on the bottom 485 | - On the Attach permissions policies, search policies for S3 and check the box for **AmazonS3FullAccess** 486 | 487 | > :warning: Do not click on the policy, you just have to check the corresponding checkbox 488 | 489 | - On the same page, now search policies for Glue and check the box for **AWSGlueConsoleFullAccess** and **AWSGlueServiceRole**. 490 | 491 | > :warning: Do not click on the policy, you just have to check the corresponding checkbox 492 | 493 | - Click on **Next: Tags** 494 | - Click on **Next: Review** 495 | - Type the **Role name** *(e.g. 
ReInvent2018-CTD410-GlueRole)* 496 | - Type the **Role description** (optional) 497 | - Ensure that **AmazonS3FullAccess**, **AWSGlueConsoleFullAccess** and **AWSGlueServiceRole** are listed under policies 498 | - Click **Create role** 499 | 500 | --- 501 | 502 | ### Create AWS Glue ETL Job 503 | 504 | - Now that you have created the IAM role, open the AWS Management console for AWS Glue service from [here](https://eu-west-1.console.aws.amazon.com/glue/home?region=eu-west-1) 505 | - If this is your first time visiting the AWS Management Console for AWS Glue, you will get a Getting Started page. Choose **Get Started**. If this isn't your first time, the **Tables** pages opens. 506 | - Make a note of the AWS region name, for example, for this lab you will need to choose the **eu-west-1 (Ireland)** region 507 | - Click on **Jobs** under the **ETL** section in the navigation pane on the left 508 | - Click on **Add job** to create a new ETL job to join the Amazon CloudFront access logs, Lambda@Edge(viewer-request and origin-request) logs and Application Load Balancer logs 509 | - On the **Job properties** page, type the **Name** *(e.g. ReInvent2018-CTD410-LogCombiner)* of the AWS Glue ETL job 510 | - Choose the **IAM role** you created *(e.g. ReInvent2018-CTD410-GlueRole)* as part of the previous section in this lab from the drop down menu 511 | - Select **A new script to be authored by you** for **This job runs** 512 | - Select **Python** as the **ETL language** 513 | - Click **Next** 514 | - On the **Connections** page, click **Next** 515 | - On the **Review** page, click **Save job and edit script** 516 | - If this your first time, a **Script editor tips** page will pop up. 
Close the pop up page by clicking on the **x** symbol on the top right 517 | - Copy and paste the LogCombiner script [log-combiner-glue-script.py](./log-combiner-glue-script.py) to AWS Glue script editor pane 518 | - Click **Save** 519 | - Click **Run job** 520 | - Expand **Security configuration, script libraries, and job parameters** section on the popped up **Parameters(optional)** page 521 | - Under **Job parameters**, type **--target_s3_bucket** into the text box under **Key** 522 | - Into the text box under **Value**, type the name of the Amazon S3 bucket that you created at the beginning of this lab. 523 | 524 | > :warning: Type only the name of the S3 bucket and **not** the Amazon S3 path starting with S3:// 525 | 526 | - Click **Run job** 527 | - Close the script editor page by clicking on the **X** symbol on the right hand side of the page 528 | 529 | ![glue-job-complete.png](./assets/glue-job-complete.png) 530 | 531 | - On the Jobs page, check the box next to the name of the Glue ETL job *(e.g. ReInvent2018-CTD410-LogCombiner)* to view the current status of the job under the **History** tab at the bottom of the page 532 | - Ensure that the **Run status** is displayed as **Running** 533 | - Wait until the Run status changes to **Succeeded** 534 | 535 | > :warning: This step may take up to 15 minutes to complete. 536 | 537 | ![combine-schema](./assets/combine-schema.png) 538 | 539 | The AWS Glue ETL job performs a left outer join of the Amazon CloudFront access logs with the viewer request and origin request triggered Lambda@Edge logs based on the 'requestid' field. This is followed by another left outer join with the Application Load Balancer (ALB) logs based on the 'customtraceid' field in the Lambda@Edge logs and the 'trace_id' field in the ALB logs. The duplicate fields in the logs are also removed. For more details, see [log-combiner-glue-script.py](./log-combiner-glue-script.py). 
540 | 541 | --- 542 | --- 543 | 544 | ## Create AWS Glue Data Catalog for Combined Logs 545 | 546 | Now that you have successfully generated the combined logs, in this section you will be creating AWS Glue Data Catalog tables pointing to the combined logs written by the AWS Glue ETL job that you just executed. You will be creating the following tables, loading the partitions into each of these tables, and previewing the fields. 547 | 548 | |Table Name|Log Name|Partition| 549 | |---|---|----| 550 | |lambdaedge_logs_combined_optimized(optional)|Combined Lambda@Edge Logs obtained by joining viewer-request and origin-request logs |year, month, day, hour| 551 | |combined_log_optimized|Combined all the four following logs |year, month, day| 552 | 553 | The above AWS Glue data catalogs will be referred to by the Amazon Athena service when you query the logs directly from the Amazon S3 bucket for generating visualizations using Amazon QuickSight. 554 | 555 | ### Create AWS Glue Data Catalog for the combined logs using Amazon Athena 556 | - In the query pane, copy the following statement to create the *combined_log_optimized* table, and then choose **Run Query**: 557 | 558 | > :warning: In the LOCATION clause of the query below, replace the empty S3 bucket placeholder with the unique name of the S3 Bucket you created in step 1 earlier. 
559 | 560 | ```sql 561 | CREATE EXTERNAL TABLE reInvent2018_aws_service_logs.combined_log_optimized( 562 | received_bytes int, 563 | trace_id string, 564 | distributionname string, 565 | executionregion string, 566 | distributionid string, 567 | location string, 568 | sent_bytes int, 569 | responseresulttype string, 570 | xforwardedfor string, 571 | type string, 572 | customtraceid string, 573 | querystring string, 574 | client_ip_port string, 575 | response_processing_time double, 576 | elb string, 577 | deviceformfactor string, 578 | elb_status_code string, 579 | uri string, 580 | request_verb string, 581 | col24 string, 582 | request_url string, 583 | region string, 584 | hostheader string, 585 | request_processing_time double, 586 | resulttype string, 587 | method string, 588 | useragent string, 589 | httpversion string, 590 | target_status_code string, 591 | target_ip_port string, 592 | requestdata string, 593 | host string, 594 | referrer string, 595 | cookie string, 596 | bytes bigint, 597 | target_processing_time double, 598 | alb_time timestamp, 599 | requestid string, 600 | viewercountry string, 601 | timetaken double, 602 | requestbytes bigint, 603 | target_group_arn string, 604 | sslprotocol string, 605 | requestprotocol string, 606 | status int, 607 | time timestamp, 608 | requestip string, 609 | sslcipher string, 610 | request_proto string, 611 | col25 string, 612 | user_agent string) 613 | PARTITIONED BY ( 614 | year string, 615 | month string, 616 | day string) 617 | ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' 618 | STORED AS PARQUET 619 | LOCATION 's3:///combined/logs/' 620 | TBLPROPERTIES("parquet.compress"="SNAPPY") 621 | ``` 622 | 623 | Now that you have created the table you need to add the partition metadata to the AWS Glue Catalog. 624 | 625 | - Choose **New Query**, copy the following statement into the query pane, and then choose **Run Query** to add partition metadata. 
626 | 627 | ```sql 628 | MSCK REPAIR TABLE reInvent2018_aws_service_logs.combined_log_optimized 629 | ``` 630 | - Get the total number of combined log records: 631 | 632 | ```sql 633 | SELECT count(*) AS rowcount FROM reInvent2018_aws_service_logs.combined_log_optimized 634 | ``` 635 | 636 | > :warning: Ensure that the rowcount = **207537** 637 | 638 | - Get the first ten records: 639 | 640 | ```sql 641 | SELECT * FROM reInvent2018_aws_service_logs.combined_log_optimized LIMIT 10 642 | ``` 643 | 644 | ![combined-logs-all.png](./assets/combined-logs-all.png) 645 | 646 |
647 | Click to expand to review the values in the following fields/columns as you will be using them in this workshop 648 | 649 | |Field Name|Description|type 650 | |---|----|---| 651 | |requestid|An encrypted string that uniquely identifies a request. This field value is used to join the optimized CloudFront access logs with the optimized Lambda@Edge logs|string| 652 | |trace_id|The contents of the X-Amzn-Trace-Id header, enclosed in double quotes. This field is used to join the optimized ALB logs with the optimized Lambda@Edge logs which in turn is used to correlate with the optimized CloudFront access logs using the requestId filed. For more information see [Request Tracing for Your Application Load Balancer](https://docs.aws.amazon.com/elasticloadbalancing/latest/application/load-balancer-request-tracing.html). Example value: ```X-Amzn-Trace-Id: Self=1-67891234-12456789abcdef012345678;Root=1-67891233-abcdef012345678912345678```|string| 653 | |time|The time when the CloudFront server finished responding to the request (in UTC), for example, 01:42:39|timestamp| 654 | |location|The edge location that served the request. Each edge location is identified by a three-letter code and an arbitrarily assigned number, for example, DFW3. The three-letter code typically corresponds with the International Air Transport Association airport code for an airport near the edge location. (These abbreviations might change in the future.) For a list of edge locations, see the Amazon CloudFront detail page, [http://aws.amazon.com/cloudfront](http://aws.amazon.com/cloudfront)|string| 655 | |uri|The query string portion of the URI, if any. When a URI doesn't contain a query string, the value of cs-uri-query is a hyphen (-). For more information, see [Caching Content Based on Query String Parameters](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/QueryStringParameters.html).|string| 656 | |status| One of the following values:
  • An HTTP status code (for example, 200). For a list of HTTP status codes, see [RFC 2616](http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html), [Hypertext Transfer Protocol—HTTP 1.1, section 10, Status Code Definitions]((http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html)). For more information, see [How CloudFront Processes and Caches HTTP 4xx and 5xx Status Codes from Your Origin](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/HTTPStatusCodes.html).
  • 000, which indicates that the viewer closed the connection (for example, closed the browser tab) before CloudFront could respond to a request. If the viewer closes the connection after CloudFront starts to send the object, the log contains the applicable HTTP status code.
    • |string| 657 | |useragent| The value of the User-Agent header in the request. The User-Agent header identifies the source of the request, such as the type of device and browser that submitted the request and, if the request came from a search engine, which search engine. For more information, see [User-Agent Header](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/RequestAndResponseBehaviorCustomOrigin.html#request-custom-user-agent-header). 658 | |responseresulttype|How CloudFront classified the response just before returning the response to the viewer. Possible values include:
      • Hit – CloudFront served the object to the viewer from the edge cache.For information about a situation in which CloudFront classifies the result type as Hit even though the response from the origin contains a Cache-Control: no-cache header, see [Simultaneous Requests for the Same Object (Traffic Spikes)](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/RequestAndResponseBehaviorCustomOrigin.html#request-custom-traffic-spikes).
      • RefreshHit – CloudFront found the object in the edge cache but it had expired, so CloudFront contacted the origin to determine whether the cache has the latest version of the object and, if not, to get the latest version.
      • Miss – The request could not be satisfied by an object in the edge cache, so CloudFront forwarded the request to the origin server and returned the result to the viewer.
      • LimitExceeded – The request was denied because a CloudFront limit was exceeded.
      • CapacityExceeded – CloudFront returned an HTTP 503 status code (Service Unavailable) because the CloudFront edge server was temporarily unable to respond to requests.
      • Error – Typically, this means the request resulted in a client error (sc-status is 4xx) or a server error (sc-status is 5xx).
      • Redirect – CloudFront redirects from HTTP to HTTPS.If sc-status is 403 and you configured CloudFront to restrict the geographic distribution of your content, the request might have come from a restricted location. For more information about geo restriction, see [Restricting the Geographic Distribution of Your Content](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/georestrictions.html).If the value of x-edge-result-type is Error and the value of x-edge-response-result-type is not Error, the client disconnected before finishing the download.
      |string| 659 | |timetaken|The number of seconds (to the thousandth of a second, for example, 0.002) between the time that a CloudFront edge server receives a viewer's request and the time that CloudFront writes the last byte of the response to the edge server's output queue as measured on the server. From the perspective of the viewer, the total time to get the full object will be longer than this value due to network latency and TCP buffering.|double| 660 | |request_processing_time|The total time elapsed (in seconds, with millisecond precision) from the time the load balancer received the request until the time it sent it to a target. This value is set to -1 if the load balancer can't dispatch the request to a target. This can happen if the target closes the connection before the idle timeout or if the client sends a malformed request. This value can also be set to -1 if the registered target does not respond before the idle timeout.| double| 661 | |target_processing_time|The total time elapsed (in seconds, with millisecond precision) from the time the load balancer sent the request to a target until the target started to send the response headers. This value is set to -1 if the load balancer can't dispatch the request to a target. This can happen if the target closes the connection before the idle timeout or if the client sends a malformed request. This value can also be set to -1 if the registered target does not respond before the idle timeout. |double| 662 | |response_processing_time|The total time elapsed (in seconds, with millisecond precision) from the time the load balancer received the response header from the target until it started to send the response to the client. This includes both the queuing time at the load balancer and the connection acquisition time from the load balancer to the client. This value is set to -1 if the load balancer can't send the request to a target. 
This can happen if the target closes the connection before the idle timeout or if the client sends a malformed request. |double| 663 | |customtraceid|A uniquely generated value per request to join the ALB logs with Lambda@Edge logs. As part of client side instrumentation an unique value (Sample Value: ```Root=1-67891233-abcdef012345678912345678```) per request is generated and added two headers **x-my-trace-id** and **X-Amzn-Trace-Id**. The origin-request triggered Lambda@Edge function extract the **x-my-trace-id** header and logs the value. For more details see [Origin Request Trigger Lambda Function](./originRequest-Lambda/index.js). The **X-Amzn-Trace-Id** value is logged by the ALB. For more details refer, [Request Tracing for Your Application Load Balancer](https://docs.aws.amazon.com/elasticloadbalancing/latest/application/load-balancer-request-tracing.html). |string| 664 | |viewercountry|Two letter country code based on IP address where the request came from. For more details [Configuring Caching Based on the Location of the Viewer](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/header-caching.html#header-caching-web-location). For an easy-to-use list of country codes, sortable by code and by country name, see the Wikipedia entry [ISO 3166-1 alpha-2](http://en.wikipedia.org/wiki/ISO_3166-1_alpha-2).|string| 665 | |deviceformfactor|Category or form factor of the device based on the user agent associated with the request. For more details see [Configuring Caching Based on the Device Type](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/header-caching.html#header-caching-web-device). Possible values:
      • desktop
      • mobile
      • smarttv
      • tablet
      |string| 666 | |year(partition)|The year on which the event occurred.|string| 667 | |month(partition)|The month on which the event occurred.|string| 668 | |day(partition)|The day on which the event occurred.|string| 669 | 670 |
671 | 672 | --- 673 | 674 | ### (Optional) Create AWS Glue Data Catalog for the combined Lambda@Edge logs using Amazon Athena 675 | 676 |
677 | CLICK TO EXPAND FOR OPTIONAL SECTION 678 | 679 | - Open the AWS Management Console for Athena from [here](https://console.aws.amazon.com/athena/home). 680 | - In the query pane, copy the following statement to create a the *lambdaedge_logs_combined_optimized* table, and then choose **Run Query**: 681 | 682 | > :warning: Replace in the query below with the unique name of the S3 Bucket you created in beginning of this lab. 683 | 684 | ```sql 685 | CREATE EXTERNAL TABLE IF NOT EXISTS reInvent2018_aws_service_logs.lambdaedge_logs_combined_optimized( 686 | executionregion string, 687 | requestid string, 688 | distributionid string, 689 | distributionname string, 690 | requestdata string, 691 | customtraceid string, 692 | useragentstring string, 693 | deviceformfactor string, 694 | viewercountry string) 695 | PARTITIONED BY ( 696 | year string, 697 | month string, 698 | date string, 699 | hour string) 700 | ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' 701 | STORED AS PARQUET 702 | LOCATION 's3:///combined/lelogs/' 703 | TBLPROPERTIES("parquet.compress"="SNAPPY") 704 | ``` 705 | 706 | Now that you have created the table you need to add the partition metadata to the AWS Glue Catalog. 707 | 708 | 1. Choose **New Query**, copy the following statement into the query pane, and then choose **Run Query** to add partition metadata. 
709 | 710 | ```sql 711 | MSCK REPAIR TABLE reInvent2018_aws_service_logs.lambdaedge_logs_combined_optimized 712 | ``` 713 | 714 | - Get the total number of combined Lambda@Edge Log records: 715 | 716 | ```sql 717 | SELECT count(*) AS rowcount FROM reInvent2018_aws_service_logs.lambdaedge_logs_combined_optimized 718 | ``` 719 | 720 | > :warning: Ensure that the rowcount = **207837** 721 | 722 | - Get the first ten records: 723 | ```sql 724 | SELECT * FROM reInvent2018_aws_service_logs.lambdaedge_logs_combined_optimized LIMIT 10 725 | ``` 726 | 727 | After a few seconds, Athena will display your query results as shown below: 728 | 729 | ![le-combined-logs.png](./assets/le-combined-logs.png) 730 | 731 |
732 | Click to expand to review the values in the following fields/columns 733 | 734 | |Field Name|Description|type 735 | |---|----|---| 736 | |requestid|An encrypted string that uniquely identifies a request. This field value is used to join the optimized CloudFront access logs with the optimized Lambda@Edge logs. The requestId value also appears in CloudFront access logs as x-edge-request-id. For more information, see [Configuring and Using Access Logs](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/AccessLogs.html) and [Web Distribution Log File Format](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/AccessLogs.html#BasicDistributionFileFormat).|string| 737 | |customtraceid|A uniquely generated value per request to join the ALB logs with Lambda@Edge logs. As part of client side instrumentation an unique value (Sample Value: ```Root=1-67891233-abcdef012345678912345678```) per request is generated and added two headers **x-my-trace-id** and **X-Amzn-Trace-Id**. The origin-request triggered Lambda@Edge function extract the **x-my-trace-id** header and logs the value. For more details see [Viewer Request Trigger Lambda Function](./viewerRequest-Lambda/index.js) and [Origin Request Trigger Lambda Function](./originRequest-Lambda/index.js) and . The **X-Amzn-Trace-Id** value is logged by the ALB. For more details refer, [Request Tracing for Your Application Load Balancer](https://docs.aws.amazon.com/elasticloadbalancing/latest/application/load-balancer-request-tracing.html). |string| 738 | |executionregion|The AWS region where the Lambda@Edge function was executed.|string| 739 | |eventtype|The type of trigger that's associated with the request. Possible Values
  • viewer-request
  • origin-request
|string| 740 | |distributionid|The ID of the distribution that's associated with the request.|string| 741 | |distributionname|The domain name of the distribution that's associated with the request.|string| 742 | |viewercountry|Two letter country code based on IP address where the request came from. For more details [Configuring Caching Based on the Location of the Viewer](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/header-caching.html#header-caching-web-location). For an easy-to-use list of country codes, sortable by code and by country name, see the Wikipedia entry [ISO 3166-1 alpha-2](http://en.wikipedia.org/wiki/ISO_3166-1_alpha-2).|string| 743 | |deviceformfactor|Category or form factor of the device based on the user agent associated with the request. For more details see [Configuring Caching Based on the Device Type](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/header-caching.html#header-caching-web-device). Possible values:
  • desktop
  • mobile
  • smarttv
  • tablet
|string| 744 | |year(partition)|The year on which the event occurred.|string| 745 | |month(partition)|The month on which the event occurred.|string| 746 | |day(partition)|The day on which the event occurred.|string| 747 | |hour(partition)|The hour on which the event occurred.|string| 748 | 749 |
750 | 751 |
752 | 753 | --- 754 | --- 755 | 756 | ## Visualization using Amazon QuickSight 757 | 758 | ### Signing Up for Amazon QuickSight Standard Edition 759 | 760 |
761 | IF YOU HAVE NEVER USED AMAZON QUICKSIGHT WITHIN THIS ACCOUNT, CLICK TO EXPAND THE INSTRUCTIONS TO SIGN-UP FOR AN AMAZON QUICKSIGHT ACCOUNT 762 | 763 | ![quicksight-signup.png](./assets/quicksight-signup.png) 764 | 765 | - Open the AWS Management console for Amazon QuickSight from [here](https://eu-west-1.quicksight.aws.amazon.com/sn/start) 766 | - If this is the first time you are accessing QuickSight, you will see a sign-up landing page for QuickSight. 767 | - Click on **Sign up for QuickSight**. 768 | 769 | ![quicksight-edition.png](./assets/quicksight-edition.png) 770 | 771 | - On the **Create your QuickSight account** page, select **Standard Edition** for the subscription type. 772 | - Click **Continue** 773 | 774 | ![quicksight-account-create.png](./assets/quicksight-account-create.png) 775 | 776 | - On the next page, type a unique **QuickSight account name** *(e.g. REInvent2018-CTD410-QuickSight)* 777 | - Type a valid email id for **Notification email address** 778 | - Just for this step, ensure that **US East(N. Virginia)** is selected from the drop down menu for *QuickSight capacity region* 779 | - Ensure that boxes next to **Enable autodiscovery of your data and users in your Amazon Redshift, Amazon RDS and AWS IAM Services** and **Amazon Athena** are checked 780 | - Click **Finish** 781 | - Wait until the page with the message **Congratulations! You are signed up for Amazon QuickSight!** on successful sign up is presented. 782 | - Click on **Go to Amazon QuickSight**. 783 | 784 |
785 | 786 | --- 787 | 788 | ### Configure Amazon S3 bucket Permission 789 | 790 | In this section you will configure the permission for Amazon QuickSight to access the Amazon S3 bucket to read the combined logs that you generated as part of the ETL job. 791 | 792 | ![quicksight-manage.png](./assets/quicksight-manage.png) 793 | 794 | - On the Amazon QuickSight dashboard, navigate to user settings page on the top right section and click **Manage QuickSight**. 795 | 796 | ![quicksight-permission.png](./assets/quicksight-permission.png) 797 | 798 | - On the next page, click on **Account Settings** 799 | - Click on **Manage QuickSight permissions** 800 | - Click **Choose S3 buckets** to select the Amazon S3 buckets for which auto-discovery needs to be enabled for QuickSight 801 | 802 | ![quicksight-s3-bucket-selection.png](./assets/quicksight-s3-bucket-selection.png) 803 | 804 | - On the pop up **Select Amazon S3 buckets** page check the box next to **Select all** or the name of the Amazon S3 bucket you created at the beginning of the lab 805 | - Click **Select buckets** 806 | - Ensure that the box next to **Amazon S3** is checked 807 | - Click **Apply** 808 | 809 | --- 810 | 811 | ### Configuring Amazon QuickSight to use Amazon Athena as data source 812 | 813 | In this section you will configure Amazon Athena as the data source to query the combined logs directly from Amazon S3 bucket by referencing the AWS Glue data catalog - *reInvent2018_aws_service_logs.combined_log_optimized*. 814 | 815 | ![quicksight-region-selection.png](./assets/quicksight-region-selection.png) 816 | 817 | - Select **EU(Ireland)** as the region for this lab 818 | - If this is the first time you are using Amazon QuickSight in this region, close the **Welcome to QuickSight** pop up page by clicking on the **x** symbol. 
819 | - Click on **Manage data** in the upper right hand corner 820 | - Click on **New data set** on the upper left hand corner 821 | 822 | ![quicksight-datasource.png](./assets/quicksight-datasource.png) 823 | 824 | - Select **Athena** as the data source 825 | 826 | ![quicksight-athena-ds.png](./assets/quicksight-athena-ds.png) 827 | 828 | - Type a **Data source name** *(e.g. ReInvent-CTD410-DS)* 829 | - Click on **Create data source** 830 | 831 | ![quicksight-table-selection.png](./assets/quicksight-table-selection.png) 832 | 833 | - Select **reinvent2018_aws_service_logs** from the drop down menu for **Database: contain sets of tables** 834 | - Choose **combined_log_optimized** from the list under **Tables: contains the data you can visualize** 835 | - Click **Edit/Preview data** 836 | 837 | > :warning: **THIS IS A CRUCIAL STEP. PLEASE ENSURE YOU CHOOSE Edit/Preview data.**\ 838 | > :warning: **THIS IS A CRUCIAL STEP. PLEASE ENSURE YOU CHOOSE Edit/Preview data.** 839 | 840 | --- 841 | --- 842 | 843 | ## Generating new calculated fields in Amazon QuickSight 844 | 845 | Now that you have configured the Amazon S3 permission and the data source in Amazon QuickSight, in this section you will generate the following additional fields - HourOfDay, EdgeToOriginTimeTaken, TotalTimeTakenAtALB. 846 | 847 | ### Create new calculated fields "EdgeToOriginTimeTaken" in Amazon QuickSight 848 | 849 | > **Formula:**\ 850 | > EdgeToOriginTimeTaken = timetaken - target_processing_time + response_processing_time + request_processing_time\ 851 | >       = timetaken, when target_processing_time = null i.e. 
response was served by Amazon CloudFront\ 852 | >       = 0, when (target_processing_time || response_processing_time || request_processing_time) == -1 (request timeout) 853 | 854 | |Field Name|Description|type 855 | |---|----|---| 856 | |timetaken|The number of seconds (to the thousandth of a second, for example, 0.002) between the time that a CloudFront edge server receives a viewer's request and the time that CloudFront writes the last byte of the response to the edge server's output queue as measured on the server. From the perspective of the viewer, the total time to get the full object will be longer than this value due to network latency and TCP buffering.|double| 857 | |request_processing_time|The total time elapsed (in seconds, with millisecond precision) from the time the load balancer received the request until the time it sent it to a target. This value is set to -1 if the load balancer can't dispatch the request to a target. This can happen if the target closes the connection before the idle timeout or if the client sends a malformed request. This value can also be set to -1 if the registered target does not respond before the idle timeout.| double| 858 | |target_processing_time|The total time elapsed (in seconds, with millisecond precision) from the time the load balancer sent the request to a target until the target started to send the response headers. This value is set to -1 if the load balancer can't dispatch the request to a target. This can happen if the target closes the connection before the idle timeout or if the client sends a malformed request. This value can also be set to -1 if the registered target does not respond before the idle timeout. |double| 859 | |response_processing_time|The total time elapsed (in seconds, with millisecond precision) from the time the load balancer received the response header from the target until it started to send the response to the client. 
This includes both the queuing time at the load balancer and the connection acquisition time from the load balancer to the client. This value is set to -1 if the load balancer can't send the request to a target. This can happen if the target closes the connection before the idle timeout or if the client sends a malformed request. |double| 860 | 861 | - Open the AWS Management console for Amazon QuickSight from [here](https://eu-west-1.quicksight.aws.amazon.com/sn/start) 862 | - Under **Fields** on the left column, click **Add calculated field** 863 | 864 | ![quicksight-new-field.png](./assets/quicksight-new-field.png) 865 | 866 | - In the **Add calculated field** pop up page, type **EdgeToOriginTimeTaken** under **Calculated field name** 867 | - Copy and paste the formula below in the **Formula** text box 868 | 869 | ```$xslt 870 | ifelse(isNull(target_processing_time), {timetaken}, ifelse(target_processing_time = -1 or response_processing_time = -1 or request_processing_time = -1, 0, {timetaken} - {target_processing_time} + {response_processing_time} +{request_processing_time})) 871 | ``` 872 | 873 | - Click **Create** 874 | - Ensure that **#EdgeToOriginTimeTaken** appears under *Calculated fields* 875 | 876 | --- 877 | 878 | ### Create new calculated fields "HourOfDay" in Amazon QuickSight 879 | 880 | > **Formula:**\ 881 | > HourofDay = extract("HH",{time}) 882 | 883 | |Field Name|Description|type 884 | |---|----|---| 885 | |time|The time when the CloudFront server finished responding to the request (in UTC), for example, 01:42:39|timestamp| 886 | 887 | 888 | - Under **Fields** on the left column, click **Add calculated field** 889 | - In the **Add calculated field** pop up page, type **HourOfDay** under **Calculated field name** 890 | - Copy and paste the formula below in the **Formula** text box 891 | 892 | ```$xslt 893 | extract("HH",{time}) 894 | ``` 895 | 896 | - Click **Create** 897 | - Ensure that **#HourOfDay** appears under **Calculated fields** 898 | 
899 | --- 900 | 901 | ### Create new calculated fields "TotalTimeTakenAtALB" in Amazon QuickSight 902 | 903 | > **Formula**\ 904 | > TotalTimeTakenAtALB = target_processing_time + response_processing_time + request_processing_time\ 905 | >        = 0, when target_processing_time = null i.e. response was served by Amazon CloudFront\ 906 | >        = 0, when (target_processing_time || response_processing_time || request_processing_time) == -1 (request timeout) 907 | 908 | |Field Name|Description|type 909 | |---|----|---| 910 | |timetaken|The number of seconds (to the thousandth of a second, for example, 0.002) between the time that a CloudFront edge server receives a viewer's request and the time that CloudFront writes the last byte of the response to the edge server's output queue as measured on the server. From the perspective of the viewer, the total time to get the full object will be longer than this value due to network latency and TCP buffering.|double| 911 | |request_processing_time|The total time elapsed (in seconds, with millisecond precision) from the time the load balancer received the request until the time it sent it to a target. This value is set to -1 if the load balancer can't dispatch the request to a target. This can happen if the target closes the connection before the idle timeout or if the client sends a malformed request. This value can also be set to -1 if the registered target does not respond before the idle timeout.| double| 912 | |target_processing_time|The total time elapsed (in seconds, with millisecond precision) from the time the load balancer sent the request to a target until the target started to send the response headers. This value is set to -1 if the load balancer can't dispatch the request to a target. This can happen if the target closes the connection before the idle timeout or if the client sends a malformed request. This value can also be set to -1 if the registered target does not respond before the idle timeout. 
|double| 913 | |response_processing_time|The total time elapsed (in seconds, with millisecond precision) from the time the load balancer received the response header from the target until it started to send the response to the client. This includes both the queuing time at the load balancer and the connection acquisition time from the load balancer to the client. This value is set to -1 if the load balancer can't send the request to a target. This can happen if the target closes the connection before the idle timeout or if the client sends a malformed request. |double| 914 | 915 | - Under **Fields** on the left column, click **Add calculated field** 916 | - In the **Add calculated field** pop up page, type **TotalTimeTakenAtALB** under **Calculated field name** 917 | - Copy and paste the formula below in the **Formula** text box 918 | 919 | ```$xslt 920 | ifelse(isNull(target_processing_time), 0, ifelse(target_processing_time = -1 or response_processing_time = -1 or request_processing_time = -1, 0, {target_processing_time} + {response_processing_time} +{request_processing_time})) 921 | ``` 922 | - Click **Create** 923 | - Ensure that **#TotalTimeTakenAtALB** appears under **Calculated fields** 924 | - Click on **Save & visualize** on the top of the page 925 | 926 | --- 927 | --- 928 | 929 | ## Generate visualization using Amazon QuickSight 930 | 931 | ![quicksight-visualization-all.png](./assets/quicksight-visualization-all.png) 932 | 933 | Now that you have configured Athena as the data source to query the combined logs directly from Amazon S3 and created additional fields in Amazon QuickSight, you are ready to generate visualizations for the following use cases: 934 | * Status code count by Amazon CloudFront Edge / PoP 935 | * Status code count by user requested URI 936 | * Time taken (averaged over hour) from Amazon CloudFront edge to origin (located in AWS region us-east-1 (N. 
Virginia)) by country where the user request originated from 937 | * Total time taken (averaged over hour) Vs. time taken (averaged over hour) from Amazon CloudFront edge to origin (located in AWS region us-east-1 (N. Virginia)) Vs. total server-side processing time (averaged over hour) for a country where the user request originated from 938 | * Count of product category request by the country where the request originated from 939 | * Ratio of device form factors used to browse globally or for a country where the user request originated from 940 | 941 | ### Generate visualization to status code count by Amazon CloudFront Edge/PoP 942 | 943 | ![quicksight-status-code-pop.png](./assets/quicksight-status-code-pop.png) 944 | 945 | **Use case:** HTTP Status Codes (3xx, 4xx, 5xx) error code by Edge/PoP location can provide insight in troubleshooting issue (such as connectivity etc.) 946 | 947 | - Ensure that the selected region is **Ireland** in top right corner 948 | - Click the **Filter** icon in the QuickSight navigation pane 949 | - Click on **+** symbol next to **Applied Filters** 950 | - Select **day** field in the pop up menu 951 | 952 | ![quicksight-status-code-pop-filter.png](./assets/quicksight-status-code-pop-filter.png) 953 | 954 | - Choose the new filter that you just created by clicking on filter name **day** 955 | - Wait for QuickSight to load the filter values and then check the box next to one of the values for the day field *(e.g. 4)* 956 | - Click **Apply** 957 | - Click **Close** 958 | - Click again on **+** symbol next to **Applied Filters** to add another filter 959 | - Select **HourOfDay** field in the pop up menu 960 | - Choose the new filter that you just created by clicking on filter name **HourOfDay** 961 | - Wait for QuickSight to load the filter values and then check the box next to one of the values for the day field *(e.g. 
0)* 962 | - Click **Apply** 963 | - Click **Close** 964 | 965 | ![quicksight-status-code-filter-summary.png](./assets/quicksight-status-code-filter-summary.png) 966 | 967 | - Click the **Visualize** icon in the QuickSight navigation pane 968 | 969 | ![quicksight-status-code-visualize-1.png](./assets/quicksight-status-code-visualize-1.png) 970 | 971 | - Select the **Horizontal bar chart** under **Visual types** 972 | - Drag and drop the **#status** field into the **Y axis** in the **Field wells** section on the top 973 | - Drag and drop the **location** field into the **Group/Color** in the **Field wells** section 974 | - Click on the drop down arrow next to **status** in the y-axis of the chart to reveal a sub menu. 975 | - Click on the Ascending order for **Sort by** in the revealed menu 976 | - Edit the title by click on the title in the chart to **Status code by Edge Location**(optional) 977 | 978 | --- 979 | 980 | ### (Optional) Generate visualization to status code count by request URI 981 | 982 |
983 | CLICK TO EXPAND FOR OPTIONAL SECTION 984 | 985 | **Use case:** HTTP Status Codes (3xx, 4xx, 5xx) error code by URI can provide insight into troubleshooting issue (such as 404- page not found etc.) 986 | 987 | 988 | - Drag and drop the **uri** field into the **Group/Color** in the **Field wells** section. 989 | 990 | > :warning: While dragging and and dropping multiple fields do not replace the existing field but drop the field on top 991 | 992 | - Click on the drop down arrow next to **status** in the y-axis of the chart to reveal a sub menu. 993 | - Click on the Ascending order for **Sort by** in the revealed menu 994 | - Edit the title by click on the title in the chart to **Status code by URI**(optional) 995 | 996 | ![quicksight-status-code-visualize-2.png](./assets/quicksight-status-code-visualize-2.png) 997 | 998 |
999 | 1000 | --- 1001 | 1002 | ### Generate visualization to show hourly average time taken between edge and origin by country where the end user request originated from 1003 | 1004 | ![edge-to-origin-chart.png](./assets/edge-to-origin-chart.png) 1005 | 1006 | **Use case:** Derive insights into edge to origin latency for your global traffic and further optimize routing 1007 | 1008 | - Ensure that the selected region is **Ireland** in top right corner 1009 | - Click on **Add** from the QuickSight menu on the top to **Add Visual** from the pop up menu 1010 | - Click the **Filter** icon in the QuickSight navigation pane 1011 | - Click on **+** symbol next to **Applied Filters** 1012 | - Select **day** field in the pop up menu 1013 | - Choose the new filter that you just created by clicking on filter name **day** 1014 | - Wait for QuickSight to load the filter values and then check the box next to one of the values for the day field *(e.g. 4)* 1015 | - Click **Apply** 1016 | - Click **Close** 1017 | - Click again on **+** symbol next to **Applied Filters** to add another filter 1018 | - Select **viewercountry** field in the pop up menu 1019 | - Choose the new filter that you just created by clicking on filter name **viewercountry** 1020 | - Select all the values **(DE, IE, IN, US)** except **NULL** 1021 | - Click **Apply** 1022 | - Click **Close** 1023 | - Click again on **+** symbol next to **Applied Filters** to add another filter 1024 | - Select **responseresulttype** field in the pop up menu 1025 | - Choose the new filter that you just created by clicking on filter name **responseresulttype** 1026 | - Select **Miss** from the list of values 1027 | - Click **Apply** 1028 | - Click **Close** 1029 | 1030 | ![edge-to-origin-filter-summary.png](./assets/edge-to-origin-filter-summary.png) 1031 | 1032 | - Click the **Visualize** icon in the QuickSight navigation pane 1033 | 1034 | ![edge-to-origin-visualize.png](./assets/edge-to-origin-visualize.png) 1035 | 1036 | - 
Select the Line chart under **Visual types** 1037 | - Drag and drop the **#HourofDay** field into the **X axis** in the **Field wells** section on the top 1038 | - Drag and drop the **viewercountry** field into the **Color** in the **Field wells** section 1039 | - Drag and drop the **#EdgeToOriginTimeTaken** field into the **Value** in the **Field wells** section 1040 | - Click on the down arrow next to **EdgeToOriginTimeTaken** in the Value to reveal a sub-menu 1041 | - Select **Aggregate:** and select **Average** 1042 | 1043 | ![edge-to-origin-x-axis.png](./assets/edge-to-origin-x-axis.png) 1044 | 1045 | - Click on the drop down arrow next to **HourOfDay** in the x-axis of the chart to reveal a sub menu. 1046 | - Click on the Ascending order next to **HourOfDay** under **Sort by** in the revealed menu 1047 | - Edit the title by click on the title in the chart to **Hourly Avg. for time taken from edge to origin by end user country** (optional) 1048 | 1049 | --- 1050 | 1051 | ### Generate visualization to show hourly average time taken (total Vs. edge to origin Vs. server-side processing) by country where the end user request originated from 1052 | 1053 | **Use case:** Troubleshoot latency issue at various stages of a request-response pipeline 1054 | 1055 | ![time-taken-chart.png](./assets/time-taken-chart.png) 1056 | 1057 | - Click on **Add** from the QuickSight menu on the top to **Add Visual** from the pop up menu 1058 | - Ensure that the selected region is **Ireland** in top right corner 1059 | - Click the **Filter** icon in the QuickSight navigation pane 1060 | - Click on **+** symbol next to **Applied Filters** 1061 | - Select **day** field in the pop up menu 1062 | - Choose the new filter that you just created by clicking on filter name **day** 1063 | - Wait for QuickSight to load the filter values and then check the box next to one of the values for the day field (*e.g. 
4 same day that you selected in the previous chart*) 1064 | - Click **Apply** 1065 | - Click **Close** 1066 | - Click again on **+** symbol next to **Applied Filters** to add another filter 1067 | - Select **viewercountry** field in the pop up menu 1068 | - Choose the new filter that you just created by clicking on filter name **viewercountry** 1069 | - Select one of the values (e.g. **US**) except **DE** 1070 | - Click **Apply** 1071 | - Click **Close** 1072 | - Click again on **+** symbol next to **Applied Filters** to add another filter 1073 | - Select **responseresulttype** field in the pop up menu 1074 | - Choose the new filter that you just created by clicking on filter name **responseresulttype** 1075 | - Select **Miss** from the list of values 1076 | - Click **Apply** 1077 | - Click **Close** 1078 | 1079 | ![time-taken-filter-summary.png](./assets/time-taken-filter-summary.png) 1080 | 1081 | - Click the **Visualize** icon in the QuickSight navigation pane 1082 | 1083 | ![time-taken-visualize.png](./assets/time-taken-visualize.png) 1084 | 1085 | - Select the Line chart under **Visual types** 1086 | - Drag and drop the **#HourofDay**field into the **X axis** in the **Field wells** section on the top 1087 | - Drag and drop the **#EdgeToOriginTimeTaken** field into the **Value** in the **Field wells** section 1088 | - Click on the down arrow next to **EdgeToOriginTimeTaken** in the Value to reveal a sub-menu 1089 | - Select **Aggregate:** and select **Average** 1090 | - Drag and drop the **#TotalTimeTakenAtALB**field into the **Value** in the **Field wells** section 1091 | - Click on the down arrow next to **#TotalTimeTakenAtALB** in the Value to reveal a sub-menu 1092 | - Select **Aggregate:** and select **Average** 1093 | - Drag and drop the **#timetaken** field into the **Value** in the **Field wells** section 1094 | - Click on the down arrow next to **#timetaken** in the Value to reveal a sub-menu 1095 | - Select **Aggregate:** and select **Average** 1096 | 
- Click on the drop down arrow next to **HourOfDay** in the x-axis of the chart to reveal a sub menu. 1097 | - Click on the Ascending order next to **HourOfDay** under **Sort by** in the revealed menu 1098 | - Edit the title by click on the title in the chart to **Hourly Avg. time taken (total Vs. edge to origin Vs. server-side processing) by end user country** (optional) 1099 | 1100 | --- 1101 | 1102 | ### (Optional) Generate visualization to show hourly average time taken (total Vs. edge to origin V.s server-side processing) by country where the end user request originated from for a different viewer country 1103 | 1104 |
1105 | CLICK TO EXPAND FOR OPTIONAL SECTION 1106 | 1107 | ![time-taken-visualize-2.png](./assets/time-taken-visualize-2.png) 1108 | 1109 | **Use case:** Troubleshoot latency issues at various stages of a request-response pipeline 1110 | 1111 | - Click the **Filter** icon in the QuickSight navigation pane 1112 | - Choose the new filter that you just created by clicking on filter name **viewercountry** 1113 | - Select one of the values (e.g. **IN**) except **DE** 1114 | - Click **Apply** 1115 | - Click **Close** 1116 | - Click the **Visualize** icon in the QuickSight navigation pane 1117 | 1118 |
1119 | 1120 | --- 1121 | 1122 | ### Generate Visualization to show product category request by country 1123 | 1124 | ![product-category-chart.png](./assets/product-category-chart.png) 1125 | 1126 | **Use case:** Based on the popular attributes (e.g. product categories) that your customers are requesting, you can prioritize and optimize latency by caching the pages for these popular categories or for delivering ad impressions, besides various other business insights you can derive in terms of inventory management etc. 1127 | 1128 | - Click on **Add** from the QuickSight menu on the top to **Add Visual** from the pop up menu 1129 | - Ensure that the selected region is **Ireland** in top right corner 1130 | - Click the **Filter** icon in the QuickSight navigation pane 1131 | - Click again on **+** symbol next to **Applied Filters** to add another filter 1132 | - Select **requestdata** field in the pop up menu 1133 | 1134 | ![product-category-filter.png](./assets/product-category-filter.png) 1135 | 1136 | - Choose the new filter that you just created by clicking on filter name **requestdata** 1137 | - Choose **Custom filter** from the drop down for **Filter type** 1138 | - For the second drop down under **Filter type** choose **Does not equal** 1139 | - Type *null* in the text box. 
1140 | - Click **Apply** 1141 | - Click **Close** 1142 | - Click again on **+** symbol next to **Applied Filters** to add another filter 1143 | - Select **viewercountry** field in the pop up menu 1144 | - Choose the new filter that you just created by clicking on filter name **viewercountry** 1145 | - Select all the values **(DE, IE, IN, US)** except **NULL** 1146 | - Click **Apply** 1147 | - Click **Close** 1148 | - Click the **Visualize** icon in the QuickSight navigation pane 1149 | 1150 | ![product-category-sort.png](./assets/product-category-sort.png) 1151 | 1152 | - Select the **Horizontal bar chart** under **Visual types** 1153 | - Drag and drop the **requestdata** field into the **Y axis** in the **Field wells** section on the top 1154 | - Drag and drop the **viewercountry** field into **Group/Color** in the **Field wells** section 1155 | - Click on the drop down arrow next to **requestdata** in the y-axis of the chart to reveal a sub menu. 1156 | - Click on the Ascending order for **Sort by** in the revealed menu 1157 | - Edit the title by click on the title in the chart to **Count of product category by end user country**(optional) 1158 | 1159 | --- 1160 | 1161 | ### (Optional) Generate visualization to show device form factor ratio 1162 | 1163 |
1164 | CLICK TO EXPAND FOR OPTIONAL SECTION 1165 | 1166 | ![device-form-factor-chart.png](./assets/device-form-factor-chart.png) 1167 | 1168 | **Use case:** Based on the popular device form factor(s) that your global customers are using to browse your website you can prioritize customization and optimization of your content on those form factor(s) 1169 | 1170 | - Click on **Add** from the QuickSight menu on the top to **Add Visual** from the pop up menu 1171 | - Ensure that the selected region is **Ireland** in top right corner 1172 | - Click the **Filter** icon in the QuickSight navigation pane 1173 | - Click on **+** symbol next to **Applied Filters** 1174 | - Select **deviceformfactor** field in the pop up menu 1175 | - Choose the new filter that you just created by clicking on filter name **deviceformfactor** 1176 | - Choose **Custom filter** from the drop down for **Filter type** 1177 | - For the second drop down under **Filter type** choose **Does not equal** 1178 | - Type *null* in the text box. 1179 | - Click **Apply** 1180 | - Click **Close** 1181 | 1182 | ![device-form-factor-visualize.png](./assets/device-form-factor-visualize.png) 1183 | 1184 | - Click the **Visualize** icon in the QuickSight navigation pane 1185 | - Select the **Pie chart** under **Visual types** 1186 | - Drag and drop the **deviceformfactor** field into **Group/Color** in the **Field wells** section 1187 | - Edit the title by click on the title in the chart to **Device form factor Ratio** (optional) 1188 | 1189 |
1190 | 1191 | --- 1192 | 1193 | ### (Optional) Generate visualization to show device form factor ratio by viewer country 1194 | 1195 |
1196 | CLICK TO EXPAND FOR OPTIONAL SECTION 1197 | 1198 | ![device-form-factor-visualize-2.png](./assets/device-form-factor-visualize-2.png) 1199 | 1200 | **Use case:** Based on the popular device form factor(s) that customers in a particular region or country are using to browse your website you can prioritize customization and optimization of your content on those form factor(s) 1201 | 1202 | - Click the **Filter** icon in the QuickSight navigation pane 1203 | - Click on **+** symbol next to Applied Filters 1204 | - Choose the new filter that you just created by clicking on filter name **viewercountry** 1205 | - Select one of the values (e.g. **IN**) except **DE** 1206 | - Click **Apply** 1207 | - Click **Close** 1208 | - Click the **Visualize** icon in the QuickSight navigation pane 1209 | 1210 |
1211 | 1212 | --- 1213 | --- 1214 | 1215 | ## License Summary 1216 | 1217 | This sample code is made available under a modified MIT license. See the LICENSE file. -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/alb-access-optimized.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/alb-access-optimized.png -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/amazon-s3-create-bucket.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/amazon-s3-create-bucket.png -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/amazon-s3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/amazon-s3.png -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/architecture-diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/architecture-diagram.png -------------------------------------------------------------------------------- 
/lab1-serveless-cloudfront-log-analysis/assets/architecture-overview-all.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/architecture-overview-all.png -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/assets.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/assets.txt -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/athena-database.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/athena-database.png -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/athena-table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/athena-table.png -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/cf-access-optimized.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/cf-access-optimized.png 
-------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/combine-schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/combine-schema.png -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/combined-logs-all.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/combined-logs-all.png -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/device-form-factor-chart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/device-form-factor-chart.png -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/device-form-factor-visualize-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/device-form-factor-visualize-2.png -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/device-form-factor-visualize.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/device-form-factor-visualize.png -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/edge-to-origin-chart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/edge-to-origin-chart.png -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/edge-to-origin-filter-summary.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/edge-to-origin-filter-summary.png -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/edge-to-origin-filter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/edge-to-origin-filter.png -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/edge-to-origin-visualize.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/edge-to-origin-visualize.png 
-------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/edge-to-origin-x-axis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/edge-to-origin-x-axis.png -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/glue-job-complete.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/glue-job-complete.png -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/lambda-edge.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/lambda-edge.png -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/le-combined-logs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/le-combined-logs.png -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/log-collection.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/log-collection.png -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/origin-request-optimized.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/origin-request-optimized.png -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/product-category-chart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/product-category-chart.png -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/product-category-filter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/product-category-filter.png -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/product-category-sort.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/product-category-sort.png -------------------------------------------------------------------------------- 
/lab1-serveless-cloudfront-log-analysis/assets/quicksight-account-create.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/quicksight-account-create.png -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/quicksight-athena-ds.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/quicksight-athena-ds.png -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/quicksight-datasource.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/quicksight-datasource.png -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/quicksight-edition.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/quicksight-edition.png -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/quicksight-manage.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/quicksight-manage.png -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/quicksight-new-field.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/quicksight-new-field.png -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/quicksight-permission.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/quicksight-permission.png -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/quicksight-region-selection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/quicksight-region-selection.png -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/quicksight-s3-bucket-selection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/quicksight-s3-bucket-selection.png -------------------------------------------------------------------------------- 
/lab1-serveless-cloudfront-log-analysis/assets/quicksight-signup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/quicksight-signup.png -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/quicksight-status-code-filter-summary.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/quicksight-status-code-filter-summary.png -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/quicksight-status-code-pop-filter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/quicksight-status-code-pop-filter.png -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/quicksight-status-code-pop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/quicksight-status-code-pop.png -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/quicksight-status-code-visualize-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/quicksight-status-code-visualize-1.png -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/quicksight-status-code-visualize-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/quicksight-status-code-visualize-2.png -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/quicksight-table-selection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/quicksight-table-selection.png -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/quicksight-visualization-all.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/quicksight-visualization-all.png -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/time-taken-chart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/time-taken-chart.png 
-------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/time-taken-filter-summary.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/time-taken-filter-summary.png -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/time-taken-visualize-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/time-taken-visualize-2.png -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/time-taken-visualize.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/time-taken-visualize.png -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/assets/viewer-request-optimized.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/viewer-request-optimized.png -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/lelogconverter.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from awsglue.transforms import * 3 | from 
awsglue.utils import getResolvedOptions 4 | from pyspark.context import SparkContext 5 | from awsglue.context import GlueContext 6 | from awsglue.job import Job 7 | 8 | ## @params: [JOB_NAME] 9 | args = getResolvedOptions(sys.argv, ['JOB_NAME']) 10 | 11 | sc = SparkContext() 12 | glueContext = GlueContext(sc) 13 | spark = glueContext.spark_session 14 | job = Job(glueContext) 15 | job.init(args['JOB_NAME'], args) 16 | 17 | ################################################################################################################## 18 | # VIEWER REQUEST LAMBDA@EDGE LOGS - Conversion from JSon to Parquet format partitioned by year, month, date hour # 19 | ################################################################################################################## 20 | ## Create dyanmaic frame from raw(Json format) viewer request Lambda@Edge logs as the datasource. Glue Data Catalog = {database = "reInvent2018_aws_service_logs", table_name = "le_log_raw_viewer_request"} 21 | viewerRequestLELog = glueContext.create_dynamic_frame.from_catalog(database = "reInvent2018_aws_service_logs", table_name = "le_log_raw_viewer_request", transformation_ctx = "viewerRequestLELog") 22 | 23 | ## Map the viewer request Lambda@Edge logs to target format 24 | mappedViewerRequestLELog = ApplyMapping.apply(frame = viewerRequestLELog, mappings = [("executionregion", "string", "executionregion", "string"), ("requestid", "string", "requestid", "string"), ("distributionid", "string", "distributionid", "string"), ("distributionname", "string", "distributionname", "string"), ("eventtype", "string", "eventtype", "string"), ("requestdata", "string", "requestdata", "string"), ("customtraceid", "string", "customtraceid", "string"), ("useragentstring", "string", "useragentstring", "string"), ("partition_0", "string", "year", "string"), ("partition_1", "string", "month", "string"), ("partition_2", "string", "date", "string"), ("partition_3", "string", "hour", "string")], transformation_ctx = 
"mappedViewerRequestLELog") 25 | 26 | ## Resolves a choice type within a DynamicFrame 27 | resolvedViewerRequestLELog = ResolveChoice.apply(frame = mappedViewerRequestLELog, choice = "make_struct", transformation_ctx = "resolvedViewerRequestLELog") 28 | 29 | ## Drops all null fields in a DynamicFrame whose type is NullType 30 | cleanedViewerRequestLELog = DropNullFields.apply(frame = resolvedViewerRequestLELog, transformation_ctx = "cleanedViewerRequestLELog") 31 | 32 | ## Write the viewer request Lambda@Edge logs to the S3 path(s3://cf-log-bucket-lab/converted/lelogs/viewer-request) in the optimized (Parquet) format partitioned by year, month, date hour 33 | viewerRequestLELogSink = glueContext.write_dynamic_frame.from_options(frame = cleanedViewerRequestLELog, connection_type = "s3", connection_options = {"path": "s3://us-east-1.data-analytics/cflogworkshop/optimized/lelogs/viewer-request", "partitionKeys": ["year", "month", "date", "hour"]}, format = "parquet", transformation_ctx = "viewerRequestLELogSink") 34 | 35 | 36 | ################################################################################################################## 37 | # ORIGIN REQUEST LAMBDA@EDGE LOGS - Conversion from JSon to Parquet format partitioned by year, month, date hour # 38 | ################################################################################################################## 39 | ## Create dyanmaic frame from raw(Json format) origin request Lambda@Edge logs as the datasource. 
Glue Data Catalog = {database = "reInvent2018_aws_service_logs", table_name = "le_log_raw_origin_request"} 40 | originRequestLELog = glueContext.create_dynamic_frame.from_catalog(database = "reInvent2018_aws_service_logs", table_name = "le_log_raw_origin_request", transformation_ctx = "originRequestLELog") 41 | 42 | ## Map the origin request Lambda@Edge logs to target format 43 | mappedOriginRequestLELog = ApplyMapping.apply(frame = originRequestLELog, mappings = [("executionregion", "string", "executionregion", "string"), ("requestid", "string", "requestid", "string"), ("distributionid", "string", "distributionid", "string"), ("distributionname", "string", "distributionname", "string"), ("eventtype", "string", "eventtype", "string"), ("requestdata", "string", "requestdata", "string"), ("viewercountry", "string", "viewercountry", "string"), ("deviceformfactor", "string", "deviceformfactor", "string"), ("customtraceid", "string", "customtraceid", "string"), ("partition_0", "string", "year", "string"), ("partition_1", "string", "month", "string"), ("partition_2", "string", "date", "string"), ("partition_3", "string", "hour", "string")], transformation_ctx = "mappedOriginRequestLELog") 44 | 45 | ## Resolves a choice type within a DynamicFrame 46 | resolvedOriginRequestLELog = ResolveChoice.apply(frame = mappedOriginRequestLELog, choice = "make_struct", transformation_ctx = "resolvedOriginRequestLELog") 47 | 48 | ## Drops all null fields in a DynamicFrame whose type is NullType 49 | cleanedOriginRequestLELog = DropNullFields.apply(frame = resolvedOriginRequestLELog, transformation_ctx = "cleanedOriginRequestLELog") 50 | 51 | ## Write the origin request Lambda@Edge logs to the S3 path(s3://cf-log-bucket-lab/converted/lelogs/origin-request) in the optimized (Parquet) format partitioned by year, month, date hour 52 | originRequestLELogSink = glueContext.write_dynamic_frame.from_options(frame = cleanedOriginRequestLELog, connection_type = "s3", connection_options = 
{"path": "s3://us-east-1.data-analytics/cflogworkshop/optimized/lelogs/origin-request", "partitionKeys": ["year", "month", "date", "hour"]}, format = "parquet", transformation_ctx = "originRequestLELogSink") 53 | 54 | job.commit() -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/log-combiner-glue-script.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from awsglue.transforms import * 3 | from awsglue.utils import getResolvedOptions 4 | from pyspark.context import SparkContext 5 | from pyspark.sql.functions import split 6 | from awsglue.context import GlueContext 7 | from awsglue.dynamicframe import DynamicFrame 8 | from awsglue.job import Job 9 | 10 | ## @params: [JOB_NAME] 11 | args = getResolvedOptions(sys.argv, ['JOB_NAME', 'target_s3_bucket']) 12 | 13 | sc = SparkContext() 14 | glueContext = GlueContext(sc) 15 | spark = glueContext.spark_session 16 | job = Job(glueContext) 17 | job.init(args['JOB_NAME'], args) 18 | 19 | ################################################################ 20 | # Combining Lambda@Edge Logs -[origin-request, viewer-request] # 21 | ################################################################ 22 | 23 | ## Create dyanmaic frame from optimized(Parquet format) Amazon Lambda@Edge viewer request logs as the datasource. 
Glue Data Catalog = {database = "reInvent2018_aws_service_logs", table_name = "lambdaedge_logs_viewer_request_optimized"} 24 | labdaEdgeViewerRequestLogs = glueContext.create_dynamic_frame.from_catalog(database = "reInvent2018_aws_service_logs", table_name = "lambdaedge_logs_viewer_request_optimized", transformation_ctx = "labdaEdgeViewerRequest") 25 | 26 | ## Drop the fields that are duplicate between Lambda@Edge viewer request logs and Lambda@Edge origin request logs 27 | modifiedLEViewerRequestLogs = DropFields.apply(frame = labdaEdgeViewerRequestLogs, paths=["eventtype"], transformation_ctx ="modifiedLEViewerRequestLogs") 28 | 29 | ## Create dyanmaic frame from optimized(Parquet format) Amazon Lambda@Edge origin request logs as the datasource. Glue Data Catalog = {database = "reInvent2018_aws_service_logs", table_name = "lambdaedge_logs_viewer_origin_optimized"} 30 | labdaEdgeOriginRequestLogs = glueContext.create_dynamic_frame.from_catalog(database = "reInvent2018_aws_service_logs", table_name = "lambdaedge_logs_origin_request_optimized", transformation_ctx = "labdaEdgeOriginRequest") 31 | 32 | ## Drop the fields that are duplicate between Lambda@Edge viewer request logs and Lambda@Edge origin request logs 33 | trimmedLEOriginRequestLogs = DropFields.apply(frame = labdaEdgeOriginRequestLogs, paths=["executionregion", "distributionid", "distributionname", "requestdata", "customtraceid", "eventtype", "year", "month", "date", "hour"], transformation_ctx ="trimmedLEOriginRequestLogs") 34 | 35 | ## Rename the requestid field for Lambda@Edge origin request logs to origin requestid 36 | modifiedLEOriginRequestLogs = RenameField.apply(frame = trimmedLEOriginRequestLogs, old_name = "requestid", new_name = "origin_requestid", transformation_ctx ="modifiedLEOriginRequestLogs" ) 37 | 38 | ## Convert to DataFrame 39 | modifiedLEOriginRequestLogsDF = modifiedLEOriginRequestLogs.toDF() 40 | 41 | ## Convert to DataFrame 42 | modifiedLEViewerRequestLogsDF = 
modifiedLEViewerRequestLogs.toDF() 43 | 44 | ## Join(left outer join) the Lambda@Edge viewer-request logs with the origin-request logs based on the requestid 45 | combinedLambdaEdgeLogsDF = modifiedLEViewerRequestLogsDF.join(modifiedLEOriginRequestLogsDF, modifiedLEViewerRequestLogsDF["requestid"] == modifiedLEOriginRequestLogsDF["origin_requestid"], "left_outer") 46 | 47 | ## Convert to DynamicFrame 48 | combinedLambdaEdgeLogs = DynamicFrame.fromDF(combinedLambdaEdgeLogsDF, glueContext, "combinedLambdaEdgeLogs") 49 | 50 | ## Join the Lambda@Edge viewer-request logs with the origin-request logs based on the requestid 51 | #combinedLambdaEdgeLogs = Join.apply(modifiedLEViewerRequestLogs, modifiedLEOriginRequestLogs, 'requestid', 'origin_requestid') 52 | 53 | ## Drop the origin_requestid field 54 | lambdaEdgeLogs = DropFields.apply(frame = combinedLambdaEdgeLogs, paths=["origin_requestid"], transformation_ctx ="lambdaEdgeLogs") 55 | 56 | ## Drop the "year", "month", "date", "hour" and "useragentstring" fields 57 | trimmedLambdaEdgeLogs = DropFields.apply(frame =lambdaEdgeLogs, paths=["year", "month", "date", "hour", "useragentstring"], transformation_ctx ="trimmedLambdaEdgeLogs") 58 | 59 | ## Convert to DataFrame 60 | trimmedLambdaEdgeLogsDF = trimmedLambdaEdgeLogs.toDF() 61 | 62 | # Destination S3 location for the combined Lambda@Edge logs 63 | leLogDestPath = "s3://" + args['target_s3_bucket'] + "/combined/lelogs" 64 | 65 | ## Write the combined Lambda@Edge logs to S3 (s3://<target_s3_bucket>/combined/lelogs) in optimized Parquet format partitioned by year, month, date, hour 66 | lambdaEdgeLogsSink = glueContext.write_dynamic_frame.from_options(frame = lambdaEdgeLogs, connection_type = "s3", connection_options = {"path": leLogDestPath, "partitionKeys": ["year", "month", "date", "hour"]}, format = "parquet", transformation_ctx = "lambdaEdgeLogsSink") 67 | 68 | ######################################################################## 69 | # Combining Lambda@Edge Logs , CloudFront Access Logs, ALB Access Logs # 70 
| ######################################################################## 71 | 72 | ## Create dyanmaic frame from optimized(Parquet format) Amazon CloudFront access logs as the datasource. Glue Data Catalog = {database = "reInvent2018_aws_service_logs", table_name = "cf_access_optimized"} 73 | cfLog = glueContext.create_dynamic_frame.from_catalog(database = "reInvent2018_aws_service_logs", table_name = "cf_access_optimized", transformation_ctx = "cfLog") 74 | 75 | ## Rename the requestid field in the ALB logs to cf_requestid 76 | modifiedCFLogs = RenameField.apply(frame = cfLog, old_name = "requestid", new_name = "cf_requestid", transformation_ctx ="modifiedCFLogs" ) 77 | 78 | ## Convert to DataFrame 79 | modifiedCFLogsDF = modifiedCFLogs.toDF() 80 | 81 | ## Create dyanmaic frame from optimized(Parquet format) Application Loadbalancer logs as the datasource. Glue Data Catalog = {database = "reInvent2018_aws_service_logs", table_name = "alb_access_optimized"} 82 | albLogs = glueContext.create_dynamic_frame.from_catalog(database = "reInvent2018_aws_service_logs", table_name = "alb_access_optimized", transformation_ctx = "albLog") 83 | 84 | ## Drop the "year", "month", "day", "hour" fields 85 | trimmedALBLogs = DropFields.apply(frame = albLogs, paths=["year", "month", "day", "hour"], transformation_ctx ="trimmedALBLogs") 86 | 87 | ## Rename the time field in the ALB logs to alb_time 88 | modifiedALBLogs = RenameField.apply(frame = trimmedALBLogs, old_name = "time", new_name = "alb_time", transformation_ctx ="modifiedALBLogs" ) 89 | 90 | ## Convert ALB Log dynamic frame to Apache Spark data frame 91 | modfiedALBLogDF = modifiedALBLogs.toDF() 92 | 93 | ## Extract the custom trace id from the albLog coloumn name trace_id in the alb logs, as the Application Load Balancer would have updated the trace_id value with the self field 94 | split_col = split(modfiedALBLogDF['trace_id'], ';') 95 | finalALBLogDF = modfiedALBLogDF.withColumn("custom_trace_id", split_col.getItem(1)) 
96 | 97 | ## Join(let outer join) the Lambda@Edge logs with the ALB logs based on the custom trace id 98 | leALBCombinedLogsDF = trimmedLambdaEdgeLogsDF.join(finalALBLogDF, trimmedLambdaEdgeLogsDF["customtraceid"] == finalALBLogDF["custom_trace_id"], "left_outer") 99 | 100 | ## Join(let outer join) the CloudFront access logs with the combine Lambda@Edge and ALB logs based on the requestid 101 | combinedLogsDF = modifiedCFLogsDF.join(leALBCombinedLogsDF, modifiedCFLogsDF["cf_requestid"] == leALBCombinedLogsDF["requestid"], "left_outer") 102 | 103 | ## Convert the ALB Log data frame to dynamic frame 104 | combinedLogs = DynamicFrame.fromDF(combinedLogsDF, glueContext, "combinedLogs") 105 | 106 | ## Drop custom trace id and requestid from combined logs 107 | finalCombinedLogs = DropFields.apply(frame = combinedLogs, paths=["custom_trace_id", "cf_requestid"], transformation_ctx ="finalCombinedLogs") 108 | 109 | #Destnation S3 loaction for combine logs 110 | logDestPath = "s3://" + args['target_s3_bucket'] + "/combined/logs" 111 | 112 | ## Write the combined Lambda@Edge logs to S3 (s3:///combined/lelogs) in optimized Parquet format partitioned by year, month, day 113 | finalCombinedLogsSink = glueContext.write_dynamic_frame.from_options(frame = finalCombinedLogs, connection_type = "s3", connection_options = {"path": logDestPath, "partitionKeys": ["year", "month", "day"]}, format = "parquet", transformation_ctx = "finalCombinedLogsSink") 114 | 115 | job.commit() -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/originRequest-Lambda/index.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const AWS = require('aws-sdk'); 4 | const firehose = new AWS.Firehose({region: ''}); 5 | 6 | const streamName = "" 7 | console.log("StreamName: ", streamName); 8 | 9 | function parseRequest(eventData) { 10 | 11 | var parsedJson = {}; 12 | 13 | 
parsedJson.executionRegion = process.env.AWS_REGION; 14 | parsedJson.requestId = null; 15 | parsedJson.distributionId = eventData.config.distributionId; 16 | parsedJson.distributionName = eventData.config.distributionDomainName; 17 | parsedJson.eventType = eventData.config.eventType; 18 | parsedJson.customTraceId = null; 19 | parsedJson.viewerCountry = "unknown"; 20 | parsedJson.deviceFormFactor = "unknown"; 21 | 22 | if(eventData.request.headers["x-request-id"]) { //check if the custom header exists 23 | parsedJson.requestId = eventData.request.headers["x-request-id"][0].value; 24 | } 25 | 26 | if(eventData.request.headers["x-my-trace-id"]) { //check if the custom header exists, this is added as part of client side instrumentation 27 | parsedJson.customTraceId = eventData.request.headers["x-my-trace-id"][0].value; 28 | } 29 | 30 | if(eventData.request.headers["cloudfront-viewer-country"]) { //check if the custom header exists, this is added by Amazon CloudFront if the headers are whitelisted 31 | parsedJson.viewerCountry = eventData.request.headers["cloudfront-viewer-country"][0].value; 32 | } 33 | 34 | if(eventData.request.headers["cloudfront-is-mobile-viewer"] && eventData.request.headers["cloudfront-is-mobile-viewer"][0].value == 'true') { //check if the custom header exists, this is added by Amazon CloudFront if the headers are whitelisted 35 | parsedJson.deviceFormFactor = "mobile"; 36 | } else if (eventData.request.headers["cloudfront-is-tablet-viewer"] && eventData.request.headers["cloudfront-is-tablet-viewer"][0].value == 'true') { 37 | parsedJson.deviceFormFactor = "tablet"; 38 | } else if (eventData.request.headers["cloudfront-is-smarttv-viewer"] && eventData.request.headers["cloudfront-is-smarttv-viewer"][0].value == 'true') { 39 | parsedJson.deviceFormFactor = "smarttv"; 40 | } else if (eventData.request.headers["cloudfront-is-desktop-viewer"] && eventData.request.headers["cloudfront-is-desktop-viewer"][0].value == 'true') { 41 | 
parsedJson.deviceFormFactor = "desktop"; 42 | } 43 | 44 | console.log("parsed-request : ", JSON.stringify(parsedJson, null, 2)); 45 | 46 | return parsedJson; 47 | } 48 | 49 | function sendToKinesisFirehose(logMsg, stream){ 50 | 51 | var params = { 52 | DeliveryStreamName: stream, 53 | Record: { 54 | Data: JSON.stringify(logMsg) + "\n" 55 | } 56 | }; 57 | 58 | firehose.putRecord(params, function(err, data) { 59 | if (err) console.log(err, err.stack); // an error occurred 60 | else console.log(data); // successful response 61 | }); 62 | 63 | console.log("firehosed-logmessage : ", JSON.stringify(logMsg, null, 2)); 64 | } 65 | 66 | exports.handler = (event, context, callback) => { 67 | 68 | console.log("StreamName: ", streamName); 69 | console.log("request-event: ", JSON.stringify(event, null, 2)); 70 | 71 | const request = event.Records[0].cf.request; 72 | 73 | const parsedRequestJson = parseRequest(event.Records[0].cf); 74 | sendToKinesisFirehose(parsedRequestJson, streamName); 75 | 76 | //Rejecting requests from EU(Frankfurt) with viewer country code = 'DE' 77 | if(parsedRequestJson.viewerCountry == 'DE' ){ 78 | const response = { 79 | status: '302', 80 | statusDescription: 'Found', 81 | headers: { 82 | location: [{ 83 | key: 'Location', 84 | value: 'https://' + parsedRequestJson.distributionName + '/notavailable.html', 85 | }], 86 | }, 87 | }; 88 | callback(null, response); 89 | } 90 | else { 91 | callback(null, request); 92 | } 93 | }; -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/sample-logs/raw-logs/sample-alb-logs.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/sample-logs/raw-logs/sample-alb-logs.gz -------------------------------------------------------------------------------- 
/lab1-serveless-cloudfront-log-analysis/sample-logs/raw-logs/sample-cloudfront-access-logs.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/sample-logs/raw-logs/sample-cloudfront-access-logs.gz -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/sample-logs/raw-logs/sample-lambda-at-edge-origin-request-logs.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/sample-logs/raw-logs/sample-lambda-at-edge-origin-request-logs.gz -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/sample-logs/raw-logs/sample-lambda-at-edge-viewer-request.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/sample-logs/raw-logs/sample-lambda-at-edge-viewer-request.gz -------------------------------------------------------------------------------- /lab1-serveless-cloudfront-log-analysis/viewerRequest-Lambda/index.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const AWS = require('aws-sdk'); 4 | const firehose = new AWS.Firehose({region: ''}); 5 | 6 | const streamName = ""; 7 | console.log("StreamName: ", streamName); 8 | 9 | function sleep(delayInSeconds) { 10 | console.log("Adding Delay in Seconds: " + delayInSeconds); 11 | return new Promise(resolve => setTimeout(resolve, delayInSeconds*1000)); 12 | } 13 | 14 | function 
parseRequest(eventData) { 15 | 16 | var parsedJson = {}; 17 | 18 | parsedJson.executionRegion = process.env.AWS_REGION; 19 | parsedJson.requestId = eventData.config.requestId; 20 | parsedJson.distributionId = eventData.config.distributionId; 21 | parsedJson.distributionName = eventData.config.distributionDomainName; 22 | parsedJson.eventType = eventData.config.eventType; 23 | parsedJson.requestData = null; 24 | parsedJson.customTraceId = null; 25 | parsedJson.userAgentString = null; 26 | 27 | if(eventData.request.body.data) { //check if the request data is not empty, in case of the GET method this field could be empty 28 | parsedJson.requestData = Buffer.from(eventData.request.body.data, 'base64').toString(); 29 | } 30 | 31 | if(eventData.request.headers["x-my-trace-id"]) { //check if the custom header exists, this is added as part of client side instrumentation 32 | parsedJson.customTraceId = eventData.request.headers["x-my-trace-id"][0].value; 33 | } 34 | 35 | if(eventData.request.headers["user-agent"]) { //check if the custom header exists, this is added as part of client side instrumentation 36 | parsedJson.userAgentString = eventData.request.headers["user-agent"][0].value; 37 | } 38 | 39 | console.log("parsed-request : ", JSON.stringify(parsedJson, null, 2)); 40 | 41 | return parsedJson; 42 | } 43 | 44 | function sendToKinesisFirehose(logMsg, stream){ 45 | 46 | var params = { 47 | DeliveryStreamName: stream, 48 | Record: { 49 | Data: JSON.stringify(logMsg) + "\n" 50 | } 51 | }; 52 | 53 | firehose.putRecord(params, function(err, data) { 54 | if (err) console.log(err, err.stack); // an error occurred 55 | else console.log(data); // successful response 56 | }); 57 | 58 | console.log("firehosed-logmessage : ", JSON.stringify(logMsg, null, 2)); 59 | } 60 | 61 | exports.handler = (event, context, callback) => { 62 | 63 | console.log("StreamName: ", streamName); 64 | console.log("request-event: ", JSON.stringify(event, null, 2)); 65 | 66 | const requestId = 
event.Records[0].cf.config.requestId; 67 | const request = event.Records[0].cf.request; 68 | 69 | //Adding custom header with the requestId from cloudfront 70 | request.headers['x-request-id'] = [{ 71 | "key": "x-request-id", 72 | "value": requestId, 73 | }]; 74 | 75 | console.log("modified-request: ", JSON.stringify(request, null, 2)); 76 | 77 | const parsedRequestJson = parseRequest(event.Records[0].cf); 78 | 79 | sendToKinesisFirehose(parsedRequestJson, streamName); 80 | 81 | //Adding Edge to Origin Delay 82 | //if(Math.floor(Math.random() * (4 - 0)) == 0) { 83 | // sleep(Math.floor(Math.random() * (3 - 0))); 84 | //} 85 | 86 | //Rejecting requests based on user agent 87 | if(request.headers['user-agent'] && 88 | (request.headers['user-agent'][0].value == "" || 89 | request.headers['user-agent'][0].value == "")) { 90 | const response = { 91 | status: '302', 92 | statusDescription: 'Found', 93 | headers: { 94 | location: [{ 95 | key: 'Location', 96 | value: 'https://' + parsedRequestJson.distributionName + '/notavailable.html', 97 | }], 98 | }, 99 | }; 100 | callback(null, response); 101 | }else { 102 | callback(null, request); 103 | } 104 | }; 105 | -------------------------------------------------------------------------------- /lab2-elk-cloudfront-log-analysis/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Lab2: CloudFront log analysis using ELK 3 | [CloudFront access logs](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/AccessLogs.html) provide rich insights on your customer behavior. The insights gained by analysis of Amazon CloudFront access logs helps improve website availability through bot detection and mitigation, optimizing web content based on the devices and browser used to view your webpages, reducing perceived latency by caching of popular object closer to its viewer, and so on. This results in a significant improvement in the overall perceived experience for the user. 
4 | 5 | In this lab, you will be building an ELK (ElasticSearch, Logstash and Kibana) stack on AWS to analyze the CloudFront access logs by loading them from Amazon S3 bucket. 6 | 7 | [Amazon Elasticsearch Service](https://aws.amazon.com/elasticsearch-service/) (Amazon ES) is a fully managed service that delivers Elasticsearch’s easy-to-use APIs and real-time capabilities along with the availability, scalability, and security required by production workloads. This service offers built-in integrations with [Kibana](https://aws.amazon.com/elasticsearch-service/kibana/), [Logstash](https://aws.amazon.com/elasticsearch-service/logstash/), and AWS services including [Amazon Kinesis Firehose](https://aws.amazon.com/kinesis/firehose/), [AWS Lambda](https://aws.amazon.com/lambda/), and [Amazon CloudWatch](https://aws.amazon.com/cloudwatch/), so that you can build log analysis solutions quickly. 8 | 9 | Logstash provides out-of-the box plugins such as [grok](https://www.elastic.co/guide/en/logstash/6.4/plugins-filters-grok.html) for filtering and enriching the data, derives [geo coordinates from Ip addresses](https://www.elastic.co/guide/en/logstash/6.4/plugins-filters-geoip.html) before ingesting the data to ElasticSearch domain. Kibana provides a broad set of visualization, filtering and aggregation options to analyze your data that is stored in ElasticSearch domain. 10 | 11 | In this lab, you will visualize CloudFront access behavior using Kibana Geo-spatial visualization options such as [Regional](https://www.elastic.co/guide/en/kibana/current/regionmap.html) and [Coordinate graphs](https://www.elastic.co/guide/en/kibana/current/tilemap.html). These maps can provide nice insights about your customer behaviour as well as latency information of your CloudFront distribution for various geo locations. 12 | 13 | Note: We will use a sample access logs generated from our demo environment. 
In a production scenario, you can just change the Logstash configuration to poll the logs from your S3 bucket or configure CloudFront distribution logs to deliver the bucket used in this Lab. 14 | 15 | ## High Level Architecture Overview 16 | The solution involves S3 bucket for storing CloudFront access logs, Logstash deployed on EC2, an nginx proxy on EC2 instance and an ElasticSearch domain with built-in Kibana setup. The EC2 instances will be launched in a VPC. The AWS resources will be provisioned via CloudFormation template. Amazon ElasticSearch service provides [various options](https://aws.amazon.com/blogs/security/how-to-control-access-to-your-amazon-elasticsearch-service-domain/) such as resource and identity based policies to control access to the domain. In this solution, we will be leveraging IP based policies to restrict the access to the domain to Logstash and proxy servers only. Access to Kibana [will be controlled](https://docs.aws.amazon.com/elasticsearch-service/latest/developerguide/es-kibana.html#es-kibana-access) via a proxy solution. We will be leveraging a basic http authentication for proxy service to prevent anonymous access. 17 | 18 | ![](assets/architecture.png) 19 | 20 | ## Pre-requisites 21 | This module requires: 22 | - You should have active AWS account with Administrator IAM role. 23 | 24 | ## Create a Key Pair for EC2 Instances 25 | 26 | In this task, you will need to create a key pair so that we can use this keypair to launch EC2 instances and SSH into it. The following steps outline creating a unique SSH keypair for you to use in this lab. 27 | 28 | 1. Sign into the AWS Management Console and open the Amazon EC2 console at [https://console.aws.amazon.com/ec2](https://console.aws.amazon.com/ec2). 29 | 30 | 2. In the upper-right corner of the AWS Management Console, confirm you are in the desired AWS region i.e. EU West (Ireland). 31 | 32 | 3. 
Click on **Key Pairs** in the NETWORK & SECURITY section near the bottom of the leftmost menu. This will display a page to manage your SSH key pairs. 33 | 34 | ![](assets/keyPair1.png) 35 | 36 | 4. To create a new SSH key pair, click the **Create Key Pair** button at the top of the browser window. 37 | 38 | ![](assets/keyPair2.png) 39 | 40 | 5. In the resulting pop up window, type **_[First Name]-[Last Name]-Reinvent_** into the **Key Pair Name:** text box and click **Create.** 41 | 42 | ![](assets/keyPair3.png) 43 | 44 | 6. The page will download the file **[Your-Name]-Reinvent.pem** to the local drive. Follow the browser instructions to save the file to the default download location. 45 | 46 | 7. Remember the full path to the file .pem file you just downloaded. You will use this Key Pair to manage your EC2 instances for the rest of the lab. 47 | 48 | ## Deploy Solution 49 | In this section we will deploy the solution using CloudFormation template. This CloudFormation template will create required resources for this solution including: 50 | 51 | - A VPC with IGW, two public subnets 52 | - Nginx proxy installed on a EC2 instance with an Elastic IP Address 53 | - Logstash installed on a EC2 instance with Elastic IP address 54 | - A S3 bucket in your region which stores a sample CloudFront access logs 55 | - EC2 IAM role with policies to access the Amazon S3 56 | - Amazon ES domain with 2 nodes with IP-based access policy with access restricted to only Nginx proxy and Logstash instances 57 | 58 | :warning: **Default limit of VPCs per AWS Region is 5. This CloudFormation template needs to create a VPC.** 59 | 60 | The template gives the following outputs: 61 | 62 | - Amazon ES domain and Kibana Endpoints. 63 | - Elastic IP details of Logstash and Nginx proxy servers 64 | - Nginx IP URLs for the Amazon ES Kibana through the proxy. You can use this to access the Kibana. 65 | 66 | 1. 
Click on **Launch Stack** button below to launch CloudFormation template in EU (Ireland) AWS region. 67 | 68 | Region| Launch 69 | ------|----- 70 | US East (Ohio) | [![Launch Who-is-Who Workshop in us-east-2](http://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/images/cloudformation-launch-stack-button.png)](https://console.aws.amazon.com/cloudformation/home?region=us-east-2#/stacks/create/review?stackName=CF-LogAnalysis2018&templateURL=https://s3-eu-west-1.amazonaws.com/eu-west-1.data-analytics/labcontent/reInvent2018-ctd410/lab2/templates/CloudFront-Analysis-ELK-Lab.json) 71 | US West (Oregon) | [![Launch Who-is-Who Workshop in us-west-2](http://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/images/cloudformation-launch-stack-button.png)](https://console.aws.amazon.com/cloudformation/home?region=us-west-2#/stacks/create/review?stackName=CF-LogAnalysis2018&templateURL=https://s3-eu-west-1.amazonaws.com/eu-west-1.data-analytics/labcontent/reInvent2018-ctd410/lab2/templates/CloudFront-Analysis-ELK-Lab.json) 72 | EU (Ireland) | [![Launch Who-is-Who Workshop in eu-west-1](http://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/images/cloudformation-launch-stack-button.png)](https://console.aws.amazon.com/cloudformation/home?region=eu-west-1#/stacks/create/review?CF-LogAnalysis2018&templateURL=https://s3-eu-west-1.amazonaws.com/eu-west-1.data-analytics/labcontent/reInvent2018-ctd410/lab2/templates/CloudFront-Analysis-ELK-Lab.json) 73 | 74 | 2. Enter a unique name for your Stack in **Stack name** text box or you can use the default name **CF-LogAnalysis2018**. 75 | 76 | 3. Select the key pair you created in previous section. 77 | 78 | 4. Update **KibanaPassword** field. Default password is set to **admin123** but we highly recommend to update it to a strong password. 79 | 80 | 5. Under Create stack, check both checkboxes for **I acknowledge that AWS CloudFormation might create IAM resources with custom names** and click **Create** button. 
81 | 82 | :warning: **Default limit of VPCs per AWS Region is 5. This CloudFormation template needs to create a VPC.** 83 | 84 | :warning: **We recommend that you restrict the access to the EC2 instances for your specific IP range in production environments. By default, this setup allows SSH and HTTP access to `0.0.0.0/0`** 85 | 86 | ![](assets/Cf1.png) 87 | 88 | 6. You should now see the screen with status **CREATE_IN_PROGRESS**. Click on the **Stacks** link in the top navigation to see current CloudFormation stacks. 89 | 90 | ![](assets/Cf2.png) 91 | 92 | 7. Click on the checkbox next to the stack to see additional details below. 93 | 94 | ![](assets/Cf3.png) 95 | 96 | 8. CloudFormation template will take around 10 minutes to complete. Wait until CloudFormation stack status changes to **CREATE_COMPLETE**. 97 | 98 | ![](assets/Cf4.png) 99 | 100 | 9. Click on "Output" tab and note down the outputs as we will be referring to these values in next steps. 101 | 102 | ![](assets/Cf5png.png) 103 | 104 | ## Verify Amazon Elasticsearch Domain access policy 105 | 1. Go to Amazon Elasticsearch(ES) console: https://console.aws.amazon.com/es 106 | 107 | 2. Click on the Elasticsearch domain CloudFormation Template has created. 108 | 109 | ![](assets/esDomain1.png) 110 | 111 | 3. Click on the button **Modify access policy** 112 | 113 | ![](assets/esDomain2.png) 114 | 115 | 4. Verify that the Elasticsearch domain access policy has a full access to this ES domain for the IP addresses of Logstash and Nginx proxy servers. You can verify the IP addresses of servers from Cloudformation output values as shown in the screenshot. 116 | 117 | ![](assets/esDomain3.png) 118 | 119 | ## Verify CloudFront access logs in S3 bucket 120 | As part of this lab, we copy CloudFront access logs in a S3 bucket created by the CloudFormation template. Before continuing with the rest of the lab, you need to make sure those log files are copied to your account. 
CloudFront access logs are compressed using gzip format. Refer to AWS documentation for [CloudFront access logs](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/AccessLogs.html) format and details. You can download sample log file to your laptop to inspect the contents. 121 | 122 | 1. Go to CloudFormation console : http://console.aws.amazon.com/cloudformation/ 123 | 124 | 2. Click the checkbox next to the stack you created. 125 | 126 | 3. Select **Resources** tab and look for **CFLogBucket** and click on the **Physical ID** for it to go to S3 bucket with log files. 127 | 128 | ![](assets/s3bucket1.png) 129 | 130 | 4. You should be able to see ***.gz** files in S3 bucket. This shows that CloudFront access logs were copied to S3 bucket and we can continue with the rest of the lab. 131 | 132 | ![](assets/s3bucket2.png) 133 | 134 | ## Logstash ingestion of CloudFront logs 135 | In this step we will configure Logstash agent installed on EC2 instance to ingest CloudFront logs we just verified in S3. Logstash provides built-in transformation and filtering for many log formats using grok filter plugins. In this step, we will also use plugins such as geoip for latitude and longitude and useragent to retrieve the user agent information from the access the logs. 136 | [Index mapping templates](https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-templates.html) allow you to define templates for mapping the appropriate data types with the fields contained in the logs as part of the index creations. In this lab, we will be creating index templates to map the request IP attribute to IP data type and geoip to map latitude and longitude information for creating geo-point data type. This will ensure right mapping of log fields as part of the index creation. 137 | 138 | 1. Go to [CloudFormation console](http://console.aws.amazon.com/cloudformation/) and copy the IP address for **LogstashEC2Instance** from **Outputs** tab. 139 | 140 | 2. 
You need to connect to the Logstash EC2 instance using SSH. Please make sure that you have configured your machine to SSH into EC2 instances. You can follow the [instructions here](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/AccessingInstances.html) to configure your Mac/Windows machine to connect to EC2 instance using SSH. Use the following command to connect to EC2 instance: 141 | 142 | `ssh -i ec2-user@` 143 | 144 | 3. Create an Index mapping template for processing the CloudFront logs. CloudFormation template has already copied the **indextemplate.json** in `/home/ec2-user/templates directory` . Make sure you copy the Elasticsearch domain from CloudFormation output key **ESDomainEndpoint**. 145 | ```bash 146 | sudo su - 147 | 148 | curl -XPUT /_template/cloudfront-template -H "Content-Type: application/json" -d@/home/ec2-user/templates/indextemplate.json 149 | ``` 150 | 4. Run the following commands to configure Logstash to start log ingestion. 151 | ```bash 152 | # Run following commands to verify that installed Java version is 1.8.x 153 | 154 | cd /elk/logstash-6.4.2/bin/ 155 | 156 | java -version 157 | ``` 158 | 5. Copy the logstash configuration file **cloudfront.conf** from **/home/ec2-user/templates/** to **/elk/logstash-6.4.2/bin**. 159 | ```bash 160 | cp /home/ec2-user/templates/cloudfront.conf /elk/logstash-6.4.2/bin/ 161 | ``` 162 | 6. Logstash uses S3 input plugin for polling the logs continuously and write to Elasticsearch domain using **logstash-output-amazon_es** plugin. Edit **input -> s3** section in **cloudfront.conf** to update S3 bucket. 163 | ```nginx 164 | input{ 165 | s3{ 166 | #Enter S3 bucket name that has the CloudFront access logs. You can copy it from 167 | #CloudFormation stack output "CFLogBucket" 168 | bucket => "" 169 | 170 | #No change needed for "prefix" 171 | prefix => "" 172 | 173 | #Point "region" to your AWS Region. e.g. eu-west-1 174 | region => "" 175 | } 176 | } 177 | ``` 178 | 7.
Edit **output-> amazon_es** section to update Elasticsearch domain information for your setup. 179 | 180 | :warning: Make sure the Elasticsearch domain is listed **WITHOUT** https:// in the following section. 181 | 182 | ```nginx 183 | output{ 184 | amazon_es{ 185 | #Enter Elasticsearch domain name WITHOUT https://. You can copy the Elasticsearch 186 | #domain from CloudFormation stack output "ESDomainEndpoint" 187 | hosts =>[""] 188 | 189 | #Point "region" to AWS Region you have created the CloudFormation stack in. e.g. eu-west-1 190 | region => "" 191 | } 192 | } 193 | ``` 194 | 8. Start Logstash process. Logstash will take about 8-10 minutes to index the logs to Amazon Elasticsearch. 195 | ```bash 196 | cd /elk/logstash-6.4.2/bin/ 197 | nohup ./logstash -f cloudfront.conf 198 | 199 | ``` 200 | 9. You can also verify if the Logstash process started properly by opening another SSH session and tailing the log file as shown below. 201 | **NOTE:** You will see some errors related to installing templates in logstash logs. This is due to a known issue (https://github.com/awslabs/logstash-output-amazon_es/issues/101 -link to github). You can ignore them. 202 | 203 | ```bash 204 | tail -f /elk/logstash-6.4.2/logs/logstash-plain.log 205 | ``` 206 | 207 | 10. Check if the Indexes are created on ES domain. Go to [Elasticsearch AWS Console](http://console.aws.amazon.com/es/). Click on the Elasticsearch domain that is created earlier. 208 | 209 | ![](assets/esIndices1.png) 210 | 211 | 11. CloudFront logs indices are created on day basis as shown below. 212 | 213 | ![](assets/esIndices2.png) 214 | 215 | ![](assets/esIndices3.png) 216 | 217 | You have successfully configured Logstash. Let us proceed to Nginx configuration. 218 | ## Nginx proxy configuration 219 | It should be noted that Kibana does not natively support IAM users and roles, but Amazon Elasticsearch offers several solutions for controlling access to Kibana.
For more details, please refer to [AWS documentation](https://docs.aws.amazon.com/elasticsearch-service/latest/developerguide/es-kibana.html#es-kibana-access). In this lab, we will be using open source based Nginx proxy solution to access the Kibana console. 220 | 221 | 1. Go to [CloudFormation console](http://console.aws.amazon.com/cloudformation/) and copy the IP address for **NginxEC2Instance** from **Outputs** tab. 222 | 223 | 2. Connect to Nginx proxy EC2 instance as ec2-user using your key pair. 224 | ```bash 225 | ssh -i ec2-user@ 226 | ``` 227 | 228 | 3. Copy **lab2-nginx.conf** from **/home/ec2-user/templates/**. You will need to update the conf file with your Elasticsearch domain endpoints, Elasticsearch Kibana endpoint and Elastic IPs. 229 | ```bash 230 | sudo su - 231 | cd /etc/nginx 232 | 233 | mv nginx.conf nginx.conf-bkup 234 | 235 | cp /home/ec2-user/templates/lab2-nginx.conf /etc/nginx/nginx.conf 236 | ``` 237 | 4. Update following parameters in **nginx.conf** with correct values for Elasticsearch domain endpoint **(ESDomainEndpoint)**, Kibana endpoint **(ESKibanaEndpoint)** and Nginx EC2 IP **(NginxEC2Instance)**. You can get the values from CloudFormation 238 | ```nginx 239 | location / { 240 | 241 | # ES Domain name WITHOUT https:// 242 | proxy_set_header Host ; 243 | 244 | #IP of Nginx EC2 Instance 245 | proxy_set_header X-Real-IP ; 246 | 247 | #Elasticsearch Kibana endpoint 248 | proxy_pass https:///_plugin/kibana/; 249 | 250 | #Elasticsearch kibana endpoint and IP of Nginx EC2 Instance 251 | proxy_redirect https:///_plugin/kibana/ http://; 252 | ...... 253 | .......... 254 | } 255 | location ~ (/app/kibana|/app/timelion|/bundles|/es_admin|/plugins|/api|/ui|/elasticsearch) { 256 | ...... 257 | ........ 258 | #Elasticsearch Domain endpoint 259 | proxy_pass https://; 260 | } 261 | ``` 262 | 263 | 5. Restart the nginx server after updating the configurations. 
264 | 265 | ```bash 266 | service nginx reload 267 | ``` 268 | 269 | Nginx configuration is completed. Next step will be to configure Kibana. 270 | 271 | ## Kibana Configuration 272 | 1. Access the Kibana via Nginx proxy IP address. For protection of your proxy server, we will leverage a basic Http authentication. You will be challenged with username and password. Enter the username as admin (lowercase) and password as specified in the parameter section of the CloudFormation template. If you have used default values, then the password is admin123. 273 | 274 | ![](assets/kibana1.png) 275 | 276 | 2. Kibana dashboard will load. 277 | 278 | ![](assets/kibana2.png) 279 | 280 | 3. Create the index pattern in Kibana. Go to **Management** section in Kibana. Click **Index Patterns**. 281 | 282 | ![](assets/kibana3.png) 283 | 284 | 4. Enter **cloudfront*** (lowercase) in Index pattern text box. 285 | 286 | ![](assets/kibana4.png) 287 | 288 | 5. Click **Next** and choose **@timestamp** as **Time Filter field name** and click **Create Index pattern** button. 289 | 290 | ![](assets/kibana5.png) 291 | 292 | 6. You can verify the indexes if it used the correct Index template for mapping . for example, if you browse through the fields, you will see there is a new field named geoip.location which is mapped as geo_point data type. 293 | 294 | ![](assets/kibana6.png) 295 | 296 | Now Kibana has been configured and let us move to final part of this lab where we will create visualizations. 297 | 298 | ## Kibana Visualization 299 | Now we are ready to create visualizations. You can create visualizations manually or import the predefined visualizations as JSON templates to your dashboard. We will go over both cases. 300 | 301 | ### Use Case #1 (User agent Vs Error code) 302 | This visualization will show if customers are experiencing errors and from which specific device types. 303 | 304 | 1. Go to Kibana dashboard. 305 | 306 | 2. 
Select **Visualize** from the left side menu and click on the **+** in visualize section. 307 | 308 | ![](assets/kibana7.png) 309 | 310 | 3. Select **Heat Map** under **Basic Charts** in **Select visualization type**. 311 | 312 | ![](assets/kibana8.png) 313 | 314 | 4. Select **cloudfront*** from **From a New Search, Select Index** section. 315 | 316 | ![](assets/kibana9.png) 317 | 318 | 5. Change the time for visualization from last 15 minutest to **Last 60 days** 319 | 320 | ![](assets/kibana10.png) 321 | 322 | 6. Select settings under **Bucket** as follows and then click **Apply changes** button (play button on top): 323 | 324 | | | **Aggregation**|**Field** | 325 | | ----------|:--------------:| :----- | 326 | | **X-Axis**| Terms | useragent.device.keyword | 327 | | **Y-Axis**| Terms | sc_status | 328 | 329 | 330 | ![](assets/kibana11.png) 331 | 332 | 7. You will see the graph/visualization. Save the visualization as **User agent-status-code-heatmap** 333 | 334 | ![](assets/kibana12.png) 335 | 336 | ### Use Case #2 (Avg or Max Latency per city) 337 | You can use Geo-spatial visualization using Co-ordinate map. We will show to how to import the visualization from predefined template. 338 | 339 | 1. Save [kibanamaxlatencypercity.json](https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/master/lab2-elk-cloudfront-log-analysis/kibanamaxlatencypercity.json) file to your local computer by either downloading or copy and pasting as JSON file. 340 | 341 | 2. Go to **Management** -> **Saved objects**. Click **Import** and import the downloaded **kibanamaxlatencypercity.json**. This visualization shows the max(time_taken) for each city. 342 | 343 | ![](assets/kibana13.png) 344 | 345 | 3. Click **Yes, overwrite all objects** if asked for **Automatically overwrite all saved objects** dialog box. 346 | 347 | 4. Click **Confirm all changes** for **Index Pattern Conflicts** dialog box. 348 | 349 | 5. 
You should now see following visualization under the **Visualization** tab. 350 | 351 | ![](assets/kibana14.png) 352 | 353 | 6. Click on **Visualization** from the Kibana dashboard menu and select **Max-Latency-percity** to see the visualization. 354 | 355 | ![](assets/kibana15.png) 356 | 357 | ![](assets/kibana16.png) 358 | 359 | ### Use Case #3 (Number of requests per geo-region or popular regions) 360 | In this case, we will create Geo-spatial visualization using regional map. This visualization shows the number of request distribution for each city. This kind of visualization can be used for analyzing the traffic pattern as well as marketing purposes. 361 | 362 | 1. Save [kibanageorequests.json](https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/master/lab2-elk-cloudfront-log-analysis/kibanageorequests.json) file to your local computer by either downloading or copy and pasting as JSON file. 363 | 364 | 2. Follow **Steps 2 - 6** from the Use Case #2 and import downloaded file (kibanageorequests.json) . 365 | 366 | 3. Once completed, you will be able to see the final visualization for number of requests per geo-region. 367 | 368 | ![](assets/kibana17.png) 369 | 370 | ![](assets/kibana18.png) 371 | 372 | ## Completion 373 | You have successfully this Lab. Please proceed with the clean up of this lab to make sure running resources do not incur unnecessary billing. 374 | 375 | ## Clean up 376 | 1. Delete the S3 buckets created in this lab in Step: [**Verify CloudFront Access Logs**](https://github.com/aws-samples/amazon-cloudfront-log-analysis/tree/master/lab2-elk-cloudfront-log-analysis#verify-cloudfront--access-logs-in-s3-bucket) 377 | 378 | 2. Go to CloudFormation console : http://console.aws.amazon.com/cloudformation/ 379 | 380 | 3. Click the checkbox next to the stack you created. 381 | 382 | 5. Click **Actions** button and select **Delete Stack** to delete the stack. 
383 | 384 | ![](assets/cleanup1.png) 385 | 386 | -------------------------------------------------------------------------------- /lab2-elk-cloudfront-log-analysis/assets/Cf1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/Cf1.png -------------------------------------------------------------------------------- /lab2-elk-cloudfront-log-analysis/assets/Cf2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/Cf2.png -------------------------------------------------------------------------------- /lab2-elk-cloudfront-log-analysis/assets/Cf3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/Cf3.png -------------------------------------------------------------------------------- /lab2-elk-cloudfront-log-analysis/assets/Cf4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/Cf4.png -------------------------------------------------------------------------------- /lab2-elk-cloudfront-log-analysis/assets/Cf5png.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/Cf5png.png 
-------------------------------------------------------------------------------- /lab2-elk-cloudfront-log-analysis/assets/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/architecture.png -------------------------------------------------------------------------------- /lab2-elk-cloudfront-log-analysis/assets/asset.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/asset.txt -------------------------------------------------------------------------------- /lab2-elk-cloudfront-log-analysis/assets/cleanup1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/cleanup1.png -------------------------------------------------------------------------------- /lab2-elk-cloudfront-log-analysis/assets/esDomain1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/esDomain1.png -------------------------------------------------------------------------------- /lab2-elk-cloudfront-log-analysis/assets/esDomain2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/esDomain2.png 
-------------------------------------------------------------------------------- /lab2-elk-cloudfront-log-analysis/assets/esDomain3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/esDomain3.png -------------------------------------------------------------------------------- /lab2-elk-cloudfront-log-analysis/assets/esIndices1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/esIndices1.png -------------------------------------------------------------------------------- /lab2-elk-cloudfront-log-analysis/assets/esIndices2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/esIndices2.png -------------------------------------------------------------------------------- /lab2-elk-cloudfront-log-analysis/assets/esIndices3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/esIndices3.png -------------------------------------------------------------------------------- /lab2-elk-cloudfront-log-analysis/assets/keyPair1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/keyPair1.png 
-------------------------------------------------------------------------------- /lab2-elk-cloudfront-log-analysis/assets/keyPair2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/keyPair2.png -------------------------------------------------------------------------------- /lab2-elk-cloudfront-log-analysis/assets/keyPair3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/keyPair3.png -------------------------------------------------------------------------------- /lab2-elk-cloudfront-log-analysis/assets/kibana1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/kibana1.png -------------------------------------------------------------------------------- /lab2-elk-cloudfront-log-analysis/assets/kibana10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/kibana10.png -------------------------------------------------------------------------------- /lab2-elk-cloudfront-log-analysis/assets/kibana11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/kibana11.png 
-------------------------------------------------------------------------------- /lab2-elk-cloudfront-log-analysis/assets/kibana12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/kibana12.png -------------------------------------------------------------------------------- /lab2-elk-cloudfront-log-analysis/assets/kibana13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/kibana13.png -------------------------------------------------------------------------------- /lab2-elk-cloudfront-log-analysis/assets/kibana14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/kibana14.png -------------------------------------------------------------------------------- /lab2-elk-cloudfront-log-analysis/assets/kibana15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/kibana15.png -------------------------------------------------------------------------------- /lab2-elk-cloudfront-log-analysis/assets/kibana16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/kibana16.png 
-------------------------------------------------------------------------------- /lab2-elk-cloudfront-log-analysis/assets/kibana17.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/kibana17.png -------------------------------------------------------------------------------- /lab2-elk-cloudfront-log-analysis/assets/kibana18.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/kibana18.png -------------------------------------------------------------------------------- /lab2-elk-cloudfront-log-analysis/assets/kibana2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/kibana2.png -------------------------------------------------------------------------------- /lab2-elk-cloudfront-log-analysis/assets/kibana3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/kibana3.png -------------------------------------------------------------------------------- /lab2-elk-cloudfront-log-analysis/assets/kibana4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/kibana4.png 
-------------------------------------------------------------------------------- /lab2-elk-cloudfront-log-analysis/assets/kibana5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/kibana5.png -------------------------------------------------------------------------------- /lab2-elk-cloudfront-log-analysis/assets/kibana6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/kibana6.png -------------------------------------------------------------------------------- /lab2-elk-cloudfront-log-analysis/assets/kibana7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/kibana7.png -------------------------------------------------------------------------------- /lab2-elk-cloudfront-log-analysis/assets/kibana8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/kibana8.png -------------------------------------------------------------------------------- /lab2-elk-cloudfront-log-analysis/assets/kibana9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/kibana9.png 
-------------------------------------------------------------------------------- /lab2-elk-cloudfront-log-analysis/assets/s3bucket1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/s3bucket1.png -------------------------------------------------------------------------------- /lab2-elk-cloudfront-log-analysis/assets/s3bucket2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/s3bucket2.png -------------------------------------------------------------------------------- /lab2-elk-cloudfront-log-analysis/config/CloudFront-Analysis-ELK-Lab.json: -------------------------------------------------------------------------------- 1 | { 2 | "AWSTemplateFormatVersion": "2010-09-09", 3 | "Description": "CloudFormation template for creating ELK stack for CloudFront Log Analysis Lab.You will be billed for the AWS resources used if you create a stack from this template", 4 | "Parameters": { 5 | "EC2KeyPair": { 6 | "Description": "Amazon EC2 Key Pair", 7 | "Type": "AWS::EC2::KeyPair::KeyName" 8 | }, 9 | "VpcCIDR": { 10 | "Description": "Please enter the IP range (CIDR notation) for this VPC", 11 | "Type": "String", 12 | "Default": "10.192.0.0/16", 13 | "AllowedPattern": "(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})/(\\d{1,2})" 14 | }, 15 | "PublicSubnet1CIDR": { 16 | "Description": "Please enter the IP range (CIDR notation) for the public subnet in the first Availability Zone", 17 | "Type": "String", 18 | "Default": "10.192.10.0/24", 19 | "AllowedPattern": "(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})/(\\d{1,2})" 20 | }, 21 | "PublicSubnet2CIDR": { 22 | "Description": "Please enter the IP 
range (CIDR notation) for the public subnet in the second Availability Zone", 23 | "Type": "String", 24 | "Default": "10.192.11.0/24", 25 | "AllowedPattern": "(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})/(\\d{1,2})" 26 | }, 27 | "LogstashInstanceType": { 28 | "Type": "String", 29 | "Description": "Amazon EC2 instance type for the Logstash Instance", 30 | "Default": "t3.medium", 31 | "AllowedValues": [ 32 | "t3.medium", 33 | "t3.large", 34 | "m5.large", 35 | "m4.large" 36 | ] 37 | }, 38 | "NginxInstanceType": { 39 | "Type": "String", 40 | "Description": "Amazon EC2 instance type for the Nginx proxy Instance", 41 | "Default": "t3.medium", 42 | "AllowedValues": [ 43 | "t3.medium", 44 | "t3.large", 45 | "m5.large", 46 | "m4.large" 47 | ] 48 | }, 49 | "ESDomainDataInstanceType": { 50 | "Type": "String", 51 | "Description": "Instance Type for the Elasticsearch Domain", 52 | "Default": "m4.large.elasticsearch", 53 | "AllowedValues": [ 54 | "m4.large.elasticsearch", 55 | "m4.medium.elasticsearch", 56 | "c4.large.elasticsearch", 57 | "r4.large.elasticsearch" 58 | ] 59 | }, 60 | "KibanaPassword": { 61 | "Default": "admin123", 62 | "NoEcho": "true", 63 | "Description": "Enter password for Kibana user: admin", 64 | "Type": "String", 65 | "MinLength": "8", 66 | "MaxLength": "41", 67 | "AllowedPattern": "[a-zA-Z0-9]*", 68 | "ConstraintDescription": "must contain only alphanumeric characters with minimum of 8 characters." 69 | }, 70 | "ClientIP": { 71 | "Description": "The IP address range that can be used to connect to the RDS instances from your local machine.It must be a valid IP CIDR range of the form x.x.x.x/x.Pls get your address using checkip.amazonaws.com or whatsmyip.org", 72 | "Type": "String", 73 | "MinLength": "9", 74 | "MaxLength": "18", 75 | "Default": "0.0.0.0/0", 76 | "AllowedPattern": "(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})/(\\d{1,2})", 77 | "ConstraintDescription": "It must be a valid IP CIDR range of the form x.x.x.x/x. 
Suggest to enable access to your IP address only. Pls get your address using checkip.amazonaws.com or whatsmyip.org." 78 | } 79 | }, 80 | "Conditions": { 81 | "AttachKeyPair": { 82 | "Fn::Not": [ 83 | { 84 | "Fn::Equals": [ 85 | { 86 | "Ref": "EC2KeyPair" 87 | }, 88 | "None" 89 | ] 90 | } 91 | ] 92 | } 93 | }, 94 | "Metadata": { 95 | "AWS::CloudFormation::Interface": { 96 | "ParameterGroups": [ 97 | { 98 | "Label": { 99 | "default": "VPC Configurations" 100 | }, 101 | "Parameters": [ 102 | "EC2KeyPair", 103 | "VpcCIDR", 104 | "PublicSubnet1CIDR", 105 | "PublicSubnet2CIDR" 106 | ] 107 | }, 108 | { 109 | "Label": { 110 | "default": " ELK Instance Configurations" 111 | }, 112 | "Parameters": [ 113 | "LogstashInstanceType", 114 | "NginxInstanceType", 115 | "ESDomainDataInstanceType", 116 | "KibanaPassword" 117 | ] 118 | }, 119 | { 120 | "Label": { 121 | "default": "Enter IP address for the Security group Configuration" 122 | }, 123 | "Parameters": [ 124 | "ClientIP" 125 | ] 126 | } 127 | ] 128 | } 129 | }, 130 | "Mappings": { 131 | "RegionMap": { 132 | "us-east-1": { 133 | "AMI": "ami-0ff8a91507f77f867" 134 | }, 135 | "us-east-2": { 136 | "AMI": "ami-0b59bfac6be064b78" 137 | }, 138 | "us-west-1": { 139 | "AMI": "ami-0bdb828fd58c52235" 140 | }, 141 | "us-west-2": { 142 | "AMI": "ami-a0cfeed8" 143 | }, 144 | "eu-west-1": { 145 | "AMI": "ami-047bb4163c506cd98" 146 | }, 147 | "sa-east-1": { 148 | "AMI": "ami-07b14488da8ea02a0" 149 | }, 150 | "ap-southeast-1": { 151 | "AMI": "ami-08569b978cc4dfa10" 152 | }, 153 | "ap-southeast-2": { 154 | "AMI": "ami-09b42976632b27e9b" 155 | }, 156 | "ap-northeast-1": { 157 | "AMI": "ami-06cd52961ce9f0d85" 158 | } 159 | } 160 | }, 161 | "Resources": { 162 | "VPC": { 163 | "Type": "AWS::EC2::VPC", 164 | "Properties": { 165 | "CidrBlock": { 166 | "Ref": "VpcCIDR" 167 | }, 168 | "Tags": [ 169 | { 170 | "Key": "Name", 171 | "Value": { 172 | "Ref": "AWS::StackName" 173 | } 174 | } 175 | ] 176 | } 177 | }, 178 | "InternetGateway": { 179 | "Type": 
"AWS::EC2::InternetGateway", 180 | "Properties": { 181 | "Tags": [ 182 | { 183 | "Key": "Name", 184 | "Value": { 185 | "Ref": "AWS::StackName" 186 | } 187 | } 188 | ] 189 | } 190 | }, 191 | "InternetGatewayAttachment": { 192 | "Type": "AWS::EC2::VPCGatewayAttachment", 193 | "Properties": { 194 | "InternetGatewayId": { 195 | "Ref": "InternetGateway" 196 | }, 197 | "VpcId": { 198 | "Ref": "VPC" 199 | } 200 | } 201 | }, 202 | "PublicSubnet1": { 203 | "Type": "AWS::EC2::Subnet", 204 | "Properties": { 205 | "VpcId": { 206 | "Ref": "VPC" 207 | }, 208 | "AvailabilityZone": { 209 | "Fn::Select": [ 210 | "0", 211 | { 212 | "Fn::GetAZs": "" 213 | } 214 | ] 215 | }, 216 | "CidrBlock": { 217 | "Ref": "PublicSubnet1CIDR" 218 | }, 219 | "MapPublicIpOnLaunch": true, 220 | "Tags": [ 221 | { 222 | "Key": "Name", 223 | "Value": { 224 | "Fn::Sub": "${AWS::StackName} Public Subnet (AZ1)" 225 | } 226 | } 227 | ] 228 | } 229 | }, 230 | "PublicSubnet2": { 231 | "Type": "AWS::EC2::Subnet", 232 | "Properties": { 233 | "VpcId": { 234 | "Ref": "VPC" 235 | }, 236 | "AvailabilityZone": { 237 | "Fn::Select": [ 238 | "1", 239 | { 240 | "Fn::GetAZs": "" 241 | } 242 | ] 243 | }, 244 | "CidrBlock": { 245 | "Ref": "PublicSubnet2CIDR" 246 | }, 247 | "MapPublicIpOnLaunch": true, 248 | "Tags": [ 249 | { 250 | "Key": "Name", 251 | "Value": { 252 | "Fn::Sub": "${AWS::StackName} Public Subnet (AZ2)" 253 | } 254 | } 255 | ] 256 | } 257 | }, 258 | "PublicRouteTable": { 259 | "Type": "AWS::EC2::RouteTable", 260 | "Properties": { 261 | "VpcId": { 262 | "Ref": "VPC" 263 | }, 264 | "Tags": [ 265 | { 266 | "Key": "Name", 267 | "Value": { 268 | "Fn::Sub": "${AWS::StackName} Public Routes" 269 | } 270 | } 271 | ] 272 | } 273 | }, 274 | "DefaultPublicRoute": { 275 | "Type": "AWS::EC2::Route", 276 | "DependsOn": "InternetGatewayAttachment", 277 | "Properties": { 278 | "RouteTableId": { 279 | "Ref": "PublicRouteTable" 280 | }, 281 | "DestinationCidrBlock": "0.0.0.0/0", 282 | "GatewayId": { 283 | "Ref": 
"InternetGateway" 284 | } 285 | } 286 | }, 287 | "PublicSubnet1RouteTableAssociation": { 288 | "Type": "AWS::EC2::SubnetRouteTableAssociation", 289 | "Properties": { 290 | "RouteTableId": { 291 | "Ref": "PublicRouteTable" 292 | }, 293 | "SubnetId": { 294 | "Ref": "PublicSubnet1" 295 | } 296 | } 297 | }, 298 | "PublicSubnet2RouteTableAssociation": { 299 | "Type": "AWS::EC2::SubnetRouteTableAssociation", 300 | "Properties": { 301 | "RouteTableId": { 302 | "Ref": "PublicRouteTable" 303 | }, 304 | "SubnetId": { 305 | "Ref": "PublicSubnet2" 306 | } 307 | } 308 | }, 309 | "SGCFLabPublicEC2Access": { 310 | "Type": "AWS::EC2::SecurityGroup", 311 | "Properties": { 312 | "GroupDescription": "Security group for EC2 SSH and Proxy access", 313 | "SecurityGroupIngress": [ 314 | { 315 | "IpProtocol": "tcp", 316 | "FromPort": "22", 317 | "ToPort": "22", 318 | "CidrIp": { 319 | "Ref": "ClientIP" 320 | } 321 | }, 322 | { 323 | "IpProtocol": "tcp", 324 | "FromPort": "80", 325 | "ToPort": "80", 326 | "CidrIp": { 327 | "Ref": "ClientIP" 328 | } 329 | } 330 | ], 331 | "VpcId": { 332 | "Ref": "VPC" 333 | } 334 | } 335 | }, 336 | "EC2InstanceRole": { 337 | "Type": "AWS::IAM::Role", 338 | "Properties": { 339 | "ManagedPolicyArns": [ 340 | "arn:aws:iam::aws:policy/AmazonS3FullAccess" 341 | ], 342 | "AssumeRolePolicyDocument": { 343 | "Version": "2012-10-17", 344 | "Statement": [ 345 | { 346 | "Sid": "", 347 | "Effect": "Allow", 348 | "Principal": { 349 | "Service": [ 350 | "ec2.amazonaws.com" 351 | ] 352 | }, 353 | "Action": [ 354 | "sts:AssumeRole" 355 | ] 356 | } 357 | ] 358 | }, 359 | "Path": "/" 360 | } 361 | }, 362 | "IAMlogstashInstanceProfile": { 363 | "Type": "AWS::IAM::InstanceProfile", 364 | "Properties": { 365 | "Path": "/", 366 | "Roles": [ 367 | { 368 | "Ref": "EC2InstanceRole" 369 | } 370 | ] 371 | } 372 | }, 373 | "CFLogBucket": { 374 | "Type": "AWS::S3::Bucket" 375 | }, 376 | "LogstashEC2Instance": { 377 | "Type": "AWS::EC2::Instance", 378 | "DependsOn": "CFLogBucket", 379 | 
"Properties": { 380 | "ImageId": { 381 | "Fn::FindInMap": [ 382 | "RegionMap", 383 | { 384 | "Ref": "AWS::Region" 385 | }, 386 | "AMI" 387 | ] 388 | }, 389 | "InstanceType": { 390 | "Ref": "LogstashInstanceType" 391 | }, 392 | "SecurityGroupIds": [ 393 | { 394 | "Ref": "SGCFLabPublicEC2Access" 395 | } 396 | ], 397 | "KeyName": { 398 | "Ref": "EC2KeyPair" 399 | }, 400 | "IamInstanceProfile": { 401 | "Ref": "IAMlogstashInstanceProfile" 402 | }, 403 | "SubnetId": { 404 | "Ref": "PublicSubnet1" 405 | }, 406 | "Tags": [ 407 | { 408 | "Key": "Name", 409 | "Value": "Logstash EC2 server" 410 | } 411 | ], 412 | "UserData": { 413 | "Fn::Base64": { 414 | "Fn::Sub": "#!/bin/bash\necho \"${CFLogBucket}\" > /home/ec2-user/s3bucket.txt\naws s3 sync s3://eu-west-1.data-analytics/cflogworkshop/raw/cf-accesslogs s3://${CFLogBucket}\nmkdir /home/ec2-user/templates/\naws s3 sync s3://us-east-1.data-analytics/labcontent/reInvent2018content-ctd410/lab2/templates/ /home/ec2-user/templates/\nsudo mkdir /elk\ncd /elk\nsudo wget https://artifacts.elastic.co/downloads/logstash/logstash-6.4.2.tar.gz /elk/\nsudo gunzip logstash-6.4.2.tar.gz\nsudo tar -xvf logstash-6.4.2.tar\nsudo /elk/logstash-6.4.2/bin/logstash-plugin install logstash-output-amazon_es\nsudo sudo yum -y install java-1.8.*\ntmp_javapath=`alternatives --display java|grep priority|grep 1.8|awk '{print $1}'`\necho $tmp_javapath > /home/ec2-user/javapath.txt\nsudo alternatives --set java `cat /home/ec2-user/javapath.txt`\n" 415 | } 416 | } 417 | } 418 | }, 419 | "LogstashElasticIP": { 420 | "Type": "AWS::EC2::EIP", 421 | "DependsOn": "LogstashEC2Instance", 422 | "Properties": { 423 | "InstanceId": { 424 | "Ref": "LogstashEC2Instance" 425 | }, 426 | "Domain": "vpc" 427 | } 428 | }, 429 | "NginxEC2Instance": { 430 | "Type": "AWS::EC2::Instance", 431 | "Properties": { 432 | "ImageId": { 433 | "Fn::FindInMap": [ 434 | "RegionMap", 435 | { 436 | "Ref": "AWS::Region" 437 | }, 438 | "AMI" 439 | ] 440 | }, 441 | "InstanceType": { 442 | 
"Ref": "NginxInstanceType" 443 | }, 444 | "SecurityGroupIds": [ 445 | { 446 | "Ref": "SGCFLabPublicEC2Access" 447 | } 448 | ], 449 | "KeyName": { 450 | "Ref": "EC2KeyPair" 451 | }, 452 | "SubnetId": { 453 | "Ref": "PublicSubnet2" 454 | }, 455 | "Tags": [ 456 | { 457 | "Key": "Name", 458 | "Value": "Nginx Proxy EC2 server" 459 | } 460 | ], 461 | "UserData": { 462 | "Fn::Base64": { 463 | "Fn::Sub": "#!/bin/bash\nmkdir /home/ec2-user/templates/\n cd /home/ec2-user/templates/\n wget https://s3-eu-west-1.amazonaws.com/eu-west-1.data-analytics/labcontent/reInvent2018-ctd410/lab2/templates/lab2-nginx.conf\nsudo yum -y install nginx\nsudo yum -y install httpd-tools\nsudo service nginx start\nsudo htpasswd -b -c /etc/nginx/.secrets_kibana admin ${KibanaPassword}\n" 464 | } 465 | } 466 | } 467 | }, 468 | "NginxElasticIP": { 469 | "Type": "AWS::EC2::EIP", 470 | "DependsOn": "NginxEC2Instance", 471 | "Properties": { 472 | "InstanceId": { 473 | "Ref": "NginxEC2Instance" 474 | }, 475 | "Domain": "vpc" 476 | } 477 | }, 478 | "ElasticsearchDomain": { 479 | "Type": "AWS::Elasticsearch::Domain", 480 | "DependsOn": "LogstashElasticIP", 481 | "DependsOn": "NginxElasticIP", 482 | "Properties": { 483 | "ElasticsearchVersion": "6.3", 484 | "ElasticsearchClusterConfig": { 485 | "InstanceCount": "2", 486 | "ZoneAwarenessEnabled": "true", 487 | "InstanceType": { 488 | "Ref": "ESDomainDataInstanceType" 489 | } 490 | }, 491 | "EBSOptions": { 492 | "EBSEnabled": true, 493 | "Iops": 0, 494 | "VolumeSize": 50, 495 | "VolumeType": "gp2" 496 | }, 497 | "SnapshotOptions": { 498 | "AutomatedSnapshotStartHour": "0" 499 | }, 500 | "Tags": [ 501 | { 502 | "Key": "Name", 503 | "Value": { 504 | "Fn::Sub": "${AWS::StackName} ES Domain" 505 | } 506 | } 507 | ], 508 | "AccessPolicies": { 509 | "Version": "2012-10-17", 510 | "Statement": [ 511 | { 512 | "Effect": "Allow", 513 | "Principal": { 514 | "AWS": "*" 515 | }, 516 | "Action": "es:*", 517 | "Resource": { 518 | "Fn::Sub": 
"arn:aws:es:${AWS::Region}:${AWS::AccountId}:domain/*" 519 | }, 520 | "Condition": { 521 | "IpAddress": { 522 | "aws:SourceIp": [ 523 | { 524 | "Ref": "LogstashElasticIP" 525 | }, 526 | { 527 | "Ref": "NginxElasticIP" 528 | } 529 | ] 530 | } 531 | } 532 | } 533 | ] 534 | }, 535 | "AdvancedOptions": { 536 | "rest.action.multi.allow_explicit_index": "true" 537 | } 538 | } 539 | } 540 | }, 541 | "Outputs": { 542 | 543 | "ESDomainEndpoint": { 544 | "Description": "ElasticSearch Domain Endpoint", 545 | "Value": { 546 | "Fn::Join": [ 547 | "", 548 | ["https://", 549 | { 550 | "Fn::GetAtt": [ 551 | "ElasticsearchDomain", 552 | "DomainEndpoint" 553 | ] 554 | } 555 | ] 556 | ] 557 | } 558 | 559 | }, 560 | 561 | "ESKibanaEndpoint": { 562 | "Description": "ElasticSearch Kibana Endpoint", 563 | "Value": { 564 | "Fn::Join": [ 565 | "", 566 | ["https://", 567 | { 568 | "Fn::GetAtt": [ 569 | "ElasticsearchDomain", 570 | "DomainEndpoint" 571 | ] 572 | }, 573 | "/_plugin/kibana/" 574 | ] 575 | ] 576 | } 577 | 578 | 579 | }, 580 | "S3bucketName": { 581 | "Description": "S3 bucket to store CloudFront access logs", 582 | "Value": { 583 | "Ref": "CFLogBucket" 584 | } 585 | }, 586 | "LogstashEC2Instance": { 587 | "Description": "IP for SSH access to Logstash server", 588 | "Value": { 589 | "Ref": "LogstashElasticIP" 590 | } 591 | }, 592 | "NginxEC2Instance": { 593 | "Description": "IP for SSH access to Nginx proxy server", 594 | "Value": { 595 | "Ref": "NginxElasticIP" 596 | } 597 | }, 598 | "ESKibanaProxyEndpoint": { 599 | "Description": "Kibana Proxy Endpoint", 600 | "Value": { 601 | "Fn::Join": [ 602 | "", 603 | [ 604 | "http://", 605 | { 606 | "Ref": "NginxElasticIP" 607 | } 608 | ] 609 | ] 610 | } 611 | } 612 | } 613 | } 614 | -------------------------------------------------------------------------------- /lab2-elk-cloudfront-log-analysis/config/cloudfront.conf: -------------------------------------------------------------------------------- 1 | #cloudfront.conf 2 | input { 3 | s3 
{ 4 | #Enter S3 bucket name that has the CloudFront access logs. You can copy it from CloudFormation stack output "CFLogBucket" 5 | bucket => "" 6 | #No change needed for "prefix" 7 | prefix => "" 8 | #Point "region" to your AWS Region. 9 | region => "" 10 | } 11 | } 12 | 13 | 14 | filter { 15 | grok { 16 | match => { "message" => "%{DATE_EU:date}\t%{TIME:time}\t%{GREEDYDATA:x_edge_location}\t(?:%{NUMBER:sc_bytes:int}|-)\t%{IPORHOST:c_ip}\t%{WORD:cs_method}\t%{HOSTNAME:cs_host}\t%{NOTSPACE:cs_uri_stem}\t%{NUMBER:sc_status:int}\t%{GREEDYDATA:referrer}\t%{GREEDYDATA:User_Agent}\t%{GREEDYDATA:cs-uri-query}\t%{GREEDYDATA:cookies}\t%{WORD:x_edge_result_type}\t%{NOTSPACE:x_edge_request_id}\t%{HOSTNAME:x_host_header}\t%{URIPROTO:cs_protocol}\t%{INT:cs_bytes:int}\t%{NUMBER:time_taken:float}\t%{GREEDYDATA:x_forwarded_for}\t%{GREEDYDATA:ssl_protocol}\t%{GREEDYDATA:ssl_cipher}\t%{GREEDYDATA:x_edge_response_result_type}\t%{GREEDYDATA:cs-protocol-version}\t%{GREEDYDATA:fle-status}\t%{GREEDYDATA:fle-encrypted-fields}" } 17 | } 18 | 19 | mutate { 20 | add_field => [ "listener_timestamp", "%{date} %{time}" ] 21 | } 22 | 23 | date { 24 | match => [ "listener_timestamp", "yy-MM-dd HH:mm:ss" ] 25 | target => "@timestamp" 26 | } 27 | 28 | geoip { 29 | source => "c_ip" 30 | } 31 | 32 | useragent { 33 | source => "User_Agent" 34 | target => "useragent" 35 | } 36 | 37 | mutate { 38 | remove_field => ["date", "time", "listener_timestamp", "cloudfront_version", "message", "cloudfront_fields", "User_Agent"] 39 | } 40 | } 41 | 42 | output { 43 | amazon_es { 44 | #Enter Elasticsearch domain name WITHOUT https://. 
You can copy the Elasticsearch domain from CloudFormation stack output "ESDomainEndpoint" 45 | hosts => [""] 46 | #Point "region" to AWS Region you have created the CloudFormation stack in 47 | region => "" 48 | index => "cloudfront-logs-%{+YYYY.MM.dd}" 49 | } 50 | } 51 | 52 | -------------------------------------------------------------------------------- /lab2-elk-cloudfront-log-analysis/config/indextemplate.json: -------------------------------------------------------------------------------- 1 | { 2 | "index_patterns": ["cloudfront*"], 3 | "settings": { 4 | "number_of_shards": 2 }, 5 | 6 | "mappings" : { 7 | "doc" : { 8 | "dynamic_templates" : [ { 9 | "string_fields" : { 10 | "match" : "*", 11 | "match_mapping_type" : "string", 12 | "mapping" : { 13 | "type" : "text", "norms" : false, 14 | "fields" : { 15 | "keyword" : { "type": "keyword", "ignore_above": 256 } 16 | } 17 | } 18 | } 19 | } ], 20 | "properties" : { 21 | "@timestamp": { "type": "date"}, 22 | "@version": { "type": "keyword"}, 23 | "geoip" : { 24 | "dynamic": true, 25 | "properties" : { 26 | "ip": { "type": "ip" }, 27 | "location" : { "type" : "geo_point" }, 28 | "latitude" : { "type" : "half_float" }, 29 | "longitude" : { "type" : "half_float" } 30 | } 31 | } 32 | } 33 | } 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /lab2-elk-cloudfront-log-analysis/config/lab2-nginx.conf: -------------------------------------------------------------------------------- 1 | events { 2 | worker_connections 1024; 3 | } 4 | 5 | http { 6 | 7 | 8 | server { 9 | listen 80; 10 | # error logging 11 | error_log /var/log/nginx/elasticsearch_error.log; 12 | 13 | # authentication: kibana 14 | auth_basic "Kibana Auth"; 15 | auth_basic_user_file /etc/nginx/.secrets_kibana; 16 | 17 | 18 | location / { 19 | # ES Domain name WITHOUT https:// 20 | proxy_set_header Host ; 21 | #IP of Nginx EC2 Instance 22 | proxy_set_header X-Real-IP ; 23 | proxy_buffer_size 128k; 24 | 
proxy_buffers 4 256k; 25 | proxy_busy_buffers_size 256k; 26 | proxy_set_header Connection "Keep-Alive"; 27 | proxy_set_header Proxy-Connection "Keep-Alive"; 28 | proxy_http_version 1.1; 29 | proxy_set_header Authorization ""; 30 | #Elasticsearch Kibana endpoint 31 | proxy_pass https:///_plugin/kibana/; 32 | #Elasticsearch Kibana endpoint and IP of Nginx EC2 Instance 33 | proxy_redirect https:///_plugin/kibana/ http://; 34 | } 35 | 36 | location ~ (/app/kibana|/app/timelion|/bundles|/es_admin|/plugins|/api|/ui|/elasticsearch) { 37 | #Elasticsearch Domain endpoint 38 | proxy_pass https://; 39 | proxy_set_header Host $host; 40 | proxy_set_header X-Real-IP $remote_addr; 41 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 42 | proxy_set_header X-Forwarded-Proto $scheme; 43 | proxy_set_header X-Forwarded-Host $http_host; 44 | proxy_set_header Authorization ""; 45 | } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /lab2-elk-cloudfront-log-analysis/kibanageorequests.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "_id": "595e9c70-d305-11e8-aa33-3358ecf94586", 4 | "_type": "visualization", 5 | "_source": { 6 | "title": "Geo-requests-distribution", 7 | "visState": "{\"title\":\"Geo-requests-distribution\",\"type\":\"region_map\",\"params\":{\"legendPosition\":\"bottomright\",\"addTooltip\":true,\"colorSchema\":\"Yellow to Red\",\"selectedLayer\":{\"attribution\":\"

Made with NaturalEarth|Elastic Maps Service

\",\"weight\":1,\"name\":\"World Countries\",\"url\":\"https://vector.maps.elastic.co/blob/5659313586569216?elastic_tile_service_tos=agree&my_app_version=6.3.1\",\"format\":{\"type\":\"geojson\"},\"fields\":[{\"name\":\"iso2\",\"description\":\"Two letter abbreviation\"},{\"name\":\"name\",\"description\":\"Country name\"},{\"name\":\"iso3\",\"description\":\"Three letter abbreviation\"}],\"created_at\":\"2017-04-26T17:12:15.978370\",\"tags\":[],\"id\":5659313586569216,\"layerId\":\"elastic_maps_service.World Countries\"},\"selectedJoinField\":{\"name\":\"name\",\"description\":\"Country name\"},\"isDisplayWarning\":true,\"wms\":{\"enabled\":true,\"options\":{\"format\":\"image/png\",\"transparent\":true},\"baseLayersAreLoaded\":{},\"tmsLayers\":[{\"minZoom\":0,\"maxZoom\":10,\"attribution\":\"\",\"url\":\"https://example.com/v1/default/{z}/{x}/{y}.png\",\"id\":\"TMS in config/kibana.yml\"},{\"id\":\"road_map\",\"url\":\"https://tiles.maps.elastic.co/v2/default/{z}/{x}/{y}.png?elastic_tile_service_tos=agree&my_app_name=kibana&my_app_version=6.3.1\",\"minZoom\":0,\"maxZoom\":10,\"attribution\":\"

© OpenStreetMap contributors | Elastic Maps Service

\",\"subdomains\":[]}],\"selectedTmsLayer\":{\"minZoom\":0,\"maxZoom\":10,\"attribution\":\"\",\"url\":\"https://example.com/v1/default/{z}/{x}/{y}.png\",\"id\":\"TMS in config/kibana.yml\"}},\"mapZoom\":2,\"mapCenter\":[0,0],\"outlineWeight\":1,\"showAllShapes\":true},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{\"customLabel\":\"requests-distribution\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"geoip.country_name.keyword\",\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\",\"customLabel\":\"Geo-requests-Distribution\"}}]}", 8 | "uiStateJSON": "{\"mapZoom\":2,\"mapCenter\":[6.926426847059551,15.292968750000002]}", 9 | "description": "", 10 | "version": 1, 11 | "kibanaSavedObjectMeta": { 12 | "searchSourceJSON": "{\"index\":\"3b754c00-d2fa-11e8-9179-51a584345c01\",\"filter\":[],\"query\":{\"query\":\"\",\"language\":\"kuery\"}}" 13 | } 14 | } 15 | } 16 | ] -------------------------------------------------------------------------------- /lab2-elk-cloudfront-log-analysis/kibanamaxlatencypercity.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "_id": "63df0410-d304-11e8-aa33-3358ecf94586", 4 | "_type": "visualization", 5 | "_source": { 6 | "title": "Max-Latency-percity", 7 | "visState": "{\"title\":\"Max-Latency-percity\",\"type\":\"tile_map\",\"params\":{\"mapType\":\"Scaled Circle 
Markers\",\"isDesaturated\":true,\"addTooltip\":true,\"heatClusterSize\":1.5,\"legendPosition\":\"bottomright\",\"mapZoom\":2,\"mapCenter\":[0,0],\"wms\":{\"enabled\":true,\"options\":{\"format\":\"image/png\",\"transparent\":true,\"layers\":\"http://ows-tile.terrestris.de/osm-basemap/service?\"},\"baseLayersAreLoaded\":{},\"tmsLayers\":[{\"minZoom\":0,\"maxZoom\":10,\"attribution\":\"\",\"url\":\"https://example.com/v1/default/{z}/{x}/{y}.png\",\"id\":\"TMS in config/kibana.yml\"},{\"id\":\"road_map\",\"url\":\"https://tiles.maps.elastic.co/v2/default/{z}/{x}/{y}.png?elastic_tile_service_tos=agree&my_app_name=kibana&my_app_version=6.3.1\",\"minZoom\":0,\"maxZoom\":10,\"attribution\":\"

© OpenStreetMap contributors | Elastic Maps Service

\",\"subdomains\":[]}],\"selectedTmsLayer\":{\"minZoom\":0,\"maxZoom\":10,\"attribution\":\"\",\"url\":\"https://example.com/v1/default/{z}/{x}/{y}.png\",\"id\":\"TMS in config/kibana.yml\"},\"url\":\"https://maps.wikimedia.org/osm-intl/{z}/{x}/{y}.png\"}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"max\",\"schema\":\"metric\",\"params\":{\"field\":\"time_taken\",\"customLabel\":\"Maximum-Last-Byte-Latency\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"geohash_grid\",\"schema\":\"segment\",\"params\":{\"field\":\"geoip.location\",\"autoPrecision\":true,\"isFilteredByCollar\":true,\"useGeocentroid\":true,\"precision\":2,\"customLabel\":\"City\"}}]}", 8 | "uiStateJSON": "{}", 9 | "description": "", 10 | "version": 1, 11 | "kibanaSavedObjectMeta": { 12 | "searchSourceJSON": "{\"index\":\"3b754c00-d2fa-11e8-9179-51a584345c01\",\"filter\":[],\"query\":{\"query\":\"\",\"language\":\"kuery\"}}" 13 | } 14 | } 15 | } 16 | ] --------------------------------------------------------------------------------