├── .github
│   └── PULL_REQUEST_TEMPLATE.md
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── lab1-serveless-cloudfront-log-analysis
│   ├── README.md
│   ├── assets
│   │   ├── alb-access-optimized.png
│   │   ├── amazon-s3-create-bucket.png
│   │   ├── amazon-s3.png
│   │   ├── architecture-diagram.png
│   │   ├── architecture-overview-all.png
│   │   ├── assets.txt
│   │   ├── athena-database.png
│   │   ├── athena-table.png
│   │   ├── cf-access-optimized.png
│   │   ├── combine-schema.png
│   │   ├── combined-logs-all.png
│   │   ├── device-form-factor-chart.png
│   │   ├── device-form-factor-visualize-2.png
│   │   ├── device-form-factor-visualize.png
│   │   ├── edge-to-origin-chart.png
│   │   ├── edge-to-origin-filter-summary.png
│   │   ├── edge-to-origin-filter.png
│   │   ├── edge-to-origin-visualize.png
│   │   ├── edge-to-origin-x-axis.png
│   │   ├── glue-job-complete.png
│   │   ├── lambda-edge.png
│   │   ├── le-combined-logs.png
│   │   ├── log-collection.png
│   │   ├── origin-request-optimized.png
│   │   ├── product-category-chart.png
│   │   ├── product-category-filter.png
│   │   ├── product-category-sort.png
│   │   ├── quicksight-account-create.png
│   │   ├── quicksight-athena-ds.png
│   │   ├── quicksight-datasource.png
│   │   ├── quicksight-edition.png
│   │   ├── quicksight-manage.png
│   │   ├── quicksight-new-field.png
│   │   ├── quicksight-permission.png
│   │   ├── quicksight-region-selection.png
│   │   ├── quicksight-s3-bucket-selection.png
│   │   ├── quicksight-signup.png
│   │   ├── quicksight-status-code-filter-summary.png
│   │   ├── quicksight-status-code-pop-filter.png
│   │   ├── quicksight-status-code-pop.png
│   │   ├── quicksight-status-code-visualize-1.png
│   │   ├── quicksight-status-code-visualize-2.png
│   │   ├── quicksight-table-selection.png
│   │   ├── quicksight-visualization-all.png
│   │   ├── time-taken-chart.png
│   │   ├── time-taken-filter-summary.png
│   │   ├── time-taken-visualize-2.png
│   │   ├── time-taken-visualize.png
│   │   └── viewer-request-optimized.png
│   ├── lelogconverter.py
│   ├── log-combiner-glue-script.py
│   ├── originRequest-Lambda
│   │   └── index.js
│   ├── sample-logs
│   │   └── raw-logs
│   │       ├── sample-alb-logs.gz
│   │       ├── sample-cloudfront-access-logs.gz
│   │       ├── sample-lambda-at-edge-origin-request-logs.gz
│   │       └── sample-lambda-at-edge-viewer-request.gz
│   └── viewerRequest-Lambda
│       └── index.js
└── lab2-elk-cloudfront-log-analysis
    ├── README.md
    ├── assets
    │   ├── Cf1.png
    │   ├── Cf2.png
    │   ├── Cf3.png
    │   ├── Cf4.png
    │   ├── Cf5png.png
    │   ├── architecture.png
    │   ├── asset.txt
    │   ├── cleanup1.png
    │   ├── esDomain1.png
    │   ├── esDomain2.png
    │   ├── esDomain3.png
    │   ├── esIndices1.png
    │   ├── esIndices2.png
    │   ├── esIndices3.png
    │   ├── keyPair1.png
    │   ├── keyPair2.png
    │   ├── keyPair3.png
    │   ├── kibana1.png
    │   ├── kibana10.png
    │   ├── kibana11.png
    │   ├── kibana12.png
    │   ├── kibana13.png
    │   ├── kibana14.png
    │   ├── kibana15.png
    │   ├── kibana16.png
    │   ├── kibana17.png
    │   ├── kibana18.png
    │   ├── kibana2.png
    │   ├── kibana3.png
    │   ├── kibana4.png
    │   ├── kibana5.png
    │   ├── kibana6.png
    │   ├── kibana7.png
    │   ├── kibana8.png
    │   ├── kibana9.png
    │   ├── s3bucket1.png
    │   └── s3bucket2.png
    ├── config
    │   ├── CloudFront-Analysis-ELK-Lab.json
    │   ├── cloudfront.conf
    │   ├── indextemplate.json
    │   └── lab2-nginx.conf
    ├── kibanageorequests.json
    └── kibanamaxlatencypercity.json
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | *Issue #, if available:*
2 |
3 | *Description of changes:*
4 |
5 |
6 | By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice.
7 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | ## Code of Conduct
2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
4 | opensource-codeofconduct@amazon.com with any additional questions or comments.
5 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing Guidelines
2 |
3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional
4 | documentation, we greatly value feedback and contributions from our community.
5 |
6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary
7 | information to effectively respond to your bug report or contribution.
8 |
9 |
10 | ## Reporting Bugs/Feature Requests
11 |
12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features.
13 |
14 | When filing an issue, please check [existing open](https://github.com/aws-samples/amazon-cloudfront-log-analysis/issues), or [recently closed](https://github.com/aws-samples/amazon-cloudfront-log-analysis/issues?utf8=%E2%9C%93&q=is%3Aissue%20is%3Aclosed%20), issues to make sure somebody else hasn't already
15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful:
16 |
17 | * A reproducible test case or series of steps
18 | * The version of our code being used
19 | * Any modifications you've made relevant to the bug
20 | * Anything unusual about your environment or deployment
21 |
22 |
23 | ## Contributing via Pull Requests
24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that:
25 |
26 | 1. You are working against the latest source on the *master* branch.
27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already.
28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted.
29 |
30 | To send us a pull request, please:
31 |
32 | 1. Fork the repository.
33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change.
34 | 3. Ensure local tests pass.
35 | 4. Commit to your fork using clear commit messages.
36 | 5. Send us a pull request, answering any default questions in the pull request interface.
37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation.
38 |
39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and
40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/).
41 |
42 |
43 | ## Finding contributions to work on
44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any ['help wanted'](https://github.com/aws-samples/amazon-cloudfront-log-analysis/labels/help%20wanted) issues is a great place to start.
45 |
46 |
47 | ## Code of Conduct
48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
50 | opensource-codeofconduct@amazon.com with any additional questions or comments.
51 |
52 |
53 | ## Security issue notifications
54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue.
55 |
56 |
57 | ## Licensing
58 |
59 | See the [LICENSE](https://github.com/aws-samples/amazon-cloudfront-log-analysis/blob/master/LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution.
60 |
61 | We may ask you to sign a [Contributor License Agreement (CLA)](http://en.wikipedia.org/wiki/Contributor_License_Agreement) for larger changes.
62 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this
4 | software and associated documentation files (the "Software"), to deal in the Software
5 | without restriction, including without limitation the rights to use, copy, modify,
6 | merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
7 | permit persons to whom the Software is furnished to do so.
8 |
9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
10 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
11 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
12 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
13 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
14 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
15 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Analyze & Visualize Amazon CloudFront and Lambda@Edge Logs to Improve Customer Experience on your Website.
2 |
3 | ## Overview
4 |
5 | Nowadays, web servers are often fronted by a global content delivery network, such as Amazon CloudFront, to accelerate delivery of websites, APIs, media content, and other web assets. In this hands-on workshop, learn to improve website availability, optimize content based on device, browser, and user demographics, identify and analyze CDN usage patterns, and perform end-to-end debugging by correlating logs from various points in the request-response pipeline. Build an end-to-end serverless solution to analyze Amazon CloudFront logs using AWS Glue and Amazon Athena, generate visualizations to derive deeper insights using Amazon QuickSight, and correlate with other logs, such as Lambda@Edge logs and ALB logs, for a finer-grained debugging experience. You will also learn how to use the popular ELK (Elasticsearch, Logstash, Kibana) stack for geospatial visualization of CloudFront logs. We will also discuss how you can extend the pipeline you just built to generate the deeper insights needed to improve the overall experience for your users.
6 |
7 | ## AWS Console
8 |
9 | ### Verifying your region in the AWS Management Console
10 |
11 | With Amazon EC2, you can place instances in multiple locations. Amazon EC2 locations are composed of regions that contain more than one Availability Zone. Regions are dispersed and located in separate geographic areas (US, EU, etc.). Availability Zones are distinct locations within a region. They are engineered to be isolated from failures in other Availability Zones and to provide inexpensive, low-latency network connectivity to other Availability Zones in the same region.
12 |
13 | By launching instances in separate regions, you can design your application to be closer to specific customers or to meet legal or other requirements. By launching instances in separate Availability Zones, you can protect your application from the failure of a single location.
14 |
15 | ### Verify your Region
16 |
17 | The AWS region name is always listed in the upper-right corner of the AWS Management Console, in the navigation bar.
18 |
19 | * Make a note of the AWS region name; for example, for this lab you will need to choose the **EU (Ireland)** region.
20 | * Use the chart below to determine the region code. Choose **eu-west-1 for this lab.**
21 |
22 | | Region Name |Region Code|
23 | |---|---|
24 | |US East (Northern Virginia) Region|us-east-1 |
25 | |US West (Oregon) Region|us-west-2|
26 | |Asia Pacific (Tokyo) Region|ap-northeast-1|
27 | |Asia Pacific (Seoul) Region|ap-northeast-2|
28 | |Asia Pacific (Singapore) Region|ap-southeast-1|
29 | |Asia Pacific (Sydney) Region|ap-southeast-2|
30 | |EU (Ireland) Region|eu-west-1|
31 | |EU (Frankfurt) Region|eu-central-1|
32 |
33 | ---
34 | ## Labs
35 |
36 | ### Pre-requisites
37 | You should have an active AWS account with an IAM role that has Administrator access.
38 |
39 | |Lab|Name|
40 | |---|----|
41 | |Lab 1|[Serverless Amazon CloudFront Log Analysis Pipeline](./lab1-serveless-cloudfront-log-analysis)|
42 | |Lab 2|[Amazon CloudFront Log Analysis using ELK](./lab2-elk-cloudfront-log-analysis)|
43 |
44 | ## Deploy Solution
45 | We recommend deploying the solution for Lab 2 using the CloudFormation template while we go through the presentation; this saves time for Lab 2. Please complete the following two steps to deploy the solution. The CloudFormation stack takes about 10 minutes to complete.
46 | - [Create a Key Pair for EC2 Instances](https://github.com/aws-samples/amazon-cloudfront-log-analysis/tree/master/lab2-elk-cloudfront-log-analysis#create-a-key-pair-for-ec2-instances)
47 | - [Deploy Solution](https://github.com/aws-samples/amazon-cloudfront-log-analysis/tree/master/lab2-elk-cloudfront-log-analysis#deploy-solution)
48 |
49 | ## License Summary
50 |
51 | This sample code is made available under a modified MIT license. See the LICENSE file.
52 |
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/README.md:
--------------------------------------------------------------------------------
1 | # LAB 1: Serverless Amazon CloudFront Log Analysis Pipeline
2 |
3 |
4 | * [Overview](#overview)
5 | * [Log collection](#log-collection)
6 | * [Lab Overview](#lab-overview)
7 | * [Lambda @ Edge](#lambda--edge)
8 | * [Pre-requisites](#pre-requisites)
9 | * [Create Amazon S3 Bucket](#create-amazon-s3-bucket)
10 | * [Creating Glue Data Catalog Database and Table using Amazon Athena](#creating-glue-data-catalog-database-and-table-using-amazon-athena)
11 | * [Create Glue Data Catalog Database using Amazon Athena](#create-glue-data-catalog-database-using-amazon-athena)
12 | * [Create Glue Data Catalog for CloudFront Access Logs in optimized Parquet Format](#create-glue-data-catalog-for-cloudfront-access-logs-in-optimized-parquet-format)
13 | * [Create Glue Data Catalog for Application Load Balancer(ALB) Access Logs in optimized Parquet Format](#create-glue-data-catalog-for-application-load-balanceralb-access-logs-in-optimized-parquet-format)
14 | * [Create Glue Data Catalog for Lambda@Edge Logs - Viewer Request in optimized Parquet Format](#create-glue-data-catalog-for-lambdaedge-logs---viewer-request-in-optimized-parquet-format)
15 | * [Create Glue Data Catalog for Lambda@Edge Logs - Origin Request in optimized Parquet Format](#create-glue-data-catalog-for-lambdaedge-logs---origin-request-in-optimized-parquet-format)
16 | * [Combine the logs using an AWS Glue ETL Job](#combine-the-logs-using-an-aws-glue-etl-job)
17 | * [Create AWS IAM Role](#create-aws-iam-role)
18 | * [Create AWS Glue ETL Job](#create-aws-glue-etl-job)
20 | * [(Optional)Create AWS Glue Data Catalog for the combined Lambda@Edge logs using Amazon Athena](#optional-create-aws-glue-data-catalog-for-the-combined-lamabdaeddge-logs-using-amazon-athena)
21 | * [Create AWS Glue Data Catalog for the combined logs using Amazon Athena](#create-aws-glue-data-catalog-for-the-combined-logs-using-amazon-athena)
22 | * [Visualization using Amazon QuickSight](#visualization-using-amazon-quicksight)
23 | * [Signing Up for Amazon QuickSight Standard Edition](#signing-up-for-amazon-quicksight-standard-edition)
24 | * [Configure Amazon S3 bucket Permission](#configure-amazon-s3-bucket-permission)
25 | * [Configuring Amazon QuickSight to use Amazon Athena as data source](#configuring-amazon-quicksight-to-use-amazon-athena-as-data-source)
26 | * [Generating new calculated fields in Amazon QuickSight](#generating-new-calculated-fields-in-amazon-quicksight)
27 | * [Create new calculated fields “EdgeToOriginTimeTaken” in Amazon QuickSight](#create-new-calculated-fields-edgetoorigintimetaken-in-amazon-quicksight)
28 | * [Create new calculated fields "HourOfDay" in Amazon QuickSight](#create-new-calculated-fields-hourofday-in-amazon-quicksight)
29 | * [Create new calculated fields "TotalTimeTakenAtALB" in Amazon QuickSight](#create-new-calculated-fields-totaltimetakenatalb-in-amazon-quicksight)
30 | * [Generate Visualization using Amazon QuickSight](#generate-visualization-using-amazon-quicksight)
31 | * [Generate visualization to status code by edge location](#generate-visualization-to-status-code-by-edge-location)
32 | * [(Optional)Generate visualization to status code by URI](#optional-generate-visualization-to-status-code-by-uri)
33 | * [Generate visualization to show hourly average time taken between edge and origin by country where the end user request originated from](#generate-visualization-to-show-hourly-average-time-taken-between-edge-and-origin-by-country-where-the-end-user-request-originated-from)
34 | * [Generate visualization to show hourly average time taken (total Vs. edge to origin Vs. server-side processing) by country where the end user request originated from](#generate-visualization-to-show-hourly-average-time-taken-total-vs-edge-to-origin-vs-server-side-processing-by-country-where-the-end-user-request-originated-from)
35 | * [(Optional)Generate visualization to show hourly average time taken (total Vs. edge to origin Vs. server-side processing) by country where the end user request originated from for a different viewer country](#optional-generate-visualization-to-show-hourly-average-time-taken-total-vs-edge-to-origin-vs-server-side-processing-by-country-where-the-end-user-request-originated-from-for-a-different-viewer-country)
36 | * [Generate Visualization to show product category request by country](#generate-visualization-to-show-product-category-request-by-country)
37 | * [(Optional)Generate visualization to show device form factor ratio](#optional-generate-visualization-to-show-device-form-factor-ratio)
38 | * [(Optional)Generate visualization to show device form factor ratio by viewer country](#optional-generate-visualization-to-show-device-form-factor-ration-by-viewer-country)
39 |
40 | ---
41 | ---
42 |
43 | ## Overview
44 |
45 | ### Log collection
46 |
47 | 
48 |
49 | As part of log data generation, the following four logs have been collected:
50 |
51 | |Log Name|Raw Log Location|Format|Log Entries|
52 | |---|----|---|-----|
53 | |Viewer request triggered Lambda@Edge logs|aws s3 ls s3://eu-west-1.data-analytics/raw/lelogs/viewer-request/|JSON|{executionregion, requestid, distributionid, distributionname, eventtype, requestdata, customtraceid, useragentstring}|
54 | |Amazon CloudFront access logs|aws s3 ls s3://eu-west-1.data-analytics/raw/cf-accesslogs/|CSV|[Web Distribution Log File Format](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/AccessLogs.html#BasicDistributionFileFormat)|
55 | |Origin request triggered Lambda@Edge logs|aws s3 ls s3://eu-west-1.data-analytics/raw/lelogs/origin-request/|JSON|{executionregion, requestid, distributionid, distributionname, eventtype, requestdata, customtraceid, viewercountry, deviceformfactor}|
56 | |Application Load Balancer(ALB) logs|aws s3 ls s3://eu-west-1.data-analytics/raw/lblogs/|Space-delimited text|[Access Log Entries](https://docs.aws.amazon.com/elasticloadbalancing/latest/application/load-balancer-access-logs.html#access-log-entry-format)|
57 |
58 | ---
59 |
60 | ### Lab Overview
61 |
62 | 
63 |
64 | In this lab, you are going to build a serverless architecture to combine all four logs - 1) viewer request triggered Lambda@Edge logs, 2) origin request triggered Lambda@Edge logs, 3) Amazon CloudFront access logs, and 4) Application Load Balancer(ALB) logs - using AWS Glue, then analyze the combined logs using Amazon Athena and visualize them in Amazon QuickSight. The logs you are going to use have already been converted from raw logs in CSV or JSON format into optimized logs in partitioned and compressed Parquet format.
65 |
66 | |Log Name|Partition|Conversion Script|Github|Optimized Log Location|
67 | |---|----|---|---|---|
68 | |Viewer request triggered Lambda@Edge logs|year, month, day, hour|[lelogconverter.py](./lelogconverter.py)|-|aws s3 ls s3://eu-west-1.data-analytics/cflogworkshop/optimized/lelogs/viewer-request/|
69 | |Amazon CloudFront access logs|year, month, day|[sample_cloudfront_job.py](https://github.com/awslabs/athena-glue-service-logs/blob/master/scripts/sample_cloudfront_job.py)|[Link](https://github.com/awslabs/athena-glue-service-logs)|aws s3 ls s3://us-east-1.data-analytics/cflogworkshop/optimized/cf-accesslogs/|
70 | |Origin request triggered Lambda@Edge logs|year, month, day, hour|[lelogconverter.py](./lelogconverter.py)|-|aws s3 ls s3://eu-west-1.data-analytics/cflogworkshop/optimized/lelogs/origin-request/|
71 | |Application Load Balancer(ALB) logs|region, year, month, day|[sample_alb_job.py](https://github.com/awslabs/athena-glue-service-logs/blob/master/scripts/sample_alb_job.py)|[Link](https://github.com/awslabs/athena-glue-service-logs)|aws s3 ls s3://eu-west-1.data-analytics/cflogworkshop/optimized/lblogs/|
72 |
73 | ---
74 |
75 | ### Lambda @ Edge
76 |
77 | 
78 |
79 | |EventType|Script|
80 | |---|----|
81 | |Viewer Request|[index.js](./viewerRequest-Lambda/index.js)|
82 | |Origin Request|[index.js](./originRequest-Lambda/index.js)|
83 |
84 | ---
85 | ---
86 |
87 | ## Pre-requisites
88 | This module requires:
89 | - You should have an active AWS account with an IAM role that has Administrator access.
90 |
91 | ---
92 | ---
93 |
94 | ## Create Amazon S3 Bucket
95 |
96 | In this section you will create an Amazon S3 bucket to store the combined and optimized logs (produced by joining the viewer request triggered Lambda@Edge logs, origin request triggered Lambda@Edge logs, Amazon CloudFront access logs, and Application Load Balancer(ALB) logs) written by the AWS Glue ETL job that you create and execute as part of this workshop.
97 |
98 | - Open the AWS Management console for Amazon S3 from [here](https://s3.console.aws.amazon.com/s3/home?region=eu-west-1)
99 | - On the S3 Dashboard, Click on **Create Bucket.**
100 |
101 | 
102 |
103 | - In the **Create Bucket** pop-up page, input a unique **Bucket name**. Choose a long bucket name with many random characters and numbers (no spaces). You will need this bucket name later in this exercise.
104 | - Select the region as **EU (Ireland)**
105 | - Click **Next** to navigate to the next tab
106 | - In the **Configure Options** tab, leave all options as default
107 | - Click **Next** to navigate to the next tab
108 | - In the **Set permissions** tab, leave all options as default
109 | - Click **Next** to navigate to the next tab
110 | - In the **Review** tab, click on **Create Bucket**
111 |
112 | 
113 |
114 | ---
115 | ---
116 |
117 | ## Creating Glue Data Catalog Database and Table using Amazon Athena
118 |
119 | In this section you will create an AWS Glue Data Catalog database along with tables pointing to the optimized logs. These logs have been pre-generated as part of the workshop. You will be creating the following tables, loading the partitions into each of these tables, and previewing the fields.
120 |
121 | |Table Name|Log Name|Partition|
122 | |---|---|----|
123 | |lambdaedge_logs_viewer_request_optimized|Viewer request triggered Lambda@Edge logs|year, month, day, hour|
124 | |cf_access_optimized|Amazon CloudFront access logs|year, month, day|
125 | |lambdaedge_logs_origin_request_optimized|Origin request triggered Lambda@Edge logs|year, month, day, hour|
126 | |alb_access_optimized|Application Load Balancer(ALB) logs|region, year, month, day|
127 |
128 | The AWS Glue ETL job that combines all four logs will refer to the metadata in the AWS Glue Data Catalog to read the logs from Amazon S3.
129 |
130 | ### Create Glue Data Catalog Database using Amazon Athena
131 |
132 | - Open the AWS Management Console for Athena from [here](https://console.aws.amazon.com/athena/home).
133 | - If this is your first time visiting the AWS Management Console for Athena, you will get a Getting Started page. Choose Get Started to open the Query Editor. If this isn't your first time, the Athena Query Editor opens.
134 | - Make a note of the AWS region name, for example, for this lab you will need to choose the *EU (Ireland)* region.
135 | - In the *Athena Query Editor*, you will see a query pane with an example query. Now you can start entering your query in the query pane.
136 | - To create a database named reInvent2018_aws_service_logs, copy the following statement, and then choose Run Query:
137 |
138 | ```sql
139 | CREATE DATABASE IF NOT EXISTS reInvent2018_aws_service_logs
140 | ```
141 |
142 | 
143 |
144 | - Ensure *reInvent2018_aws_service_logs* appears in the DATABASE list on the Catalog dashboard
145 |
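As an additional check, you can list the catalog databases directly from the query pane; `SHOW DATABASES` is standard Athena (HiveQL) syntax:

```sql
SHOW DATABASES
```
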
146 | ---
147 |
148 | ### Create Glue Data Catalog for CloudFront Access Logs in optimized Parquet Format
149 |
150 | - Ensure that current AWS region is **EU (Ireland)** region
151 | - Ensure *reInvent2018_aws_service_logs* is selected from the DATABASE list and then choose New Query.
152 | - In the query pane, copy the following statement to create the *cf_access_optimized* table, and then choose **Run Query**:
153 |
154 | ```sql
155 | CREATE EXTERNAL TABLE IF NOT EXISTS reInvent2018_aws_service_logs.cf_access_optimized(
156 | time timestamp,
157 | location string,
158 | bytes bigint,
159 | requestip string,
160 | method string,
161 | host string,
162 | uri string,
163 | status int,
164 | referrer string,
165 | useragent string,
166 | querystring string,
167 | cookie string,
168 | resulttype string,
169 | requestid string,
170 | hostheader string,
171 | requestprotocol string,
172 | requestbytes bigint,
173 | timetaken double,
174 | xforwardedfor string,
175 | sslprotocol string,
176 | sslcipher string,
177 | responseresulttype string,
178 | httpversion string)
179 | PARTITIONED BY (
180 | year string,
181 | month string,
182 | day string)
183 | ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
184 | STORED AS PARQUET
185 | LOCATION 's3://us-east-1.data-analytics/cflogworkshop/optimized/cf-accesslogs'
186 | TBLPROPERTIES("parquet.compress"="SNAPPY")
187 | ```
188 |
189 | 
190 |
191 | Now that you have created the table you need to add the partition metadata to the AWS Glue Data Catalog.
192 |
193 | Choose **New Query**, copy the following statement into the query pane, and then choose **Run Query** to add partition metadata
194 |
195 | ```sql
196 | MSCK REPAIR TABLE reInvent2018_aws_service_logs.cf_access_optimized
197 | ```
198 |
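As an optional sanity check, you can list the partitions the repair step registered (`SHOW PARTITIONS` is standard Athena syntax):

```sql
SHOW PARTITIONS reInvent2018_aws_service_logs.cf_access_optimized
```
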
199 | - Get the total number of CloudFront Access Log records:
200 |
201 | ```sql
202 | SELECT count(*) AS rowcount FROM reInvent2018_aws_service_logs.cf_access_optimized
203 | ```
204 |
205 | > :warning: Ensure that the rowcount = **207535**
206 |
207 | - Get the first ten records:
208 |
209 | ```sql
210 | SELECT * FROM reInvent2018_aws_service_logs.cf_access_optimized LIMIT 10
211 | ```
212 |
213 | *After a few seconds, Athena will display your query results as shown below:*
214 |
215 | 
216 |
217 |
218 |
219 |
220 | Review the values in the following fields/columns, as you will be using them in this workshop.
221 |
222 | |Field Name|Description|Type|
223 | |---|----|---|
224 | |requestid|An encrypted string that uniquely identifies a request. This field value is used to join the optimized CloudFront access logs with the optimized Lambda@Edge logs|string|
225 | |time|The time when the CloudFront server finished responding to the request (in UTC), for example, 01:42:39|timestamp|
226 | |location|The edge location that served the request. Each edge location is identified by a three-letter code and an arbitrarily assigned number, for example, DFW3. The three-letter code typically corresponds with the International Air Transport Association airport code for an airport near the edge location. (These abbreviations might change in the future.) For a list of edge locations, see the Amazon CloudFront detail page, [http://aws.amazon.com/cloudfront](http://aws.amazon.com/cloudfront)|string|
227 | |uri|The query string portion of the URI, if any. When a URI doesn't contain a query string, the value of cs-uri-query is a hyphen (-). For more information, see [Caching Content Based on Query String Parameters](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/QueryStringParameters.html).|string|
228 | |status|One of the following values:<br>- An HTTP status code (for example, 200). For a list of HTTP status codes, see [RFC 2616, Hypertext Transfer Protocol—HTTP 1.1, section 10, Status Code Definitions](http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html). For more information, see [How CloudFront Processes and Caches HTTP 4xx and 5xx Status Codes from Your Origin](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/HTTPStatusCodes.html).<br>- 000, which indicates that the viewer closed the connection (for example, closed the browser tab) before CloudFront could respond to a request. If the viewer closes the connection after CloudFront starts to send the object, the log contains the applicable HTTP status code.|string|
229 | |useragent|The value of the User-Agent header in the request. The User-Agent header identifies the source of the request, such as the type of device and browser that submitted the request and, if the request came from a search engine, which search engine. For more information, see [User-Agent Header](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/RequestAndResponseBehaviorCustomOrigin.html#request-custom-user-agent-header).|string|
230 | |responseresulttype|How CloudFront classified the response just before returning it to the viewer. Possible values include:<br>- Hit – CloudFront served the object to the viewer from the edge cache. For information about a situation in which CloudFront classifies the result type as Hit even though the response from the origin contains a Cache-Control: no-cache header, see [Simultaneous Requests for the Same Object (Traffic Spikes)](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/RequestAndResponseBehaviorCustomOrigin.html#request-custom-traffic-spikes).<br>- RefreshHit – CloudFront found the object in the edge cache but it had expired, so CloudFront contacted the origin to determine whether the cache has the latest version of the object and, if not, to get the latest version.<br>- Miss – The request could not be satisfied by an object in the edge cache, so CloudFront forwarded the request to the origin server and returned the result to the viewer.<br>- LimitExceeded – The request was denied because a CloudFront limit was exceeded.<br>- CapacityExceeded – CloudFront returned an HTTP 503 status code (Service Unavailable) because the CloudFront edge server was temporarily unable to respond to requests.<br>- Error – Typically, this means the request resulted in a client error (sc-status is 4xx) or a server error (sc-status is 5xx).<br>- Redirect – CloudFront redirects from HTTP to HTTPS. If sc-status is 403 and you configured CloudFront to restrict the geographic distribution of your content, the request might have come from a restricted location. For more information about geo restriction, see [Restricting the Geographic Distribution of Your Content](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/georestrictions.html). If the value of x-edge-result-type is Error and the value of x-edge-response-result-type is not Error, the client disconnected before finishing the download.|string|
231 | |timetaken|The number of seconds (to the thousandth of a second, for example, 0.002) between the time that a CloudFront edge server receives a viewer's request and the time that CloudFront writes the last byte of the response to the edge server's output queue as measured on the server. From the perspective of the viewer, the total time to get the full object will be longer than this value due to network latency and TCP buffering.|double|
232 | |year(partition)|The year on which the event occurred.|string|
233 | |month(partition)|The month on which the event occurred.|string|
234 | |day(partition)|The day on which the event occurred.|string|
235 |
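To see how these fields are typically used together, here is an illustrative query (not part of the workshop steps) that computes the cache hit ratio and average time taken per edge location, using only columns defined in the DDL above:

```sql
SELECT location,
       count(*) AS requests,
       100.0 * sum(CASE WHEN responseresulttype = 'Hit' THEN 1 ELSE 0 END) / count(*) AS hit_pct,
       avg(timetaken) AS avg_timetaken_sec
FROM reInvent2018_aws_service_logs.cf_access_optimized
GROUP BY location
ORDER BY requests DESC
LIMIT 10
```
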
236 |
237 |
238 | ---
239 |
240 | ### Create Glue Data Catalog for Application Load Balancer(ALB) Access Logs in optimized Parquet Format
241 |
242 | In the query pane, copy the following statement to create the **alb_access_optimized** table, and then choose **Run Query**:
243 |
244 | ```sql
245 | CREATE EXTERNAL TABLE IF NOT EXISTS reInvent2018_aws_service_logs.alb_access_optimized(
246 | type string,
247 | time timestamp,
248 | elb string,
249 | client_ip_port string,
250 | target_ip_port string,
251 | request_processing_time double,
252 | target_processing_time double,
253 | response_processing_time double,
254 | elb_status_code string,
255 | target_status_code string,
256 | received_bytes bigint,
257 | sent_bytes bigint,
258 | request_verb string,
259 | request_url string,
260 | request_proto string,
261 | user_agent string,
262 | ssl_cipher string,
263 | ssl_protocol string,
264 | target_group_arn string,
265 | trace_id string,
266 | domain_name string,
267 | chosen_cert_arn string)
268 | PARTITIONED BY (
269 | region string,
270 | year string,
271 | month string,
272 | day string)
273 | ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
274 | STORED AS PARQUET
275 | LOCATION 's3://eu-west-1.data-analytics/cflogworkshop/optimized/lblogs'
276 | TBLPROPERTIES("parquet.compress"="SNAPPY")
277 | ```
278 |
279 | Now that you have created the table you need to add the partition metadata to the AWS Glue Catalog.
280 |
281 | - Choose **New Query**, copy the following statement into the query pane, and then choose **Run Query** to add partition metadata.
282 |
283 | ```sql
284 | MSCK REPAIR TABLE reInvent2018_aws_service_logs.alb_access_optimized
285 | ```
286 |
287 | - Get the total number of ALB Access Log records:
288 |
289 | ```sql
290 | SELECT count(*) AS rowcount FROM reInvent2018_aws_service_logs.alb_access_optimized
291 | ```
292 |
293 | > :warning: Ensure that the rowcount = **15355**
294 |
295 | - Get the first ten records:
296 |
297 | ```sql
298 | SELECT * FROM reInvent2018_aws_service_logs.alb_access_optimized LIMIT 10
299 | ```
300 |
301 | After a few seconds, Athena will display your query results as shown below:
302 |
303 | 
304 |
305 |
306 |
307 | Review the values in the following fields/columns, as you will be using them in this workshop.
308 |
309 | |Field Name|Description|Type|
310 | |---|----|---|
311 | |trace_id|The contents of the X-Amzn-Trace-Id header, enclosed in double quotes. This field is used to join the optimized ALB logs with the optimized Lambda@Edge logs, which in turn are used to correlate with the optimized CloudFront access logs using the requestid field. For more information see [Request Tracing for Your Application Load Balancer](https://docs.aws.amazon.com/elasticloadbalancing/latest/application/load-balancer-request-tracing.html). Example value: ```X-Amzn-Trace-Id: Self=1-67891234-12456789abcdef012345678;Root=1-67891233-abcdef012345678912345678```|string|
312 | |request_processing_time|The total time elapsed (in seconds, with millisecond precision) from the time the load balancer received the request until the time it sent it to a target. This value is set to -1 if the load balancer can't dispatch the request to a target. This can happen if the target closes the connection before the idle timeout or if the client sends a malformed request. This value can also be set to -1 if the registered target does not respond before the idle timeout.| double|
313 | |target_processing_time|The total time elapsed (in seconds, with millisecond precision) from the time the load balancer sent the request to a target until the target started to send the response headers. This value is set to -1 if the load balancer can't dispatch the request to a target. This can happen if the target closes the connection before the idle timeout or if the client sends a malformed request. This value can also be set to -1 if the registered target does not respond before the idle timeout. |double|
314 | |response_processing_time|The total time elapsed (in seconds, with millisecond precision) from the time the load balancer received the response header from the target until it started to send the response to the client. This includes both the queuing time at the load balancer and the connection acquisition time from the load balancer to the client. This value is set to -1 if the load balancer can't send the request to a target. This can happen if the target closes the connection before the idle timeout or if the client sends a malformed request. |double|
315 | |region(partition)|The region of the load balancer and S3 bucket.|string|
316 | |year(partition)|The year the log was delivered.|string|
317 | |month(partition)|The month the log was delivered.|string|
318 | |day(partition)|The day the log was delivered.|string|
319 |
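Since the three processing-time fields together approximate the total time a request spends at the load balancer, here is an illustrative query (not a workshop step) that averages them per day, excluding the -1 sentinel values described above:

```sql
SELECT year, month, day,
       avg(request_processing_time + target_processing_time + response_processing_time) AS avg_alb_time_sec
FROM reInvent2018_aws_service_logs.alb_access_optimized
WHERE request_processing_time >= 0
  AND target_processing_time >= 0
  AND response_processing_time >= 0
GROUP BY year, month, day
ORDER BY year, month, day
```
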
320 |
321 |
322 | ---
323 |
324 | ### Create Glue Data Catalog for Lambda@Edge Logs - Viewer Request in optimized Parquet Format
325 |
326 | In the query pane, copy the following statement to create the *lambdaedge_logs_viewer_request_optimized* table, and then choose **Run Query**:
327 |
328 | ```sql
329 | CREATE EXTERNAL TABLE IF NOT EXISTS reInvent2018_aws_service_logs.lambdaedge_logs_viewer_request_optimized(
330 | executionregion string,
331 | requestid string,
332 | distributionid string,
333 | distributionname string,
334 | eventtype string,
335 | requestdata string,
336 | customtraceid string,
337 | useragentstring string)
338 | PARTITIONED BY (
339 | year string,
340 | month string,
341 | date string,
342 | hour string)
343 | ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
344 | STORED AS PARQUET
345 | LOCATION 's3://eu-west-1.data-analytics/cflogworkshop/optimized/lelogs/viewer-request'
346 | TBLPROPERTIES("parquet.compress"="SNAPPY")
347 | ```
348 |
349 | Now that you have created the table you need to add the partition metadata to the AWS Glue Catalog.
350 |
351 | - Choose **New Query**, copy the following statement into the query pane, and then choose **Run Query** to add partition metadata.
352 |
353 | ```sql
354 | MSCK REPAIR TABLE reInvent2018_aws_service_logs.lambdaedge_logs_viewer_request_optimized
355 | ```
356 |
357 | - Get the total number of Lambda@Edge Log - Viewer Request records:
358 |
359 | ```sql
360 | SELECT count(*) AS rowcount FROM reInvent2018_aws_service_logs.lambdaedge_logs_viewer_request_optimized
361 | ```
362 |
363 | > :warning: Ensure that the rowcount = **207837**
364 |
365 | - Get the first ten records:
366 |
367 | ```sql
368 | SELECT * FROM reInvent2018_aws_service_logs.lambdaedge_logs_viewer_request_optimized LIMIT 10
369 | ```
370 |
371 | After a few seconds, Athena will display your query results as shown below:
372 |
373 | 
374 |
375 |
376 |
377 | Review the values in the following fields/columns.
378 |
379 | |Field Name|Description|Type|
380 | |---|----|---|
381 | |requestid|An encrypted string that uniquely identifies a request. This field value is used to join the optimized CloudFront access logs with the optimized Lambda@Edge logs. The requestId value also appears in CloudFront access logs as x-edge-request-id. For more information, see [Configuring and Using Access Logs](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/AccessLogs.html) and [Web Distribution Log File Format](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/AccessLogs.html#BasicDistributionFileFormat).|string|
382 | |customtraceid|A uniquely generated value per request, used to join the ALB logs with the Lambda@Edge logs. As part of client-side instrumentation, a unique value (sample value: ```Root=1-67891233-abcdef012345678912345678```) is generated per request and added to two headers, **x-my-trace-id** and **X-Amzn-Trace-Id**. The viewer-request triggered Lambda@Edge function extracts the **x-my-trace-id** header and logs the value. For more details see [Viewer Request Trigger Lambda Function](./viewerRequest-Lambda/index.js). The **X-Amzn-Trace-Id** value is logged by the ALB. For more details, refer to [Request Tracing for Your Application Load Balancer](https://docs.aws.amazon.com/elasticloadbalancing/latest/application/load-balancer-request-tracing.html).|string|
383 | |executionregion|The AWS region where the Lambda@Edge function was executed.|string|
384 | |eventtype|The type of trigger that's associated with the request. Value = "viewer-request"|string|
385 | |distributionid|The ID of the distribution that's associated with the request.|string|
386 | |distributionname|The domain name of the distribution that's associated with the request.|string|
387 | |year(partition)|The year on which the event occurred.|string|
388 | |month(partition)|The month on which the event occurred.|string|
389 | |day(partition)|The day on which the event occurred.|string|
390 | |hour(partition)|The hour on which the event occurred.|string|
391 |
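Because `requestid` is the join key described above, you can already correlate the two tables ad hoc in Athena before running the Glue job; a minimal illustrative example:

```sql
SELECT cf.time,
       cf.location,
       cf.status,
       le.useragentstring,
       le.customtraceid
FROM reInvent2018_aws_service_logs.cf_access_optimized cf
JOIN reInvent2018_aws_service_logs.lambdaedge_logs_viewer_request_optimized le
  ON cf.requestid = le.requestid
LIMIT 10
```
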
392 |
393 |
394 | ---
395 |
396 | ### Create Glue Data Catalog for Lambda@Edge Logs - Origin Request in optimized Parquet Format
397 |
398 | In the query pane, copy the following statement to create the *lambdaedge_logs_origin_request_optimized* table, and then choose **Run Query**:
399 |
400 | ```sql
401 | CREATE EXTERNAL TABLE IF NOT EXISTS reInvent2018_aws_service_logs.lambdaedge_logs_origin_request_optimized(
402 | executionregion string,
403 | requestid string,
404 | distributionid string,
405 | distributionname string,
406 | eventtype string,
407 | requestdata string,
408 | customtraceid string,
409 | viewercountry string,
410 | deviceformfactor string)
411 | PARTITIONED BY (
412 | year string,
413 | month string,
414 | date string,
415 | hour string)
416 | ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
417 | STORED AS PARQUET
418 | LOCATION 's3://eu-west-1.data-analytics/cflogworkshop/optimized/lelogs/origin-request'
419 | TBLPROPERTIES("parquet.compress"="SNAPPY")
420 | ```
421 |
422 | Now that you have created the table you need to add the partition metadata to the AWS Glue Catalog.
423 |
424 | - Choose **New Query**, copy the following statement into the query pane, and then choose **Run Query** to add partition metadata.
425 |
426 | ```sql
427 | MSCK REPAIR TABLE reInvent2018_aws_service_logs.lambdaedge_logs_origin_request_optimized
428 | ```
429 |
430 | - Get the total number of Lambda@Edge Log - Origin Request records:
431 |
432 | ```sql
433 | SELECT count(*) AS rowcount FROM reInvent2018_aws_service_logs.lambdaedge_logs_origin_request_optimized
434 | ```
435 |
436 | > :warning: Ensure that the rowcount = **14517**
437 |
438 | - Get the first ten records:
439 |
440 | ```sql
441 | SELECT * FROM reInvent2018_aws_service_logs.lambdaedge_logs_origin_request_optimized LIMIT 10
442 | ```
443 |
444 | After a few seconds, Athena will display your query results as shown below:
445 |
446 | 
447 |
448 |
449 |
450 | Review the values in the following fields/columns.
451 |
452 | |Field Name|Description|Type|
453 | |---|----|---|
454 | |requestid|An encrypted string that uniquely identifies a request. This field value is used to join the optimized CloudFront access logs with the optimized Lambda@Edge logs. The requestId value also appears in CloudFront access logs as x-edge-request-id. For more information, see [Configuring and Using Access Logs](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/AccessLogs.html) and [Web Distribution Log File Format](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/AccessLogs.html#BasicDistributionFileFormat).|string|
455 | |customtraceid|A uniquely generated value per request, used to join the ALB logs with the Lambda@Edge logs. As part of client-side instrumentation, a unique value (sample value: ```Root=1-67891233-abcdef012345678912345678```) is generated per request and added to two headers, **x-my-trace-id** and **X-Amzn-Trace-Id**. The origin-request triggered Lambda@Edge function extracts the **x-my-trace-id** header and logs the value. For more details see [Origin Request Trigger Lambda Function](./originRequest-Lambda/index.js). The **X-Amzn-Trace-Id** value is logged by the ALB. For more details, refer to [Request Tracing for Your Application Load Balancer](https://docs.aws.amazon.com/elasticloadbalancing/latest/application/load-balancer-request-tracing.html).|string|
456 | |executionregion|The AWS region where the Lambda@Edge function was executed.|string|
457 | |eventtype|The type of trigger that's associated with the request. Value = "origin-request"|string|
458 | |distributionid|The ID of the distribution that's associated with the request.|string|
459 | |distributionname|The domain name of the distribution that's associated with the request.|string|
460 | |viewercountry|Two letter country code based on IP address where the request came from. For more details [Configuring Caching Based on the Location of the Viewer](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/header-caching.html#header-caching-web-location). For an easy-to-use list of country codes, sortable by code and by country name, see the Wikipedia entry [ISO 3166-1 alpha-2](http://en.wikipedia.org/wiki/ISO_3166-1_alpha-2).|string|
461 | |deviceformfactor|Category or form factor of the device, based on the user agent associated with the request. For more details see [Configuring Caching Based on the Device Type](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/header-caching.html#header-caching-web-device). Possible values: desktop, mobile, smarttv, tablet.|string|
462 | |year(partition)|The year on which the event occurred.|string|
463 | |month(partition)|The month on which the event occurred.|string|
464 | |day(partition)|The day on which the event occurred.|string|
465 | |hour(partition)|The hour on which the event occurred.|string|
466 |
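As an illustration of the two fields unique to this table, the following ad hoc query (not a workshop step) breaks down requests by viewer country and device form factor:

```sql
SELECT viewercountry,
       deviceformfactor,
       count(*) AS requests
FROM reInvent2018_aws_service_logs.lambdaedge_logs_origin_request_optimized
GROUP BY viewercountry, deviceformfactor
ORDER BY requests DESC
```
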
467 |
468 |
469 | ---
470 | ---
471 |
472 | ## Combine the logs using an AWS Glue ETL Job
473 |
474 | Now that you have created all the AWS Glue Data Catalog tables for the optimized logs, in this section you will create an AWS Glue ETL job to join the four optimized logs - 1) viewer request triggered Lambda@Edge logs, 2) origin request triggered Lambda@Edge logs, 3) Amazon CloudFront access logs, and 4) Application Load Balancer(ALB) logs. The output of the combined logs is written in optimized Parquet format to the Amazon S3 bucket that you created at the beginning of this lab. The data is partitioned by year, then month, then day. You will also create an IAM role that grants the AWS Glue service permission to read and write to the Amazon S3 bucket and access the AWS Glue Data Catalog tables.
475 |
476 | ### Create AWS IAM Role
477 |
478 | Create an IAM role that has permission to access your Amazon S3 sources, targets, temporary directory, scripts, and any libraries used by the job, and that can be assumed by the AWS Glue service (AWSGlueServiceRole).
479 |
480 | - Open the AWS Management console for AWS IAM from [here](https://console.aws.amazon.com/iam/home?region=us-west-2#/roles)
481 | - On the IAM **Role** page click on **Create role**
482 | - Choose **Glue** under the **Choose the service that will use this role** section
483 | - Ensure that **Glue** is shown under the **Select your use case** section
484 | - Click on **Next:Permissions** on the bottom
485 | - On the Attach permissions policies, search policies for S3 and check the box for **AmazonS3FullAccess**
486 |
487 | > :warning: Do not click on the policy, you just have to check the corresponding checkbox
488 |
489 | - On the same page, now search policies for Glue and check the boxes for **AWSGlueConsoleFullAccess** and **AWSGlueServiceRole**.
490 |
491 | > :warning: Do not click on the policy, you just have to check the corresponding checkbox
492 |
493 | - Click on **Next: Tags**
494 | - Click on **Next: Review**
495 | - Type the **Role name** *(e.g. ReInvent2018-CTD410-GlueRole)*
496 | - Type the **Role description** (optional)
497 | - Ensure that **AmazonS3FullAccess**, **AWSGlueConsoleFullAccess** and **AWSGlueServiceRole** are listed under policies
498 | - Click **Create role**
499 |
500 | ---
501 |
502 | ### Create AWS Glue ETL Job
503 |
504 | - Now that you have created the IAM role, open the AWS Management console for AWS Glue service from [here](https://eu-west-1.console.aws.amazon.com/glue/home?region=eu-west-1)
505 | - If this is your first time visiting the AWS Management Console for AWS Glue, you will get a Getting Started page. Choose **Get Started**. If this isn't your first time, the **Tables** page opens.
506 | - Make a note of the AWS region name, for example, for this lab you will need to choose the **eu-west-1 (Ireland)** region
507 | - Click on **Jobs** under the **ETL** section in the navigation pane on the left
508 | - Click on **Add job** to create a new ETL job to join the Amazon CloudFront access logs, Lambda@Edge(viewer-request and origin-request) logs and Application Load Balancer logs
509 | - On the **Job properties** page, type the **Name** *(e.g. ReInvent2018-CTD410-LogCombiner)* of the AWS Glue ETL job
510 | - Choose the **IAM role** you created *(e.g. ReInvent2018-CTD410-GlueRole)* as part of the previous section in this lab from the drop down menu
511 | - Select **A new script to be authored by you** for **This job runs**
512 | - Select **Python** as the **ETL language**
513 | - Click **Next**
514 | - On the **Connections** page, click **Next**
515 | - On the **Review** page, click **Save job and edit script**
516 | - If this is your first time, a **Script editor tips** page will pop up. Close the pop-up page by clicking on the **x** symbol on the top right
517 | - Copy and paste the LogCombiner script [log-combiner-glue-script.py](./log-combiner-glue-script.py) to AWS Glue script editor pane
518 | - Click **Save**
519 | - Click **Run job**
520 | - Expand the **Security configuration, script libraries, and job parameters** section on the **Parameters (optional)** page that pops up
521 | - Under **Job parameters**, type **--target_s3_bucket** into the text box under **Key**
522 | - Into the text box under **Value**, type the name of the Amazon S3 bucket that you created at the beginning of this lab.
523 |
524 | > :warning: Type only the name of the S3 bucket and **not** the Amazon S3 path starting with S3://
525 |
526 | - Click **Run job**
527 | - Close the script editor page by clicking the **X** symbol on the right-hand side of the page
528 |
529 | 
530 |
531 | - On the Jobs page, check the box next to the name of the Glue ETL job *(e.g. ReInvent2018-CTD410-LogCombiner)* to view the current status of the job under the **History** tab at the bottom of the page
532 | - Ensure that the **Run status** is displayed as **Running**
533 | - Wait until the Run status changes to **Succeeded**
534 |
535 | > :warning: This step may take up to 15 minutes to complete.
536 |
537 | 
538 |
539 | The AWS Glue ETL job performs a left outer join of the Amazon CloudFront access logs with the viewer-request and origin-request triggered Lambda@Edge logs based on the 'requestid' field. This is followed by another left outer join with the Application Load Balancer (ALB) logs based on the 'customtraceid' field in the Lambda@Edge logs and the 'trace_id' field in the ALB logs. The duplicate fields in the logs are also removed. For more details, see [log-combiner-glue-script.py](./log-combiner-glue-script.py).
540 |
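For readers who prefer SQL, the sketch below expresses roughly the same join logic in Athena. It is illustrative only - the actual job lives in [log-combiner-glue-script.py](./log-combiner-glue-script.py) - and the containment match on the trace IDs is an assumption based on the sample values shown earlier (the ALB `trace_id` embeds the `Root=` value that Lambda@Edge logs as `customtraceid`):

```sql
SELECT cf.time,
       cf.location,
       cf.status,
       le_v.useragentstring,
       le_o.viewercountry,
       le_o.deviceformfactor,
       alb.target_processing_time
FROM reInvent2018_aws_service_logs.cf_access_optimized cf
LEFT OUTER JOIN reInvent2018_aws_service_logs.lambdaedge_logs_viewer_request_optimized le_v
  ON cf.requestid = le_v.requestid
LEFT OUTER JOIN reInvent2018_aws_service_logs.lambdaedge_logs_origin_request_optimized le_o
  ON cf.requestid = le_o.requestid
-- assumed containment match: the ALB trace_id ("Self=...;Root=...") contains the
-- Root= value that the Lambda@Edge functions log as customtraceid
LEFT OUTER JOIN reInvent2018_aws_service_logs.alb_access_optimized alb
  ON strpos(alb.trace_id, le_v.customtraceid) > 0
```
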
541 | ---
542 | ---
543 |
544 | ## Create AWS Glue Data Catalog for Combined Logs
545 |
546 | Now that you have successfully generated the combined logs, in this section you will create AWS Glue Data Catalog tables pointing to the combined logs written by the AWS Glue ETL job that you just executed. You will be creating the following tables, loading the partitions into each of these tables, and previewing the fields.
547 |
548 | |Table Name|Log Name|Partition|
549 | |---|---|----|
550 | |lambdaedge_logs_combined_optimized(optional)|Combined Lambda@Edge Logs obtained by joining viewer-request and origin-request logs |year, month, day, hour|
551 | |combined_log_optimized|Combined logs obtained by joining all four of the following: Amazon CloudFront access logs, viewer request triggered Lambda@Edge logs, origin request triggered Lambda@Edge logs, and Application Load Balancer(ALB) logs|year, month, day|
552 |
553 | The above AWS Glue Data Catalog tables will be referenced by Amazon Athena when you query the logs directly from the Amazon S3 bucket to generate visualizations using Amazon QuickSight.
554 |
555 | ### Create AWS Glue Data Catalog for the combined logs using Amazon Athena
556 | - In the query pane, copy the following statement to create the *combined_log_optimized* table, and then choose **Run Query**:
557 |
558 | > :warning: Replace `<your-bucket-name>` in the LOCATION clause of the query below with the unique name of the S3 bucket you created in step 1 earlier.
559 |
560 | ```sql
561 | CREATE EXTERNAL TABLE reInvent2018_aws_service_logs.combined_log_optimized(
562 | received_bytes int,
563 | trace_id string,
564 | distributionname string,
565 | executionregion string,
566 | distributionid string,
567 | location string,
568 | sent_bytes int,
569 | responseresulttype string,
570 | xforwardedfor string,
571 | type string,
572 | customtraceid string,
573 | querystring string,
574 | client_ip_port string,
575 | response_processing_time double,
576 | elb string,
577 | deviceformfactor string,
578 | elb_status_code string,
579 | uri string,
580 | request_verb string,
581 | col24 string,
582 | request_url string,
583 | region string,
584 | hostheader string,
585 | request_processing_time double,
586 | resulttype string,
587 | method string,
588 | useragent string,
589 | httpversion string,
590 | target_status_code string,
591 | target_ip_port string,
592 | requestdata string,
593 | host string,
594 | referrer string,
595 | cookie string,
596 | bytes bigint,
597 | target_processing_time double,
598 | alb_time timestamp,
599 | requestid string,
600 | viewercountry string,
601 | timetaken double,
602 | requestbytes bigint,
603 | target_group_arn string,
604 | sslprotocol string,
605 | requestprotocol string,
606 | status int,
607 | time timestamp,
608 | requestip string,
609 | sslcipher string,
610 | request_proto string,
611 | col25 string,
612 | user_agent string)
613 | PARTITIONED BY (
614 | year string,
615 | month string,
616 | day string)
617 | ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
618 | STORED AS PARQUET
619 | LOCATION 's3://<your-bucket-name>/combined/logs/'
620 | TBLPROPERTIES("parquet.compress"="SNAPPY")
621 | ```
622 |
623 | Now that you have created the table you need to add the partition metadata to the AWS Glue Catalog.
624 |
625 | - Choose **New Query**, copy the following statement into the query pane, and then choose **Run Query** to add partition metadata.
626 |
627 | ```sql
628 | MSCK REPAIR TABLE reInvent2018_aws_service_logs.combined_log_optimized
629 | ```
630 | - Get the total number of combined log records:
631 |
632 | ```sql
633 | SELECT count(*) AS rowcount FROM reInvent2018_aws_service_logs.combined_log_optimized
634 | ```
635 |
636 | > :warning: Ensure that the rowcount = **207537**
637 |
638 | - Get the first ten records:
639 |
640 | ```sql
641 | SELECT * FROM reInvent2018_aws_service_logs.combined_log_optimized LIMIT 10
642 | ```
643 |
644 | 
645 |
646 |
647 | Review the values in the following fields/columns, as you will be using them in this workshop.
648 |
649 | |Field Name|Description|Type|
650 | |---|----|---|
651 | |requestid|An encrypted string that uniquely identifies a request. This field value is used to join the optimized CloudFront access logs with the optimized Lambda@Edge logs|string|
652 | |trace_id|The contents of the X-Amzn-Trace-Id header, enclosed in double quotes. This field is used to join the optimized ALB logs with the optimized Lambda@Edge logs, which in turn are used to correlate with the optimized CloudFront access logs using the requestid field. For more information see [Request Tracing for Your Application Load Balancer](https://docs.aws.amazon.com/elasticloadbalancing/latest/application/load-balancer-request-tracing.html). Example value: ```X-Amzn-Trace-Id: Self=1-67891234-12456789abcdef012345678;Root=1-67891233-abcdef012345678912345678```|string|
653 | |time|The time when the CloudFront server finished responding to the request (in UTC), for example, 01:42:39|timestamp|
654 | |location|The edge location that served the request. Each edge location is identified by a three-letter code and an arbitrarily assigned number, for example, DFW3. The three-letter code typically corresponds with the International Air Transport Association airport code for an airport near the edge location. (These abbreviations might change in the future.) For a list of edge locations, see the Amazon CloudFront detail page, [http://aws.amazon.com/cloudfront](http://aws.amazon.com/cloudfront)|string|
655 | |uri|The query string portion of the URI, if any. When a URI doesn't contain a query string, the value of cs-uri-query is a hyphen (-). For more information, see [Caching Content Based on Query String Parameters](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/QueryStringParameters.html).|string|
656 | |status|One of the following values:<br>- An HTTP status code (for example, 200). For a list of HTTP status codes, see [RFC 2616, Hypertext Transfer Protocol—HTTP 1.1, section 10, Status Code Definitions](http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html). For more information, see [How CloudFront Processes and Caches HTTP 4xx and 5xx Status Codes from Your Origin](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/HTTPStatusCodes.html).<br>- 000, which indicates that the viewer closed the connection (for example, closed the browser tab) before CloudFront could respond to a request. If the viewer closes the connection after CloudFront starts to send the object, the log contains the applicable HTTP status code.|string|
657 | |useragent|The value of the User-Agent header in the request. The User-Agent header identifies the source of the request, such as the type of device and browser that submitted the request and, if the request came from a search engine, which search engine. For more information, see [User-Agent Header](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/RequestAndResponseBehaviorCustomOrigin.html#request-custom-user-agent-header).|string|
658 | |responseresulttype|How CloudFront classified the response just before returning the response to the viewer. Possible values include:<br>- Hit – CloudFront served the object to the viewer from the edge cache. For information about a situation in which CloudFront classifies the result type as Hit even though the response from the origin contains a Cache-Control: no-cache header, see [Simultaneous Requests for the Same Object (Traffic Spikes)](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/RequestAndResponseBehaviorCustomOrigin.html#request-custom-traffic-spikes).<br>- RefreshHit – CloudFront found the object in the edge cache but it had expired, so CloudFront contacted the origin to determine whether the cache has the latest version of the object and, if not, to get the latest version.<br>- Miss – The request could not be satisfied by an object in the edge cache, so CloudFront forwarded the request to the origin server and returned the result to the viewer.<br>- LimitExceeded – The request was denied because a CloudFront limit was exceeded.<br>- CapacityExceeded – CloudFront returned an HTTP 503 status code (Service Unavailable) because the CloudFront edge server was temporarily unable to respond to requests.<br>- Error – Typically, this means the request resulted in a client error (sc-status is 4xx) or a server error (sc-status is 5xx).<br>- Redirect – CloudFront redirects from HTTP to HTTPS.<br>If sc-status is 403 and you configured CloudFront to restrict the geographic distribution of your content, the request might have come from a restricted location. For more information about geo restriction, see [Restricting the Geographic Distribution of Your Content](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/georestrictions.html). If the value of x-edge-result-type is Error and the value of x-edge-response-result-type is not Error, the client disconnected before finishing the download.|string|
659 | |timetaken|The number of seconds (to the thousandth of a second, for example, 0.002) between the time that a CloudFront edge server receives a viewer's request and the time that CloudFront writes the last byte of the response to the edge server's output queue as measured on the server. From the perspective of the viewer, the total time to get the full object will be longer than this value due to network latency and TCP buffering.|double|
660 | |request_processing_time|The total time elapsed (in seconds, with millisecond precision) from the time the load balancer received the request until the time it sent it to a target. This value is set to -1 if the load balancer can't dispatch the request to a target. This can happen if the target closes the connection before the idle timeout or if the client sends a malformed request. This value can also be set to -1 if the registered target does not respond before the idle timeout.| double|
661 | |target_processing_time|The total time elapsed (in seconds, with millisecond precision) from the time the load balancer sent the request to a target until the target started to send the response headers. This value is set to -1 if the load balancer can't dispatch the request to a target. This can happen if the target closes the connection before the idle timeout or if the client sends a malformed request. This value can also be set to -1 if the registered target does not respond before the idle timeout. |double|
662 | |response_processing_time|The total time elapsed (in seconds, with millisecond precision) from the time the load balancer received the response header from the target until it started to send the response to the client. This includes both the queuing time at the load balancer and the connection acquisition time from the load balancer to the client. This value is set to -1 if the load balancer can't send the request to a target. This can happen if the target closes the connection before the idle timeout or if the client sends a malformed request. |double|
663 | |customtraceid|A uniquely generated value per request used to join the ALB logs with the Lambda@Edge logs. As part of client-side instrumentation, a unique value (sample value: ```Root=1-67891233-abcdef012345678912345678```) is generated per request and added to two headers, **x-my-trace-id** and **X-Amzn-Trace-Id**. The origin-request triggered Lambda@Edge function extracts the **x-my-trace-id** header and logs its value. For more details, see [Origin Request Trigger Lambda Function](./originRequest-Lambda/index.js). The **X-Amzn-Trace-Id** value is logged by the ALB. For more details, see [Request Tracing for Your Application Load Balancer](https://docs.aws.amazon.com/elasticloadbalancing/latest/application/load-balancer-request-tracing.html).|string|
664 | |viewercountry|Two-letter country code based on the IP address where the request came from. For more details, see [Configuring Caching Based on the Location of the Viewer](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/header-caching.html#header-caching-web-location). For an easy-to-use list of country codes, sortable by code and by country name, see the Wikipedia entry [ISO 3166-1 alpha-2](http://en.wikipedia.org/wiki/ISO_3166-1_alpha-2).|string|
665 | |deviceformfactor|Category or form factor of the device based on the user agent associated with the request. For more details, see [Configuring Caching Based on the Device Type](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/header-caching.html#header-caching-web-device). Possible values:<br>- desktop<br>- mobile<br>- smarttv<br>- tablet|string|
666 | |year(partition)|The year in which the event occurred.|string|
667 | |month(partition)|The month in which the event occurred.|string|
668 | |day(partition)|The day on which the event occurred.|string|
669 |
670 |
671 |
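As a quick way to eyeball the join-related fields described above, you can project just those columns in Athena. This is an illustrative query; the column names follow the field table above, so adjust the list if your table differs:

```sql
SELECT requestid, trace_id, customtraceid, viewercountry,
       deviceformfactor, status, timetaken
FROM reInvent2018_aws_service_logs.combined_log_optimized
LIMIT 10
```
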
672 | ---
673 |
674 | ### (Optional) Create AWS Glue Data Catalog for the combined Lambda@Edge logs using Amazon Athena
675 |
676 |
678 |
679 | - Open the AWS Management Console for Athena from [here](https://console.aws.amazon.com/athena/home).
680 | - In the query pane, copy the following statement to create the *lambdaedge_logs_combined_optimized* table, and then choose **Run Query**:
681 |
682 | > :warning: Replace `<your-bucket-name>` in the query below with the unique name of the S3 bucket you created at the beginning of this lab.
683 |
684 | ```sql
685 | CREATE EXTERNAL TABLE IF NOT EXISTS reInvent2018_aws_service_logs.lambdaedge_logs_combined_optimized(
686 | executionregion string,
687 | requestid string,
688 | distributionid string,
689 | distributionname string,
690 | requestdata string,
691 | customtraceid string,
692 | useragentstring string,
693 | deviceformfactor string,
694 | viewercountry string)
695 | PARTITIONED BY (
696 | year string,
697 | month string,
698 | date string,
699 | hour string)
700 | ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
701 | STORED AS PARQUET
702 | LOCATION 's3://<your-bucket-name>/combined/lelogs/'
703 | TBLPROPERTIES("parquet.compress"="SNAPPY")
704 | ```
705 |
706 | Now that you have created the table, you need to add the partition metadata to the AWS Glue Data Catalog.
707 |
708 | - Choose **New Query**, copy the following statement into the query pane, and then choose **Run Query** to add partition metadata.
709 |
710 | ```sql
711 | MSCK REPAIR TABLE reInvent2018_aws_service_logs.lambdaedge_logs_combined_optimized
712 | ```
713 |
714 | - Get the total number of combined Lambda@Edge Log records:
715 |
716 | ```sql
717 | SELECT count(*) AS rowcount FROM reInvent2018_aws_service_logs.lambdaedge_logs_combined_optimized
718 | ```
719 |
720 | > :warning: Ensure that the rowcount = **207837**
721 |
722 | - Get the first ten records:
723 | ```sql
724 | SELECT * FROM reInvent2018_aws_service_logs.lambdaedge_logs_combined_optimized LIMIT 10
725 | ```
726 |
727 | After a few seconds, Athena will display your query results as shown below:
728 |
729 | 
730 |
731 |
732 | Review the values in the following fields/columns:
733 |
734 | |Field Name|Description|Type|
735 | |---|----|---|
736 | |requestid|An encrypted string that uniquely identifies a request. This field value is used to join the optimized CloudFront access logs with the optimized Lambda@Edge logs. The requestId value also appears in CloudFront access logs as x-edge-request-id. For more information, see [Configuring and Using Access Logs](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/AccessLogs.html) and [Web Distribution Log File Format](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/AccessLogs.html#BasicDistributionFileFormat).|string|
737 | |customtraceid|A uniquely generated value per request used to join the ALB logs with the Lambda@Edge logs. As part of client-side instrumentation, a unique value (sample value: ```Root=1-67891233-abcdef012345678912345678```) is generated per request and added to two headers, **x-my-trace-id** and **X-Amzn-Trace-Id**. The Lambda@Edge functions extract the **x-my-trace-id** header and log its value. For more details, see [Viewer Request Trigger Lambda Function](./viewerRequest-Lambda/index.js) and [Origin Request Trigger Lambda Function](./originRequest-Lambda/index.js). The **X-Amzn-Trace-Id** value is logged by the ALB. For more details, see [Request Tracing for Your Application Load Balancer](https://docs.aws.amazon.com/elasticloadbalancing/latest/application/load-balancer-request-tracing.html).|string|
738 | |executionregion|The AWS region where the Lambda@Edge function was executed.|string|
739 | |eventtype|The type of trigger that's associated with the request. Possible values:<br>- viewer-request<br>- origin-request|string|
740 | |distributionid|The ID of the distribution that's associated with the request.|string|
741 | |distributionname|The domain name of the distribution that's associated with the request.|string|
742 | |viewercountry|Two-letter country code based on the IP address where the request came from. For more details, see [Configuring Caching Based on the Location of the Viewer](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/header-caching.html#header-caching-web-location). For an easy-to-use list of country codes, sortable by code and by country name, see the Wikipedia entry [ISO 3166-1 alpha-2](http://en.wikipedia.org/wiki/ISO_3166-1_alpha-2).|string|
743 | |deviceformfactor|Category or form factor of the device based on the user agent associated with the request. For more details, see [Configuring Caching Based on the Device Type](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/header-caching.html#header-caching-web-device). Possible values:<br>- desktop<br>- mobile<br>- smarttv<br>- tablet|string|
744 | |year(partition)|The year in which the event occurred.|string|
745 | |month(partition)|The month in which the event occurred.|string|
746 | |date(partition)|The date on which the event occurred.|string|
747 | |hour(partition)|The hour on which the event occurred.|string|
748 |
749 |
750 |
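As with the combined table, a quick aggregate can help confirm the data looks sensible, for example counting records per Lambda@Edge execution region (illustrative only):

```sql
SELECT executionregion, count(*) AS requests
FROM reInvent2018_aws_service_logs.lambdaedge_logs_combined_optimized
GROUP BY executionregion
ORDER BY requests DESC
```
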
751 |
752 |
753 | ---
754 | ---
755 |
756 | ## Visualization using Amazon QuickSight
757 |
758 | ### Signing Up for Amazon QuickSight Standard Edition
759 |
760 |
761 | If you have never used Amazon QuickSight in this account, follow the instructions below to sign up for an Amazon QuickSight account.
762 |
763 | 
764 |
765 | - Open the AWS Management console for Amazon QuickSight from [here](https://eu-west-1.quicksight.aws.amazon.com/sn/start)
766 | - If this is the first time you are accessing QuickSight, you will see a sign-up landing page for QuickSight.
767 | - Click on **Sign up for QuickSight**.
768 |
769 | 
770 |
771 | - On the **Create your QuickSight account** page, select **Standard Edition** for the subscription type.
772 | - Click **Continue**
773 |
774 | 
775 |
776 | - On the next page, type a unique **QuickSight account name** *(e.g. REInvent2018-CTD410-QuickSight)*
777 | - Type a valid email address for **Notification email address**
778 | - Just for this step, ensure that **US East (N. Virginia)** is selected from the drop down menu for *QuickSight capacity region*
779 | - Ensure that the boxes next to **Enable autodiscovery of your data and users in your Amazon Redshift, Amazon RDS and AWS IAM Services** and **Amazon Athena** are checked
780 | - Click **Finish**
781 | - Wait until the page with the message **Congratulations! You are signed up for Amazon QuickSight!** is presented on successful sign-up.
782 | - Click on **Go to Amazon QuickSight**.
783 |
784 |
785 |
786 | ---
787 |
788 | ### Configure Amazon S3 bucket Permission
789 |
790 | In this section you will configure the permission for Amazon QuickSight to access the Amazon S3 bucket to read the combined logs that you generated as part of the ETL job.
791 |
792 | 
793 |
794 | - On the Amazon QuickSight dashboard, navigate to the user settings page in the top right corner and click **Manage QuickSight**.
795 |
796 | 
797 |
798 | - On the next page, click on **Account Settings**
799 | - Click on **Manage QuickSight permissions**
800 | - Click **Choose S3 buckets** to select the Amazon S3 buckets for which auto-discovery needs to be enabled for QuickSight
801 |
802 | 
803 |
804 | - On the **Select Amazon S3 buckets** pop-up page, check the box next to **Select all** or next to the name of the Amazon S3 bucket you created at the beginning of the lab
805 | - Click **Select buckets**
806 | - Ensure that the box next to **Amazon S3** is checked
807 | - Click **Apply**
808 |
809 | ---
810 |
811 | ### Configuring Amazon QuickSight to use Amazon Athena as data source
812 |
813 | In this section you will configure Amazon Athena as the data source to query the combined logs directly from Amazon S3 bucket by referencing the AWS Glue data catalog - *reInvent2018_aws_service_logs.combined_log_optimized*.
814 |
815 | 
816 |
817 | - Select **EU (Ireland)** as the region for this lab
818 | - If this is the first time you are using Amazon QuickSight in this region, close the **Welcome to QuickSight** pop-up page by clicking on the **x** symbol.
819 | - Click on **Manage data** in the upper right hand corner
820 | - Click on **New data set** on the upper left hand corner
821 |
822 | 
823 |
824 | - Select **Athena** as the data source
825 |
826 | 
827 |
828 | - Type a **Data source name** *(e.g. ReInvent-CTD410-DS)*
829 | - Click on **Create data source**
830 |
831 | 
832 |
833 | - Select **reinvent2018_aws_service_logs** from the drop down menu for **Database: contain sets of tables**
834 | - Choose **combined_log_optimized** from the list under **Tables: contains the data you can visualize**
835 | - Click **Edit/Preview data**
836 |
837 | > :warning: **THIS IS A CRUCIAL STEP. PLEASE ENSURE YOU CHOOSE Edit/Preview data.**\
838 | > :warning: **THIS IS A CRUCIAL STEP. PLEASE ENSURE YOU CHOOSE Edit/Preview data.**
839 |
840 | ---
841 | ---
842 |
843 | ## Generating new calculated fields in Amazon QuickSight
844 |
845 | Now that you have configured the Amazon S3 permissions and the data source in Amazon QuickSight, in this section you will generate the following additional fields: HourOfDay, EdgeToOriginTimeTaken, and TotalTimeTakenAtALB.
846 |
847 | ### Create new calculated field "EdgeToOriginTimeTaken" in Amazon QuickSight
848 |
849 | > **Formula:**\
850 | > EdgeToOriginTimeTaken = timetaken - target_processing_time + response_processing_time + request_processing_time\
851 | > = timetaken, when target_processing_time = null i.e. response was served by Amazon CloudFront\
852 | > = 0, when (target_processing_time || response_processing_time || request_processing_time) == -1 (request timeout)
853 |
854 | |Field Name|Description|Type|
855 | |---|----|---|
856 | |timetaken|The number of seconds (to the thousandth of a second, for example, 0.002) between the time that a CloudFront edge server receives a viewer's request and the time that CloudFront writes the last byte of the response to the edge server's output queue as measured on the server. From the perspective of the viewer, the total time to get the full object will be longer than this value due to network latency and TCP buffering.|double|
857 | |request_processing_time|The total time elapsed (in seconds, with millisecond precision) from the time the load balancer received the request until the time it sent it to a target. This value is set to -1 if the load balancer can't dispatch the request to a target. This can happen if the target closes the connection before the idle timeout or if the client sends a malformed request. This value can also be set to -1 if the registered target does not respond before the idle timeout.| double|
858 | |target_processing_time|The total time elapsed (in seconds, with millisecond precision) from the time the load balancer sent the request to a target until the target started to send the response headers. This value is set to -1 if the load balancer can't dispatch the request to a target. This can happen if the target closes the connection before the idle timeout or if the client sends a malformed request. This value can also be set to -1 if the registered target does not respond before the idle timeout. |double|
859 | |response_processing_time|The total time elapsed (in seconds, with millisecond precision) from the time the load balancer received the response header from the target until it started to send the response to the client. This includes both the queuing time at the load balancer and the connection acquisition time from the load balancer to the client. This value is set to -1 if the load balancer can't send the request to a target. This can happen if the target closes the connection before the idle timeout or if the client sends a malformed request. |double|
860 |
861 | - Open the AWS Management console for Amazon QuickSight from [here](https://eu-west-1.quicksight.aws.amazon.com/sn/start)
862 | - Under **Fields** on the left column, click **Add calculated field**
863 |
864 | 
865 |
866 | - In the **Add calculated field** pop up page, type **EdgeToOriginTimeTaken** under **Calculated field name**
867 | - Copy and paste the formula below in the **Formula** text box
868 |
869 | ```
870 | ifelse(isNull(target_processing_time), {timetaken}, ifelse(target_processing_time = -1 or response_processing_time = -1 or request_processing_time = -1, 0, {timetaken} - {target_processing_time} + {response_processing_time} +{request_processing_time}))
871 | ```
872 |
873 | - Click **Create**
874 | - Ensure that **#EdgeToOriginTimeTaken** appears under *Calculated fields*
875 |
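If you want to sanity-check the calculated field outside QuickSight, the same logic can be sketched in Athena with a `CASE` expression that mirrors the `ifelse` above (illustrative; column names are those of the *combined_log_optimized* table):

```sql
SELECT requestid,
       CASE
         -- served from the CloudFront cache: no ALB timings recorded
         WHEN target_processing_time IS NULL THEN timetaken
         -- -1 marks a request timeout at the load balancer
         WHEN target_processing_time = -1
           OR response_processing_time = -1
           OR request_processing_time = -1 THEN 0
         ELSE timetaken - target_processing_time
              + response_processing_time + request_processing_time
       END AS edgetoorigintimetaken
FROM reInvent2018_aws_service_logs.combined_log_optimized
LIMIT 10
```
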
876 | ---
877 |
878 | ### Create new calculated field "HourOfDay" in Amazon QuickSight
879 |
880 | > **Formula:**\
881 | > HourOfDay = extract("HH",{time})
882 |
883 | |Field Name|Description|Type|
884 | |---|----|---|
885 | |time|The time when the CloudFront server finished responding to the request (in UTC), for example, 01:42:39|timestamp|
886 |
887 |
888 | - Under **Fields** on the left column, click **Add calculated field**
889 | - In the **Add calculated field** pop up page, type **HourOfDay** under **Calculated field name**
890 | - Copy and paste the formula below in the **Formula** text box
891 |
892 | ```
893 | extract("HH",{time})
894 | ```
895 |
896 | - Click **Create**
897 | - Ensure that **#HourOfDay** appears under **Calculated fields**
898 |
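The same derivation can be previewed directly in Athena ("time" is quoted because it clashes with a SQL type name; illustrative only):

```sql
SELECT extract(hour FROM "time") AS hourofday, count(*) AS requests
FROM reInvent2018_aws_service_logs.combined_log_optimized
GROUP BY 1
ORDER BY 1
```
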
899 | ---
900 |
901 | ### Create new calculated field "TotalTimeTakenAtALB" in Amazon QuickSight
902 |
903 | > **Formula:**\
904 | > TotalTimeTakenAtALB = target_processing_time + response_processing_time + request_processing_time\
905 | > = 0, when target_processing_time = null i.e. response was served by Amazon CloudFront\
906 | > = 0, when (target_processing_time || response_processing_time || request_processing_time) == -1 (request timeout)
907 |
908 | |Field Name|Description|Type|
909 | |---|----|---|
910 | |timetaken|The number of seconds (to the thousandth of a second, for example, 0.002) between the time that a CloudFront edge server receives a viewer's request and the time that CloudFront writes the last byte of the response to the edge server's output queue as measured on the server. From the perspective of the viewer, the total time to get the full object will be longer than this value due to network latency and TCP buffering.|double|
911 | |request_processing_time|The total time elapsed (in seconds, with millisecond precision) from the time the load balancer received the request until the time it sent it to a target. This value is set to -1 if the load balancer can't dispatch the request to a target. This can happen if the target closes the connection before the idle timeout or if the client sends a malformed request. This value can also be set to -1 if the registered target does not respond before the idle timeout.| double|
912 | |target_processing_time|The total time elapsed (in seconds, with millisecond precision) from the time the load balancer sent the request to a target until the target started to send the response headers. This value is set to -1 if the load balancer can't dispatch the request to a target. This can happen if the target closes the connection before the idle timeout or if the client sends a malformed request. This value can also be set to -1 if the registered target does not respond before the idle timeout. |double|
913 | |response_processing_time|The total time elapsed (in seconds, with millisecond precision) from the time the load balancer received the response header from the target until it started to send the response to the client. This includes both the queuing time at the load balancer and the connection acquisition time from the load balancer to the client. This value is set to -1 if the load balancer can't send the request to a target. This can happen if the target closes the connection before the idle timeout or if the client sends a malformed request. |double|
914 |
915 | - Under **Fields** on the left column, click **Add calculated field**
916 | - In the **Add calculated field** pop up page, type **TotalTimeTakenAtALB** under **Calculated field name**
917 | - Copy and paste the formula below in the **Formula** text box
918 |
919 | ```
920 | ifelse(isNull(target_processing_time), 0, ifelse(target_processing_time = -1 or response_processing_time = -1 or request_processing_time = -1, 0, {target_processing_time} + {response_processing_time} +{request_processing_time}))
921 | ```
922 | - Click **Create**
923 | - Ensure that **#TotalTimeTakenAtALB** appears under **Calculated fields**
924 | - Click on **Save & visualize** at the top of the page
925 |
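As with EdgeToOriginTimeTaken, you can preview the TotalTimeTakenAtALB logic in Athena before visualizing (a sketch mirroring the ifelse above):

```sql
SELECT requestid,
       CASE
         -- served from the CloudFront cache: no time spent at the ALB
         WHEN target_processing_time IS NULL THEN 0
         -- -1 marks a request timeout at the load balancer
         WHEN target_processing_time = -1
           OR response_processing_time = -1
           OR request_processing_time = -1 THEN 0
         ELSE target_processing_time + response_processing_time
              + request_processing_time
       END AS totaltimetakenatalb
FROM reInvent2018_aws_service_logs.combined_log_optimized
LIMIT 10
```
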
926 | ---
927 | ---
928 |
929 | ## Generate visualization using Amazon QuickSight
930 |
931 | 
932 |
933 | Now that you have configured Athena as the data source to query the combined logs directly from Amazon S3 and created additional fields in Amazon QuickSight, you are ready to generate visualizations for the following use cases:
934 | * Status code count by Amazon CloudFront Edge / PoP
935 | * Status code count by user requested URI
936 | * Time taken (averaged over hour) from Amazon CloudFront edge to origin (located in AWS region us-east-1 (N. Virginia)) by country where the user request originated from
937 | * Total time taken (averaged over hour) Vs. time taken (averaged over hour) from Amazon CloudFront edge to origin (located in AWS region us-east-1 (N. Virginia)) Vs. total server-side processing time (averaged over hour) for a country where the user request originated from
938 | * Count of product category request by the country where the request originated from
939 | * Ratio of device form factors used to browse globally or for a country where the user request originated from
940 |
941 | ### Generate visualization to show status code count by Amazon CloudFront Edge/PoP
942 |
943 | 
944 |
945 | **Use case:** HTTP status code (3xx, 4xx, 5xx) counts by edge/PoP location can provide insight when troubleshooting issues (such as connectivity problems)
946 |
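For reference, the aggregation this visual performs corresponds roughly to the following Athena query; the day filter in the WHERE clause matches the QuickSight filter you will add in the steps below (the partition value '4' is just the example used in this lab):

```sql
SELECT location, status, count(*) AS requests
FROM reInvent2018_aws_service_logs.combined_log_optimized
WHERE day = '4'  -- match the day value you select in the filter below
GROUP BY location, status
ORDER BY requests DESC
```
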
947 | - Ensure that the selected region is **Ireland** in top right corner
948 | - Click the **Filter** icon in the QuickSight navigation pane
949 | - Click on **+** symbol next to **Applied Filters**
950 | - Select **day** field in the pop up menu
951 |
952 | 
953 |
954 | - Choose the new filter that you just created by clicking on filter name **day**
955 | - Wait for QuickSight to load the filter values and then check the box next to one of the values for the day field *(e.g. 4)*
956 | - Click **Apply**
957 | - Click **Close**
958 | - Click again on **+** symbol next to **Applied Filters** to add another filter
959 | - Select **HourOfDay** field in the pop up menu
960 | - Choose the new filter that you just created by clicking on filter name **HourOfDay**
961 | - Wait for QuickSight to load the filter values and then check the box next to one of the values for the HourOfDay field *(e.g. 0)*
962 | - Click **Apply**
963 | - Click **Close**
964 |
965 | 
966 |
967 | - Click the **Visualize** icon in the QuickSight navigation pane
968 |
969 | 
970 |
971 | - Select the **Horizontal bar chart** under **Visual types**
972 | - Drag and drop the **#status** field into the **Y axis** in the **Field wells** section on the top
973 | - Drag and drop the **location** field into the **Group/Color** in the **Field wells** section
974 | - Click on the drop down arrow next to **status** in the y-axis of the chart to reveal a sub menu.
975 | - Click on the Ascending order for **Sort by** in the revealed menu
976 | - Edit the title by clicking on the title in the chart and changing it to **Status code by Edge Location** (optional)
977 |
978 | ---
979 |
980 | ### (Optional) Generate visualization to show status code count by request URI
981 |
982 |
984 |
985 | **Use case:** HTTP status code (3xx, 4xx, 5xx) counts by URI can provide insight when troubleshooting issues (such as 404 Page Not Found errors)
986 |
987 |
988 | - Drag and drop the **uri** field into the **Group/Color** in the **Field wells** section.
989 |
990 | > :warning: While dragging and dropping multiple fields, do not replace the existing field; drop the new field on top of it
991 |
992 | - Click on the drop down arrow next to **status** in the y-axis of the chart to reveal a sub menu.
993 | - Click on the Ascending order for **Sort by** in the revealed menu
994 | - Edit the title by clicking on the title in the chart and changing it to **Status code by URI** (optional)
995 |
996 | 
997 |
998 |
999 |
1000 | ---
1001 |
1002 | ### Generate visualization to show hourly average time taken between edge and origin by country where the end user request originated from
1003 |
1004 | 
1005 |
1006 | **Use case:** Derive insights into edge-to-origin latency for your global traffic and further optimize routing
1007 |
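The chart you will build below is roughly equivalent to this Athena aggregation (a simplified sketch; unlike the calculated field, it does not zero out the -1 timeout sentinels):

```sql
SELECT viewercountry,
       extract(hour FROM "time") AS hourofday,
       avg(timetaken - target_processing_time
           + response_processing_time + request_processing_time) AS avg_edge_to_origin
FROM reInvent2018_aws_service_logs.combined_log_optimized
WHERE responseresulttype = 'Miss'
  AND viewercountry IS NOT NULL
GROUP BY 1, 2
ORDER BY 1, 2
```
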
1008 | - Ensure that the selected region is **Ireland** in top right corner
1009 | - Click on **Add** from the QuickSight menu on the top to **Add Visual** from the pop up menu
1010 | - Click the **Filter** icon in the QuickSight navigation pane
1011 | - Click on **+** symbol next to **Applied Filters**
1012 | - Select **day** field in the pop up menu
1013 | - Choose the new filter that you just created by clicking on filter name **day**
1014 | - Wait for QuickSight to load the filter values and then check the box next to one of the values for the day field *(e.g. 4)*
1015 | - Click **Apply**
1016 | - Click **Close**
1017 | - Click again on **+** symbol next to **Applied Filters** to add another filter
1018 | - Select **viewercountry** field in the pop up menu
1019 | - Choose the new filter that you just created by clicking on filter name **viewercountry**
1020 | - Select all the values **(DE, IE, IN, US)** except **NULL**
1021 | - Click **Apply**
1022 | - Click **Close**
1023 | - Click again on **+** symbol next to **Applied Filters** to add another filter
1024 | - Select **responseresulttype** field in the pop up menu
1025 | - Choose the new filter that you just created by clicking on filter name **responseresulttype**
1026 | - Select **Miss** from the list of values
1027 | - Click **Apply**
1028 | - Click **Close**
1029 |
1030 | 
1031 |
1032 | - Click the **Visualize** icon in the QuickSight navigation pane
1033 |
1034 | 
1035 |
1036 | - Select the **Line chart** under **Visual types**
1037 | - Drag and drop the **#HourOfDay** field into the **X axis** in the **Field wells** section on the top
1038 | - Drag and drop the **viewercountry** field into the **Color** in the **Field wells** section
1039 | - Drag and drop the **#EdgeToOriginTimeTaken** field into the **Value** in the **Field wells** section
1040 | - Click on the down arrow next to **EdgeToOriginTimeTaken** in the Value to reveal a sub-menu
1041 | - Select **Aggregate:** and then **Average**
1042 |
1043 | 
1044 |
1045 | - Click on the drop down arrow next to **HourOfDay** in the x-axis of the chart to reveal a sub menu.
1046 | - Click on the Ascending order next to **HourOfDay** under **Sort by** in the revealed menu
1047 | - Edit the title by clicking on the title in the chart and changing it to **Hourly Avg. for time taken from edge to origin by end user country** (optional)
1048 |
1049 | ---
1050 |
1051 | ### Generate visualization to show hourly average time taken (total Vs. edge to origin Vs. server-side processing) by country where the end user request originated from
1052 |
1053 | **Use case:** Troubleshoot latency issue at various stages of a request-response pipeline
1054 |
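In Athena terms, this visual compares three hourly averages side by side; a simplified equivalent (again ignoring the -1 timeout sentinels) might look like the following, with 'US' standing in for whichever viewer country you filter on below:

```sql
SELECT extract(hour FROM "time") AS hourofday,
       avg(timetaken) AS avg_total_time,
       avg(timetaken - target_processing_time
           + response_processing_time + request_processing_time) AS avg_edge_to_origin,
       avg(target_processing_time + response_processing_time
           + request_processing_time) AS avg_time_at_alb
FROM reInvent2018_aws_service_logs.combined_log_optimized
WHERE viewercountry = 'US'  -- example country; match the filter you choose below
  AND responseresulttype = 'Miss'
GROUP BY 1
ORDER BY 1
```
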
1055 | 
1056 |
1057 | - Click on **Add** from the QuickSight menu on the top to **Add Visual** from the pop up menu
1058 | - Ensure that the selected region is **Ireland** in top right corner
1059 | - Click the **Filter** icon in the QuickSight navigation pane
1060 | - Click on **+** symbol next to **Applied Filters**
1061 | - Select **day** field in the pop up menu
1062 | - Choose the new filter that you just created by clicking on filter name **day**
1063 | - Wait for QuickSight to load the filter values and then check the box next to one of the values for the day field (*e.g. 4 same day that you selected in the previous chart*)
1064 | - Click **Apply**
1065 | - Click **Close**
1066 | - Click again on **+** symbol next to **Applied Filters** to add another filter
1067 | - Select **viewercountry** field in the pop up menu
1068 | - Choose the new filter that you just created by clicking on filter name **viewercountry**
1069 | - Select one of the values (e.g. **US**) except **DE**
1070 | - Click **Apply**
1071 | - Click **Close**
1072 | - Click again on **+** symbol next to **Applied Filters** to add another filter
1073 | - Select **responseresulttype** field in the pop up menu
1074 | - Choose the new filter that you just created by clicking on filter name **responseresulttype**
1075 | - Select **Miss** from the list of values
1076 | - Click **Apply**
1077 | - Click **Close**
1078 |
1079 | 
1080 |
1081 | - Click the **Visualize** icon in the QuickSight navigation pane
1082 |
1083 | 
1084 |
1085 | - Select the **Line chart** under **Visual types**
1086 | - Drag and drop the **#HourOfDay** field into the **X axis** in the **Field wells** section on the top
1087 | - Drag and drop the **#EdgeToOriginTimeTaken** field into the **Value** in the **Field wells** section
1088 | - Click on the down arrow next to **EdgeToOriginTimeTaken** in the Value to reveal a sub-menu
1089 | - Select **Aggregate:** and then **Average**
1090 | - Drag and drop the **#TotalTimeTakenAtALB** field into the **Value** in the **Field wells** section
1091 | - Click on the down arrow next to **#TotalTimeTakenAtALB** in the Value to reveal a sub-menu
1092 | - Select **Aggregate:** and then **Average**
1093 | - Drag and drop the **#timetaken** field into the **Value** in the **Field wells** section
1094 | - Click on the down arrow next to **#timetaken** in the Value to reveal a sub-menu
1095 | - Select **Aggregate:** and then **Average**
1096 | - Click on the drop down arrow next to **HourOfDay** in the x-axis of the chart to reveal a sub menu.
1097 | - Click on the Ascending order next to **HourOfDay** under **Sort by** in the revealed menu
1098 | - Edit the title by clicking on the title in the chart and changing it to **Hourly Avg. time taken (total Vs. edge to origin Vs. server-side processing) by end user country** (optional)
1099 |
1100 | ---
1101 |
1102 | ### (Optional) Generate visualization to show hourly average time taken (total Vs. edge to origin Vs. server-side processing) by country where the end user request originated from for a different viewer country
1103 |
1104 |
1106 |
1107 | 
1108 |
1109 | **Use case:** Troubleshoot latency issues at various stages of a request-response pipeline
1110 |
1111 | - Click the **Filter** icon in the QuickSight navigation pane
1112 | - Choose the new filter that you just created by clicking on filter name **viewercountry**
1113 | - Select one of the values (e.g. **IN**) except **DE**
1114 | - Click **Apply**
1115 | - Click **Close**
1116 | - Click the **Visualize** icon in the QuickSight navigation pane
1117 |
1118 |
1119 |
1120 | ---
1121 |
1122 | ### Generate Visualization to show product category request by country
1123 |
1124 | 
1125 |
1126 | **Use case:** Based on the popular attributes (e.g. product categories) that your customers request, you can prioritize and optimize latency by caching the pages for those popular categories or by delivering ad impressions, in addition to other business insights you can derive, such as for inventory management
1127 |
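The counts behind this chart can be sketched in Athena as follows (illustrative; requestdata carries the product category captured from the request):

```sql
SELECT requestdata, viewercountry, count(*) AS requests
FROM reInvent2018_aws_service_logs.combined_log_optimized
WHERE requestdata IS NOT NULL
  AND viewercountry IS NOT NULL
GROUP BY requestdata, viewercountry
ORDER BY requests DESC
```
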
1128 | - Click on **Add** from the QuickSight menu on the top to **Add Visual** from the pop up menu
1129 | - Ensure that the selected region is **Ireland** in top right corner
1130 | - Click the **Filter** icon in the QuickSight navigation pane
1131 | - Click again on **+** symbol next to **Applied Filters** to add another filter
1132 | - Select **requestdata** field in the pop up menu
1133 |
1134 | 
1135 |
1136 | - Choose the new filter that you just created by clicking on filter name **requestdata**
1137 | - Choose **Custom filter** from the drop down for **Filter type**
1138 | - For the second drop down under **Filter type** choose **Does not equal**
1139 | - Type *null* in the text box.
1140 | - Click **Apply**
1141 | - Click **Close**
1142 | - Click again on **+** symbol next to **Applied Filters** to add another filter
1143 | - Select **viewercountry** field in the pop up menu
1144 | - Choose the new filter that you just created by clicking on filter name **viewercountry**
1145 | - Select all the values **(DE, IE, IN, US)** except **NULL**
1146 | - Click **Apply**
1147 | - Click **Close**
1148 | - Click the **Visualize** icon in the QuickSight navigation pane
1149 |
1150 | 
1151 |
1152 | - Select the **Horizontal bar chart** under **Visual types**
1153 | - Drag and drop the **requestdata** field into the **Y axis** in the **Field wells** section on the top
1154 | - Drag and drop the **viewercountry** field into **Group/Color** in the **Field wells** section
1155 | - Click on the drop down arrow next to **requestdata** in the y-axis of the chart to reveal a sub menu.
1156 | - Click on the Ascending order for **Sort by** in the revealed menu
1157 | - Edit the title by clicking on the title in the chart and changing it to **Count of product category by end user country** (optional)
1158 |
1159 | ---
1160 |
1161 | ### (Optional) Generate visualization to show device form factor ratio
1162 |
1163 |
1165 |
1166 | 
1167 |
1168 | **Use case:** Based on the popular device form factor(s) that your global customers are using to browse your website, you can prioritize customization and optimization of your content for those form factor(s)
1169 |
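The ratio shown in the pie chart corresponds to a simple grouped count in Athena (illustrative only):

```sql
SELECT deviceformfactor, count(*) AS requests
FROM reInvent2018_aws_service_logs.combined_log_optimized
WHERE deviceformfactor IS NOT NULL
GROUP BY deviceformfactor
ORDER BY requests DESC
```
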
1170 | - Click on **Add** from the QuickSight menu on the top to **Add Visual** from the pop up menu
1171 | - Ensure that the selected region is **Ireland** in top right corner
1172 | - Click the **Filter** icon in the QuickSight navigation pane
1173 | - Click on **+** symbol next to **Applied Filters**
1174 | - Select **deviceformfactor** field in the pop up menu
1175 | - Choose the new filter that you just created by clicking on filter name **deviceformfactor**
1176 | - Choose **Custom filter** from the drop down for **Filter type**
1177 | - For the second drop down under **Filter type** choose **Does not equal**
1178 | - Type *null* in the text box.
1179 | - Click **Apply**
1180 | - Click **Close**
1181 |
1182 | 
1183 |
1184 | - Click the **Visualize** icon in the QuickSight navigation pane
1185 | - Select the **Pie chart** under **Visual types**
1186 | - Drag and drop the **deviceformfactor** field into **Group/Color** in the **Field wells** section
1187 | - Edit the title by clicking on the title in the chart and changing it to **Device form factor Ratio** (optional)
1188 |
1189 |
1190 |
1191 | ---
1192 |
1193 | ### (Optional) Generate visualization to show device form factor ratio by viewer country
1194 |
1195 |
1197 |
1198 | 
1199 |
1200 | **Use case:** Based on the popular device form factor(s) that customers in a particular region or country are using to browse your website, you can prioritize customization and optimization of your content for those form factor(s)
1201 |
1202 | - Click the **Filter** icon in the QuickSight navigation pane
1203 | - Click on **+** symbol next to Applied Filters
1204 | - Choose the new filter that you just created by clicking on filter name **viewercountry**
1205 | - Select one of the values (e.g. **IN**) except **DE**
1206 | - Click **Apply**
1207 | - Click **Close**
1208 | - Click the **Visualize** icon in the QuickSight navigation pane
1209 |
1210 |
1211 |
1212 | ---
1213 | ---
1214 |
1215 | ## License Summary
1216 |
1217 | This sample code is made available under a modified MIT license. See the LICENSE file.
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/alb-access-optimized.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/alb-access-optimized.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/amazon-s3-create-bucket.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/amazon-s3-create-bucket.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/amazon-s3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/amazon-s3.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/architecture-diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/architecture-diagram.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/architecture-overview-all.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/architecture-overview-all.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/assets.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/assets.txt
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/athena-database.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/athena-database.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/athena-table.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/athena-table.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/cf-access-optimized.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/cf-access-optimized.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/combine-schema.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/combine-schema.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/combined-logs-all.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/combined-logs-all.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/device-form-factor-chart.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/device-form-factor-chart.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/device-form-factor-visualize-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/device-form-factor-visualize-2.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/device-form-factor-visualize.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/device-form-factor-visualize.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/edge-to-origin-chart.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/edge-to-origin-chart.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/edge-to-origin-filter-summary.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/edge-to-origin-filter-summary.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/edge-to-origin-filter.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/edge-to-origin-filter.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/edge-to-origin-visualize.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/edge-to-origin-visualize.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/edge-to-origin-x-axis.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/edge-to-origin-x-axis.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/glue-job-complete.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/glue-job-complete.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/lambda-edge.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/lambda-edge.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/le-combined-logs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/le-combined-logs.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/log-collection.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/log-collection.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/origin-request-optimized.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/origin-request-optimized.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/product-category-chart.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/product-category-chart.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/product-category-filter.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/product-category-filter.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/product-category-sort.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/product-category-sort.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/quicksight-account-create.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/quicksight-account-create.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/quicksight-athena-ds.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/quicksight-athena-ds.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/quicksight-datasource.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/quicksight-datasource.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/quicksight-edition.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/quicksight-edition.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/quicksight-manage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/quicksight-manage.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/quicksight-new-field.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/quicksight-new-field.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/quicksight-permission.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/quicksight-permission.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/quicksight-region-selection.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/quicksight-region-selection.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/quicksight-s3-bucket-selection.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/quicksight-s3-bucket-selection.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/quicksight-signup.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/quicksight-signup.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/quicksight-status-code-filter-summary.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/quicksight-status-code-filter-summary.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/quicksight-status-code-pop-filter.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/quicksight-status-code-pop-filter.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/quicksight-status-code-pop.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/quicksight-status-code-pop.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/quicksight-status-code-visualize-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/quicksight-status-code-visualize-1.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/quicksight-status-code-visualize-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/quicksight-status-code-visualize-2.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/quicksight-table-selection.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/quicksight-table-selection.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/quicksight-visualization-all.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/quicksight-visualization-all.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/time-taken-chart.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/time-taken-chart.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/time-taken-filter-summary.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/time-taken-filter-summary.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/time-taken-visualize-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/time-taken-visualize-2.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/time-taken-visualize.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/time-taken-visualize.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/assets/viewer-request-optimized.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/assets/viewer-request-optimized.png
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/lelogconverter.py:
--------------------------------------------------------------------------------
1 | import sys
2 | from awsglue.transforms import *
3 | from awsglue.utils import getResolvedOptions
4 | from pyspark.context import SparkContext
5 | from awsglue.context import GlueContext
6 | from awsglue.job import Job
7 |
8 | ## @params: [JOB_NAME]
9 | args = getResolvedOptions(sys.argv, ['JOB_NAME'])
10 |
11 | sc = SparkContext()
12 | glueContext = GlueContext(sc)
13 | spark = glueContext.spark_session
14 | job = Job(glueContext)
15 | job.init(args['JOB_NAME'], args)
16 |
17 | ###################################################################################################################
18 | # VIEWER REQUEST LAMBDA@EDGE LOGS - Conversion from JSON to Parquet format partitioned by year, month, date, hour #
19 | ###################################################################################################################
20 | ## Create dynamic frame from raw (JSON format) viewer request Lambda@Edge logs as the datasource. Glue Data Catalog = {database = "reInvent2018_aws_service_logs", table_name = "le_log_raw_viewer_request"}
21 | viewerRequestLELog = glueContext.create_dynamic_frame.from_catalog(database = "reInvent2018_aws_service_logs", table_name = "le_log_raw_viewer_request", transformation_ctx = "viewerRequestLELog")
22 |
23 | ## Map the viewer request Lambda@Edge logs to target format
24 | mappedViewerRequestLELog = ApplyMapping.apply(frame = viewerRequestLELog, mappings = [("executionregion", "string", "executionregion", "string"), ("requestid", "string", "requestid", "string"), ("distributionid", "string", "distributionid", "string"), ("distributionname", "string", "distributionname", "string"), ("eventtype", "string", "eventtype", "string"), ("requestdata", "string", "requestdata", "string"), ("customtraceid", "string", "customtraceid", "string"), ("useragentstring", "string", "useragentstring", "string"), ("partition_0", "string", "year", "string"), ("partition_1", "string", "month", "string"), ("partition_2", "string", "date", "string"), ("partition_3", "string", "hour", "string")], transformation_ctx = "mappedViewerRequestLELog")
25 |
26 | ## Resolves a choice type within a DynamicFrame
27 | resolvedViewerRequestLELog = ResolveChoice.apply(frame = mappedViewerRequestLELog, choice = "make_struct", transformation_ctx = "resolvedViewerRequestLELog")
28 |
29 | ## Drops all null fields in a DynamicFrame whose type is NullType
30 | cleanedViewerRequestLELog = DropNullFields.apply(frame = resolvedViewerRequestLELog, transformation_ctx = "cleanedViewerRequestLELog")
31 |
32 | ## Write the viewer request Lambda@Edge logs to the S3 path (s3://us-east-1.data-analytics/cflogworkshop/optimized/lelogs/viewer-request) in the optimized (Parquet) format partitioned by year, month, date, hour
33 | viewerRequestLELogSink = glueContext.write_dynamic_frame.from_options(frame = cleanedViewerRequestLELog, connection_type = "s3", connection_options = {"path": "s3://us-east-1.data-analytics/cflogworkshop/optimized/lelogs/viewer-request", "partitionKeys": ["year", "month", "date", "hour"]}, format = "parquet", transformation_ctx = "viewerRequestLELogSink")
34 |
35 |
36 | ###################################################################################################################
37 | # ORIGIN REQUEST LAMBDA@EDGE LOGS - Conversion from JSON to Parquet format partitioned by year, month, date, hour #
38 | ###################################################################################################################
39 | ## Create dynamic frame from raw (JSON format) origin request Lambda@Edge logs as the datasource. Glue Data Catalog = {database = "reInvent2018_aws_service_logs", table_name = "le_log_raw_origin_request"}
40 | originRequestLELog = glueContext.create_dynamic_frame.from_catalog(database = "reInvent2018_aws_service_logs", table_name = "le_log_raw_origin_request", transformation_ctx = "originRequestLELog")
41 |
42 | ## Map the origin request Lambda@Edge logs to target format
43 | mappedOriginRequestLELog = ApplyMapping.apply(frame = originRequestLELog, mappings = [("executionregion", "string", "executionregion", "string"), ("requestid", "string", "requestid", "string"), ("distributionid", "string", "distributionid", "string"), ("distributionname", "string", "distributionname", "string"), ("eventtype", "string", "eventtype", "string"), ("requestdata", "string", "requestdata", "string"), ("viewercountry", "string", "viewercountry", "string"), ("deviceformfactor", "string", "deviceformfactor", "string"), ("customtraceid", "string", "customtraceid", "string"), ("partition_0", "string", "year", "string"), ("partition_1", "string", "month", "string"), ("partition_2", "string", "date", "string"), ("partition_3", "string", "hour", "string")], transformation_ctx = "mappedOriginRequestLELog")
44 |
45 | ## Resolves a choice type within a DynamicFrame
46 | resolvedOriginRequestLELog = ResolveChoice.apply(frame = mappedOriginRequestLELog, choice = "make_struct", transformation_ctx = "resolvedOriginRequestLELog")
47 |
48 | ## Drops all null fields in a DynamicFrame whose type is NullType
49 | cleanedOriginRequestLELog = DropNullFields.apply(frame = resolvedOriginRequestLELog, transformation_ctx = "cleanedOriginRequestLELog")
50 |
51 | ## Write the origin request Lambda@Edge logs to the S3 path (s3://us-east-1.data-analytics/cflogworkshop/optimized/lelogs/origin-request) in the optimized (Parquet) format partitioned by year, month, date, hour
52 | originRequestLELogSink = glueContext.write_dynamic_frame.from_options(frame = cleanedOriginRequestLELog, connection_type = "s3", connection_options = {"path": "s3://us-east-1.data-analytics/cflogworkshop/optimized/lelogs/origin-request", "partitionKeys": ["year", "month", "date", "hour"]}, format = "parquet", transformation_ctx = "originRequestLELogSink")
53 |
54 | job.commit()
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/log-combiner-glue-script.py:
--------------------------------------------------------------------------------
1 | import sys
2 | from awsglue.transforms import *
3 | from awsglue.utils import getResolvedOptions
4 | from pyspark.context import SparkContext
5 | from pyspark.sql.functions import split
6 | from awsglue.context import GlueContext
7 | from awsglue.dynamicframe import DynamicFrame
8 | from awsglue.job import Job
9 |
10 | ## @params: [JOB_NAME]
11 | args = getResolvedOptions(sys.argv, ['JOB_NAME', 'target_s3_bucket'])
12 |
13 | sc = SparkContext()
14 | glueContext = GlueContext(sc)
15 | spark = glueContext.spark_session
16 | job = Job(glueContext)
17 | job.init(args['JOB_NAME'], args)
18 |
19 | ################################################################
20 | # Combining Lambda@Edge Logs -[origin-request, viewer-request] #
21 | ################################################################
22 |
23 | ## Create dynamic frame from optimized (Parquet format) Lambda@Edge viewer request logs as the datasource. Glue Data Catalog = {database = "reInvent2018_aws_service_logs", table_name = "lambdaedge_logs_viewer_request_optimized"}
24 | labdaEdgeViewerRequestLogs = glueContext.create_dynamic_frame.from_catalog(database = "reInvent2018_aws_service_logs", table_name = "lambdaedge_logs_viewer_request_optimized", transformation_ctx = "labdaEdgeViewerRequest")
25 |
26 | ## Drop the fields that are duplicate between Lambda@Edge viewer request logs and Lambda@Edge origin request logs
27 | modifiedLEViewerRequestLogs = DropFields.apply(frame = labdaEdgeViewerRequestLogs, paths=["eventtype"], transformation_ctx ="modifiedLEViewerRequestLogs")
28 |
29 | ## Create dynamic frame from optimized (Parquet format) Lambda@Edge origin request logs as the datasource. Glue Data Catalog = {database = "reInvent2018_aws_service_logs", table_name = "lambdaedge_logs_origin_request_optimized"}
30 | labdaEdgeOriginRequestLogs = glueContext.create_dynamic_frame.from_catalog(database = "reInvent2018_aws_service_logs", table_name = "lambdaedge_logs_origin_request_optimized", transformation_ctx = "labdaEdgeOriginRequest")
31 |
32 | ## Drop the fields that are duplicate between Lambda@Edge viewer request logs and Lambda@Edge origin request logs
33 | trimmedLEOriginRequestLogs = DropFields.apply(frame = labdaEdgeOriginRequestLogs, paths=["executionregion", "distributionid", "distributionname", "requestdata", "customtraceid", "eventtype", "year", "month", "date", "hour"], transformation_ctx ="trimmedLEOriginRequestLogs")
34 |
35 | ## Rename the requestid field for Lambda@Edge origin request logs to origin requestid
36 | modifiedLEOriginRequestLogs = RenameField.apply(frame = trimmedLEOriginRequestLogs, old_name = "requestid", new_name = "origin_requestid", transformation_ctx ="modifiedLEOriginRequestLogs" )
37 |
38 | ## Convert to DataFrame
39 | modifiedLEOriginRequestLogsDF = modifiedLEOriginRequestLogs.toDF()
40 |
41 | ## Convert to DataFrame
42 | modifiedLEViewerRequestLogsDF = modifiedLEViewerRequestLogs.toDF()
43 |
44 | ## Join(left outer join) the Lambda@Edge viewer-request logs with the origin-request logs based on the requestid
45 | combinedLambdaEdgeLogsDF = modifiedLEViewerRequestLogsDF.join(modifiedLEOriginRequestLogsDF, modifiedLEViewerRequestLogsDF["requestid"] == modifiedLEOriginRequestLogsDF["origin_requestid"], "left_outer")
46 |
47 | ## Convert to DynamicFrame
48 | combinedLambdaEdgeLogs = DynamicFrame.fromDF(combinedLambdaEdgeLogsDF, glueContext, "combinedLambdaEdgeLogs")
49 |
50 | ## Join the Lambda@Edge viewer-request logs with the origin-request logs based on the requestid
51 | #combinedLambdaEdgeLogs = Join.apply(modifiedLEViewerRequestLogs, modifiedLEOriginRequestLogs, 'requestid', 'origin_requestid')
52 |
53 | ## Drop the origin_requestid field
54 | lambdaEdgeLogs = DropFields.apply(frame = combinedLambdaEdgeLogs, paths=["origin_requestid"], transformation_ctx ="lambdaEdgeLogs")
55 |
56 | ## Drop the "year", "month", "date", "hour" and "useragentstring" fields
57 | trimmedLambdaEdgeLogs = DropFields.apply(frame =lambdaEdgeLogs, paths=["year", "month", "date", "hour", "useragentstring"], transformation_ctx ="trimmedLambdaEdgeLogs")
58 |
59 | ## Convert to DataFrame
60 | trimmedLambdaEdgeLogsDF = trimmedLambdaEdgeLogs.toDF()
61 |
62 | # Destination S3 location for the combined Lambda@Edge logs
63 | leLogDestPath = "s3://" + args['target_s3_bucket'] + "/combined/lelogs"
64 |
65 | ## Write the combined Lambda@Edge logs to S3 (s3://<target_s3_bucket>/combined/lelogs) in optimized Parquet format partitioned by year, month, date, hour
66 | lambdaEdgeLogsSink = glueContext.write_dynamic_frame.from_options(frame = lambdaEdgeLogs, connection_type = "s3", connection_options = {"path": leLogDestPath, "partitionKeys": ["year", "month", "date", "hour"]}, format = "parquet", transformation_ctx = "lambdaEdgeLogsSink")
67 |
68 | ########################################################################
69 | # Combining Lambda@Edge Logs , CloudFront Access Logs, ALB Access Logs #
70 | ########################################################################
71 |
72 | ## Create dynamic frame from optimized (Parquet format) Amazon CloudFront access logs as the datasource. Glue Data Catalog = {database = "reInvent2018_aws_service_logs", table_name = "cf_access_optimized"}
73 | cfLog = glueContext.create_dynamic_frame.from_catalog(database = "reInvent2018_aws_service_logs", table_name = "cf_access_optimized", transformation_ctx = "cfLog")
74 |
75 | ## Rename the requestid field in the CloudFront logs to cf_requestid
76 | modifiedCFLogs = RenameField.apply(frame = cfLog, old_name = "requestid", new_name = "cf_requestid", transformation_ctx ="modifiedCFLogs" )
77 |
78 | ## Convert to DataFrame
79 | modifiedCFLogsDF = modifiedCFLogs.toDF()
80 |
81 | ## Create dynamic frame from optimized (Parquet format) Application Load Balancer logs as the datasource. Glue Data Catalog = {database = "reInvent2018_aws_service_logs", table_name = "alb_access_optimized"}
82 | albLogs = glueContext.create_dynamic_frame.from_catalog(database = "reInvent2018_aws_service_logs", table_name = "alb_access_optimized", transformation_ctx = "albLog")
83 |
84 | ## Drop the "year", "month", "day", "hour" fields
85 | trimmedALBLogs = DropFields.apply(frame = albLogs, paths=["year", "month", "day", "hour"], transformation_ctx ="trimmedALBLogs")
86 |
87 | ## Rename the time field in the ALB logs to alb_time
88 | modifiedALBLogs = RenameField.apply(frame = trimmedALBLogs, old_name = "time", new_name = "alb_time", transformation_ctx ="modifiedALBLogs" )
89 |
90 | ## Convert ALB Log dynamic frame to Apache Spark data frame
91 | modfiedALBLogDF = modifiedALBLogs.toDF()
92 |
93 | ## Extract the custom trace id from the trace_id column in the ALB logs, as the Application Load Balancer updates the trace_id value with its Self field
94 | split_col = split(modfiedALBLogDF['trace_id'], ';')
95 | finalALBLogDF = modfiedALBLogDF.withColumn("custom_trace_id", split_col.getItem(1))
96 |
97 | ## Join (left outer join) the Lambda@Edge logs with the ALB logs based on the custom trace id
98 | leALBCombinedLogsDF = trimmedLambdaEdgeLogsDF.join(finalALBLogDF, trimmedLambdaEdgeLogsDF["customtraceid"] == finalALBLogDF["custom_trace_id"], "left_outer")
99 |
100 | ## Join (left outer join) the CloudFront access logs with the combined Lambda@Edge and ALB logs based on the requestid
101 | combinedLogsDF = modifiedCFLogsDF.join(leALBCombinedLogsDF, modifiedCFLogsDF["cf_requestid"] == leALBCombinedLogsDF["requestid"], "left_outer")
102 |
103 | ## Convert the combined logs data frame back to a dynamic frame
104 | combinedLogs = DynamicFrame.fromDF(combinedLogsDF, glueContext, "combinedLogs")
105 |
106 | ## Drop custom trace id and requestid from combined logs
107 | finalCombinedLogs = DropFields.apply(frame = combinedLogs, paths=["custom_trace_id", "cf_requestid"], transformation_ctx ="finalCombinedLogs")
108 |
109 | # Destination S3 location for the combined logs
110 | logDestPath = "s3://" + args['target_s3_bucket'] + "/combined/logs"
111 |
112 | ## Write the combined logs to S3 (s3://<target_s3_bucket>/combined/logs) in optimized Parquet format partitioned by year, month, day
113 | finalCombinedLogsSink = glueContext.write_dynamic_frame.from_options(frame = finalCombinedLogs, connection_type = "s3", connection_options = {"path": logDestPath, "partitionKeys": ["year", "month", "day"]}, format = "parquet", transformation_ctx = "finalCombinedLogsSink")
114 |
115 | job.commit()
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/originRequest-Lambda/index.js:
--------------------------------------------------------------------------------
1 | 'use strict';
2 |
3 | const AWS = require('aws-sdk');
4 | const firehose = new AWS.Firehose({region: ''}); // placeholder: set to the AWS Region of your Firehose delivery stream
5 |
6 | const streamName = ""; // placeholder: set to your Firehose delivery stream name
7 | console.log("StreamName: ", streamName);
8 |
9 | function parseRequest(eventData) {
10 |
11 | var parsedJson = {};
12 |
13 | parsedJson.executionRegion = process.env.AWS_REGION;
14 | parsedJson.requestId = null;
15 | parsedJson.distributionId = eventData.config.distributionId;
16 | parsedJson.distributionName = eventData.config.distributionDomainName;
17 | parsedJson.eventType = eventData.config.eventType;
18 | parsedJson.customTraceId = null;
19 | parsedJson.viewerCountry = "unknown";
20 | parsedJson.deviceFormFactor = "unknown";
21 |
22 | if(eventData.request.headers["x-request-id"]) { //check if the custom header exists; it is added by the viewer request Lambda@Edge function
23 | parsedJson.requestId = eventData.request.headers["x-request-id"][0].value;
24 | }
25 |
26 | if(eventData.request.headers["x-my-trace-id"]) { //check if the custom header exists, this is added as part of client side instrumentation
27 | parsedJson.customTraceId = eventData.request.headers["x-my-trace-id"][0].value;
28 | }
29 |
30 | if(eventData.request.headers["cloudfront-viewer-country"]) { //check if the header exists; Amazon CloudFront adds it when the header is whitelisted
31 | parsedJson.viewerCountry = eventData.request.headers["cloudfront-viewer-country"][0].value;
32 | }
33 |
34 | if(eventData.request.headers["cloudfront-is-mobile-viewer"] && eventData.request.headers["cloudfront-is-mobile-viewer"][0].value == 'true') { //check if the custom header exists, this is added by Amazon CloudFront if the headers are whitelisted
35 | parsedJson.deviceFormFactor = "mobile";
36 | } else if (eventData.request.headers["cloudfront-is-tablet-viewer"] && eventData.request.headers["cloudfront-is-tablet-viewer"][0].value == 'true') {
37 | parsedJson.deviceFormFactor = "tablet";
38 | } else if (eventData.request.headers["cloudfront-is-smarttv-viewer"] && eventData.request.headers["cloudfront-is-smarttv-viewer"][0].value == 'true') {
39 | parsedJson.deviceFormFactor = "smarttv";
40 | } else if (eventData.request.headers["cloudfront-is-desktop-viewer"] && eventData.request.headers["cloudfront-is-desktop-viewer"][0].value == 'true') {
41 | parsedJson.deviceFormFactor = "desktop";
42 | }
43 |
44 | console.log("parsed-request : ", JSON.stringify(parsedJson, null, 2));
45 |
46 | return parsedJson;
47 | }
48 |
49 | function sendToKinesisFirehose(logMsg, stream){
50 |
51 | var params = {
52 | DeliveryStreamName: stream,
53 | Record: {
54 | Data: JSON.stringify(logMsg) + "\n"
55 | }
56 | };
57 |
58 | firehose.putRecord(params, function(err, data) {
59 | if (err) console.log(err, err.stack); // an error occurred
60 | else console.log(data); // successful response
61 | });
62 |
63 | console.log("firehosed-logmessage : ", JSON.stringify(logMsg, null, 2));
64 | }
65 |
66 | exports.handler = (event, context, callback) => {
67 |
68 | console.log("StreamName: ", streamName);
69 | console.log("request-event: ", JSON.stringify(event, null, 2));
70 |
71 | const request = event.Records[0].cf.request;
72 |
73 | const parsedRequestJson = parseRequest(event.Records[0].cf);
74 | sendToKinesisFirehose(parsedRequestJson, streamName);
75 |
76 | //Rejecting requests from viewers in Germany (viewer country code = 'DE')
77 | if(parsedRequestJson.viewerCountry == 'DE' ){
78 | const response = {
79 | status: '302',
80 | statusDescription: 'Found',
81 | headers: {
82 | location: [{
83 | key: 'Location',
84 | value: 'https://' + parsedRequestJson.distributionName + '/notavailable.html',
85 | }],
86 | },
87 | };
88 | callback(null, response);
89 | }
90 | else {
91 | callback(null, request);
92 | }
93 | };
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/sample-logs/raw-logs/sample-alb-logs.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/sample-logs/raw-logs/sample-alb-logs.gz
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/sample-logs/raw-logs/sample-cloudfront-access-logs.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/sample-logs/raw-logs/sample-cloudfront-access-logs.gz
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/sample-logs/raw-logs/sample-lambda-at-edge-origin-request-logs.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/sample-logs/raw-logs/sample-lambda-at-edge-origin-request-logs.gz
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/sample-logs/raw-logs/sample-lambda-at-edge-viewer-request.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab1-serveless-cloudfront-log-analysis/sample-logs/raw-logs/sample-lambda-at-edge-viewer-request.gz
--------------------------------------------------------------------------------
/lab1-serveless-cloudfront-log-analysis/viewerRequest-Lambda/index.js:
--------------------------------------------------------------------------------
1 | 'use strict';
2 |
3 | const AWS = require('aws-sdk');
4 | const firehose = new AWS.Firehose({region: ''}); // placeholder: set to the AWS Region of your Firehose delivery stream
5 |
6 | const streamName = ""; // placeholder: set to your Firehose delivery stream name
7 | console.log("StreamName: ", streamName);
8 |
9 | function sleep(delayInSeconds) {
10 | console.log("Adding Delay in Seconds: " + delayInSeconds);
11 | return new Promise(resolve => setTimeout(resolve, delayInSeconds*1000));
12 | }
13 |
14 | function parseRequest(eventData) {
15 |
16 | var parsedJson = {};
17 |
18 | parsedJson.executionRegion = process.env.AWS_REGION;
19 | parsedJson.requestId = eventData.config.requestId;
20 | parsedJson.distributionId = eventData.config.distributionId;
21 | parsedJson.distributionName = eventData.config.distributionDomainName;
22 | parsedJson.eventType = eventData.config.eventType;
23 | parsedJson.requestData = null;
24 | parsedJson.customTraceId = null;
25 | parsedJson.userAgentString = null;
26 |
27 | if(eventData.request.body.data) { //check that the request body data is not empty; for the GET method this field can be empty
28 | parsedJson.requestData = Buffer.from(eventData.request.body.data, 'base64').toString();
29 | }
30 |
31 | if(eventData.request.headers["x-my-trace-id"]) { //check if the custom header exists, this is added as part of client side instrumentation
32 | parsedJson.customTraceId = eventData.request.headers["x-my-trace-id"][0].value;
33 | }
34 |
35 | if(eventData.request.headers["user-agent"]) { //check if the user-agent header exists
36 | parsedJson.userAgentString = eventData.request.headers["user-agent"][0].value;
37 | }
38 |
39 | console.log("parsed-request : ", JSON.stringify(parsedJson, null, 2));
40 |
41 | return parsedJson;
42 | }
43 |
44 | function sendToKinesisFirehose(logMsg, stream){
45 |
46 | var params = {
47 | DeliveryStreamName: stream,
48 | Record: {
49 | Data: JSON.stringify(logMsg) + "\n"
50 | }
51 | };
52 |
53 | firehose.putRecord(params, function(err, data) {
54 | if (err) console.log(err, err.stack); // an error occurred
55 | else console.log(data); // successful response
56 | });
57 |
58 | console.log("firehosed-logmessage : ", JSON.stringify(logMsg, null, 2));
59 | }
60 |
61 | exports.handler = (event, context, callback) => {
62 |
63 | console.log("StreamName: ", streamName);
64 | console.log("request-event: ", JSON.stringify(event, null, 2));
65 |
66 | const requestId = event.Records[0].cf.config.requestId;
67 | const request = event.Records[0].cf.request;
68 |
69 | //Adding custom header with the requestId from cloudfront
70 | request.headers['x-request-id'] = [{
71 | "key": "x-request-id",
72 | "value": requestId,
73 | }];
74 |
75 | console.log("modified-request: ", JSON.stringify(request, null, 2));
76 |
77 | const parsedRequestJson = parseRequest(event.Records[0].cf);
78 |
79 | sendToKinesisFirehose(parsedRequestJson, streamName);
80 |
81 | //Adding Edge to Origin Delay
82 | //if(Math.floor(Math.random() * (4 - 0)) == 0) {
83 | // sleep(Math.floor(Math.random() * (3 - 0)));
84 | //}
85 |
86 | //Rejecting requests based on user agent (the empty strings below are placeholders for the user agent values to block)
87 | if(request.headers['user-agent'] &&
88 | (request.headers['user-agent'][0].value == "" ||
89 | request.headers['user-agent'][0].value == "")) {
90 | const response = {
91 | status: '302',
92 | statusDescription: 'Found',
93 | headers: {
94 | location: [{
95 | key: 'Location',
96 | value: 'https://' + parsedRequestJson.distributionName + '/notavailable.html',
97 | }],
98 | },
99 | };
100 | callback(null, response);
101 | }else {
102 | callback(null, request);
103 | }
104 | };
105 |
--------------------------------------------------------------------------------
/lab2-elk-cloudfront-log-analysis/README.md:
--------------------------------------------------------------------------------
1 |
2 | # Lab2: CloudFront log analysis using ELK
3 | [CloudFront access logs](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/AccessLogs.html) provide rich insights into your customer behavior. The insights gained by analyzing Amazon CloudFront access logs help improve website availability through bot detection and mitigation, optimize web content based on the devices and browsers used to view your webpages, reduce perceived latency by caching popular objects closer to their viewers, and so on. This results in a significant improvement in the overall perceived experience for the user.
4 |
5 | In this lab, you will build an ELK (Elasticsearch, Logstash, and Kibana) stack on AWS to analyze CloudFront access logs by loading them from an Amazon S3 bucket.
6 |
7 | [Amazon Elasticsearch Service](https://aws.amazon.com/elasticsearch-service/) (Amazon ES) is a fully managed service that delivers Elasticsearch’s easy-to-use APIs and real-time capabilities along with the availability, scalability, and security required by production workloads. This service offers built-in integrations with [Kibana](https://aws.amazon.com/elasticsearch-service/kibana/), [Logstash](https://aws.amazon.com/elasticsearch-service/logstash/), and AWS services including [Amazon Kinesis Firehose](https://aws.amazon.com/kinesis/firehose/), [AWS Lambda](https://aws.amazon.com/lambda/), and [Amazon CloudWatch](https://aws.amazon.com/cloudwatch/), so that you can build log analysis solutions quickly.
8 |
9 | Logstash provides out-of-the-box plugins such as [grok](https://www.elastic.co/guide/en/logstash/6.4/plugins-filters-grok.html) for filtering and enriching the data, and can derive [geo coordinates from IP addresses](https://www.elastic.co/guide/en/logstash/6.4/plugins-filters-geoip.html) before ingesting the data into the Elasticsearch domain. Kibana provides a broad set of visualization, filtering, and aggregation options to analyze the data stored in the Elasticsearch domain.
10 |
11 | In this lab, you will visualize CloudFront access behavior using Kibana geo-spatial visualization options such as [regional](https://www.elastic.co/guide/en/kibana/current/regionmap.html) and [coordinate maps](https://www.elastic.co/guide/en/kibana/current/tilemap.html). These maps can provide useful insights into your customer behavior as well as latency information for your CloudFront distribution across geographic locations.
12 |
13 | Note: We will use sample access logs generated from our demo environment. In a production scenario, you can simply change the Logstash configuration to poll the logs from your own S3 bucket, or configure your CloudFront distribution to deliver logs to the bucket used in this lab.
14 |
15 | ## High Level Architecture Overview
16 | The solution involves an S3 bucket for storing CloudFront access logs, Logstash deployed on EC2, an Nginx proxy on an EC2 instance, and an Elasticsearch domain with the built-in Kibana setup. The EC2 instances will be launched in a VPC, and the AWS resources will be provisioned via a CloudFormation template. Amazon Elasticsearch Service provides [various options](https://aws.amazon.com/blogs/security/how-to-control-access-to-your-amazon-elasticsearch-service-domain/) such as resource-based and identity-based policies to control access to the domain. In this solution, we will leverage IP-based policies to restrict access to the domain to the Logstash and proxy servers only. Access to Kibana [will be controlled](https://docs.aws.amazon.com/elasticsearch-service/latest/developerguide/es-kibana.html#es-kibana-access) via a proxy, and we will use basic HTTP authentication on the proxy to prevent anonymous access.
17 |
18 | 
19 |
20 | ## Pre-requisites
21 | This module requires:
22 | - An active AWS account with an Administrator IAM role.
23 |
24 | ## Create a Key Pair for EC2 Instances
25 |
26 | In this task, you will create a key pair that you will use to launch EC2 instances and SSH into them. The following steps outline how to create a unique SSH key pair for this lab.
27 |
28 | 1. Sign into the AWS Management Console and open the Amazon EC2 console at [https://console.aws.amazon.com/ec2](https://console.aws.amazon.com/ec2).
29 |
30 | 2. In the upper-right corner of the AWS Management Console, confirm you are in the desired AWS region, i.e. EU (Ireland).
31 |
32 | 3. Click on **Key Pairs** in the NETWORK & SECURITY section near the bottom of the leftmost menu. This will display a page to manage your SSH key pairs.
33 |
34 | 
35 |
36 | 4. To create a new SSH key pair, click the **Create Key Pair** button at the top of the browser window.
37 |
38 | 
39 |
40 | 5. In the resulting pop up window, type **_[First Name]-[Last Name]-Reinvent_** into the **Key Pair Name:** text box and click **Create.**
41 |
42 | 
43 |
44 | 6. The page will download the file **[Your-Name]-Reinvent.pem** to the local drive. Follow the browser instructions to save the file to the default download location.
45 |
46 | 7. Remember the full path to the .pem file you just downloaded. You will use this key pair to manage your EC2 instances for the rest of the lab.
47 |
48 | ## Deploy Solution
49 | In this section we will deploy the solution using a CloudFormation template. The template will create the required resources for this solution, including:
50 |
51 | - A VPC with an internet gateway and two public subnets
52 | - An Nginx proxy installed on an EC2 instance with an Elastic IP address
53 | - Logstash installed on an EC2 instance with an Elastic IP address
54 | - An S3 bucket in your region which stores sample CloudFront access logs
55 | - An EC2 IAM role with policies to access Amazon S3
56 | - An Amazon ES domain with 2 nodes and an IP-based access policy restricting access to only the Nginx proxy and Logstash instances
57 |
58 | :warning: **The default limit of VPCs per AWS Region is 5. This CloudFormation template needs to create a VPC.**
59 |
60 | The template gives the following outputs:
61 |
62 | - Amazon ES domain and Kibana endpoints
63 | - Elastic IP details of the Logstash and Nginx proxy servers
64 | - Nginx proxy URLs for Amazon ES Kibana. You can use these to access Kibana.
65 |
66 | 1. Click the **Launch Stack** button below for your preferred AWS region to launch the CloudFormation template.
67 |
68 | Region| Launch
69 | ------|-----
70 | US East (Ohio) | [](https://console.aws.amazon.com/cloudformation/home?region=us-east-2#/stacks/create/review?stackName=CF-LogAnalysis2018&templateURL=https://s3-eu-west-1.amazonaws.com/eu-west-1.data-analytics/labcontent/reInvent2018-ctd410/lab2/templates/CloudFront-Analysis-ELK-Lab.json)
71 | US West (Oregon) | [](https://console.aws.amazon.com/cloudformation/home?region=us-west-2#/stacks/create/review?stackName=CF-LogAnalysis2018&templateURL=https://s3-eu-west-1.amazonaws.com/eu-west-1.data-analytics/labcontent/reInvent2018-ctd410/lab2/templates/CloudFront-Analysis-ELK-Lab.json)
72 | EU (Ireland) | [](https://console.aws.amazon.com/cloudformation/home?region=eu-west-1#/stacks/create/review?stackName=CF-LogAnalysis2018&templateURL=https://s3-eu-west-1.amazonaws.com/eu-west-1.data-analytics/labcontent/reInvent2018-ctd410/lab2/templates/CloudFront-Analysis-ELK-Lab.json)
73 |
74 | 2. Enter a unique name for your stack in the **Stack name** text box, or use the default name **CF-LogAnalysis2018**.
75 |
76 | 3. Select the key pair you created in the previous section.
77 |
78 | 4. Update the **KibanaPassword** field. The default password is set to **admin123**, but we highly recommend updating it to a strong password.
79 |
80 | 5. Under Create stack, check both checkboxes, including **I acknowledge that AWS CloudFormation might create IAM resources with custom names**, and click the **Create** button.
81 |
82 | :warning: **The default limit of VPCs per AWS Region is 5. This CloudFormation template needs to create a VPC.**
83 |
84 | :warning: **We recommend that you restrict access to the EC2 instances to your specific IP range in production environments. By default, this setup allows SSH and HTTP access from `0.0.0.0/0`.**
85 |
86 | 
87 |
88 | 6. You should now see the screen with status **CREATE_IN_PROGRESS**. Click the **Stacks** link in the top navigation to see your current CloudFormation stacks.
89 |
90 | 
91 |
92 | 7. Click on the checkbox next to the stack to see additional details below.
93 |
94 | 
95 |
96 | 8. The CloudFormation template will take around 10 minutes to complete. Wait until the CloudFormation stack status changes to **CREATE_COMPLETE**.
97 |
98 | 
99 |
100 | 9. Click the **Outputs** tab and note down the outputs, as we will refer to these values in the next steps. (A script to fetch them is sketched below.)
101 |
102 | 
103 |
104 | ## Verify Amazon Elasticsearch Domain access policy
105 | 1. Go to the Amazon Elasticsearch Service (Amazon ES) console: https://console.aws.amazon.com/es
106 |
107 | 2. Click on the Elasticsearch domain the CloudFormation template has created.
108 |
109 | 
110 |
111 | 3. Click the **Modify access policy** button.
112 |
113 | 
114 |
115 | 4. Verify that the Elasticsearch domain access policy grants full access to this ES domain for the IP addresses of the Logstash and Nginx proxy servers. You can verify the IP addresses of the servers from the CloudFormation output values, as shown in the screenshot (or with the script sketched below).
116 |
117 | 
118 |
119 | ## Verify CloudFront access logs in S3 bucket
120 | As part of this lab, we copy CloudFront access logs into an S3 bucket created by the CloudFormation template. Before continuing with the rest of the lab, you need to make sure those log files were copied to your account. CloudFront access logs are compressed in gzip format. Refer to the AWS documentation for the [CloudFront access logs](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/AccessLogs.html) format and details. You can download a sample log file to your laptop to inspect its contents.
121 |
122 | 1. Go to CloudFormation console : http://console.aws.amazon.com/cloudformation/
123 |
124 | 2. Click the checkbox next to the stack you created.
125 |
126 | 3. Select the **Resources** tab, look for **CFLogBucket**, and click its **Physical ID** to go to the S3 bucket with the log files.
127 |
128 | 
129 |
130 | 4. You should see ***.gz** files in the S3 bucket. This confirms that the CloudFront access logs were copied to the S3 bucket, and we can continue with the rest of the lab. (A scripted check is sketched below the screenshot.)
131 |
132 | 
133 |
134 | ## Logstash ingestion of CloudFront logs
135 | In this step we will configure the Logstash agent installed on the EC2 instance to ingest the CloudFront logs we just verified in S3. Logstash provides built-in transformation and filtering for many log formats using grok filter plugins. In this step, we will also use plugins such as geoip for latitude and longitude and useragent to retrieve the user agent information from the access logs.
136 | [Index mapping templates](https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-templates.html) allow you to define templates that map the appropriate data types to the fields contained in the logs as part of index creation. In this lab, we will create an index template that maps the request IP attribute to the IP data type and the geoip latitude and longitude information to the geo-point data type. This ensures the right mapping of log fields as part of index creation.
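    |
    | For illustration, the sketch below (a hypothetical template using the `requests` library; the lab's actual **indextemplate.json** may differ in detail) shows the kind of mapping such a template defines, with the client IP as the `ip` type and `geoip.location` as `geo_point`:
    |
    | ```python
    | import json
    |
    | import requests
    |
    | # Placeholder -- use the ESDomainEndpoint value from the CloudFormation outputs.
    | ES_ENDPOINT = "https://<ESDomainEndpoint>"
    |
    | template = {
    |     "index_patterns": ["cloudfront*"],  # apply to the lab's daily indices
    |     "mappings": {
    |         "doc": {  # assumption: the default Logstash document type in 6.x
    |             "properties": {
    |                 "c_ip": {"type": "ip"},  # request IP mapped to the ip type
    |                 # latitude/longitude mapped to a geo_point for map visualizations
    |                 "geoip": {"properties": {"location": {"type": "geo_point"}}},
    |             }
    |         }
    |     },
    | }
    |
    | # PUT the template so it is applied when new cloudfront-* indices are created.
    | response = requests.put(
    |     f"{ES_ENDPOINT}/_template/cloudfront-template",
    |     headers={"Content-Type": "application/json"},
    |     data=json.dumps(template),
    | )
    | print(response.status_code, response.text)
    | ```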
137 |
138 | 1. Go to the [CloudFormation console](http://console.aws.amazon.com/cloudformation/) and copy the IP address for **LogstashEC2Instance** from the **Outputs** tab.
139 |
140 | 2. You need to connect to the Logstash EC2 instance using SSH. Please make sure that you have configured your machine to SSH into EC2 instances. You can follow the [instructions here](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/AccessingInstances.html) to configure your Mac/Windows machine to connect to EC2 instance using SSH. Use the following command to connect to EC2 instance:
141 |
142 | `ssh -i <path-to-key-pair.pem> ec2-user@<LogstashEC2Instance-IP>`
143 |
144 | 3. Create an index mapping template for processing the CloudFront logs. The CloudFormation template has already copied **indextemplate.json** into the `/home/ec2-user/templates` directory. Replace `<ESDomainEndpoint>` below with the Elasticsearch domain endpoint from the CloudFormation output key **ESDomainEndpoint**.
145 | ```bash
146 | sudo su -
147 |
148 | curl -XPUT <ESDomainEndpoint>/_template/cloudfront-template -H "Content-Type: application/json" -d@/home/ec2-user/templates/indextemplate.json
149 | ```
150 | 4. Run the following commands to change to the Logstash directory and verify that the installed Java version is 1.8.x.
151 | ```bash
152 | # Verify that the installed Java version is 1.8.x
153 |
154 | cd /elk/logstash-6.4.2/bin/
155 |
156 | java -version
157 | ```
158 | 5. Copy the Logstash configuration file **cloudfront.conf** from **/home/ec2-user/templates/** to **/elk/logstash-6.4.2/bin**.
159 | ```bash
160 | cp /home/ec2-user/templates/cloudfront.conf /elk/logstash-6.4.2/bin/
161 | ```
162 | 6. Logstash uses the S3 input plugin to poll the logs continuously and writes to the Elasticsearch domain using the **logstash-output-amazon_es** plugin. Edit the **input -> s3** section in **cloudfront.conf** to update the S3 bucket.
163 | ```nginx
164 | input{
165 | s3{
166 | #Enter S3 bucket name that has the CloudFront access logs. You can copy it from
167 | #CloudFormation stack output "CFLogBucket"
168 | bucket => ""
169 |
170 | #No change needed for "prefix"
171 | prefix => ""
172 |
173 | #Point "region" to your AWS Region. e.g. eu-west-1
174 | region => ""
175 | }
176 | }
177 | ```
178 | 7. Edit the **output -> amazon_es** section to update the Elasticsearch domain information for your setup.
179 |
180 | :warning: Make sure the Elasticsearch domain is listed **WITHOUT** https:// in the following section.
181 |
182 | ```nginx
183 | output{
184 | amazon_es{
185 | #Enter Elasticsearch domain name WITHOUT https://. You can copy the Elasticsearch
186 | #domain from CloudFormation stack output "ESDomainEndpoint"
187 | hosts =>[""]
188 |
189 | #Point "region" to AWS Region you have created the CloudFormation stack in. e.g. eu-west-1
190 | region => ""
191 | }
192 | }
193 | ```
194 | 8. Start the Logstash process. Logstash will take about 8-10 minutes to index the logs into Amazon Elasticsearch.
195 | ```bash
196 | cd /elk/logstash-6.4.2/bin/
197 | nohup ./logstash -f cloudfront.conf
198 |
199 | ```
200 | 9. You can also verify that the Logstash process started properly by opening another SSH session and tailing the log file as shown below.
201 | **NOTE:** You will see some errors related to installing templates in the Logstash logs. This is due to a [known issue](https://github.com/awslabs/logstash-output-amazon_es/issues/101). You can ignore them.
202 |
203 | ```bash
204 | tail -f /elk/logstash-6.4.2/logs/logstash-plain.log
205 | ```
206 |
207 | 10. Check that the indices are created on the ES domain. Go to the [Amazon ES console](http://console.aws.amazon.com/es/) and click on the Elasticsearch domain that was created earlier.
208 |
209 | 
210 |
211 | 11. CloudFront log indices are created on a daily basis, as shown below. (You can also list them with the short script after the screenshots.)
212 |
213 | 
214 |
215 | 
216 |
217 | You have successfully configured Logstash. Let us proceed to Nginx configuration.
218 | ## Nginx proxy configuration
219 | It should be noted that Kibana does not natively support IAM users and roles, but Amazon Elasticsearch Service offers several solutions for controlling access to Kibana. For more details, please refer to the [AWS documentation](https://docs.aws.amazon.com/elasticsearch-service/latest/developerguide/es-kibana.html#es-kibana-access). In this lab, we will use an open-source Nginx proxy to access the Kibana console.
220 |
221 | 1. Go to [CloudFormation console](http://console.aws.amazon.com/cloudformation/) and copy the IP address for **NginxEC2Instance** from **Outputs** tab.
222 |
223 | 2. Connect to the Nginx proxy EC2 instance as **ec2-user** using your key pair.
224 | ```bash
225 | ssh -i <path-to-key-pair.pem> ec2-user@<NginxEC2Instance-IP>
226 | ```
227 |
228 | 3. Copy **lab2-nginx.conf** from **/home/ec2-user/templates/**. You will need to update the conf file with your Elasticsearch domain endpoint, Elasticsearch Kibana endpoint, and Elastic IPs.
229 | ```bash
230 | sudo su -
231 | cd /etc/nginx
232 |
233 | mv nginx.conf nginx.conf-bkup
234 |
235 | cp /home/ec2-user/templates/lab2-nginx.conf /etc/nginx/nginx.conf
236 | ```
237 | 4. Update the following parameters in **nginx.conf** with the correct values for the Elasticsearch domain endpoint **(ESDomainEndpoint)**, Kibana endpoint **(ESKibanaEndpoint)**, and Nginx EC2 IP **(NginxEC2Instance)**, replacing the angle-bracket placeholders below. You can get the values from the CloudFormation stack outputs.
238 | ```nginx
239 | location / {
240 |
241 |     # ES Domain name WITHOUT https://
242 |     proxy_set_header Host <ESDomainEndpoint>;
243 |
244 |     # IP of Nginx EC2 Instance
245 |     proxy_set_header X-Real-IP <NginxEC2Instance-IP>;
246 |
247 |     # Elasticsearch Kibana endpoint
248 |     proxy_pass https://<ESDomainEndpoint>/_plugin/kibana/;
249 |
250 |     # Elasticsearch Kibana endpoint and IP of Nginx EC2 Instance
251 |     proxy_redirect https://<ESDomainEndpoint>/_plugin/kibana/ http://<NginxEC2Instance-IP>;
252 |     ......
253 |     ..........
254 | }
255 | location ~ (/app/kibana|/app/timelion|/bundles|/es_admin|/plugins|/api|/ui|/elasticsearch) {
256 |     ......
257 |     ........
258 |     # Elasticsearch Domain endpoint
259 |     proxy_pass https://<ESDomainEndpoint>;
260 | }
261 | ```
262 |
263 | 5. Reload the Nginx service after updating the configuration. (An optional connectivity check is sketched after the command.)
264 |
265 | ```bash
266 | service nginx reload
267 | ```
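    |
    | Optionally, you can confirm that the proxy answers with basic auth before opening a browser. A minimal sketch, assuming the `requests` library and the placeholder values:
    |
    | ```python
    | import requests
    |
    | # Fetch the proxied Kibana landing page with basic HTTP authentication.
    | response = requests.get(
    |     "http://<NginxEC2Instance-IP>/",  # placeholder -- the Nginx Elastic IP
    |     auth=("admin", "admin123"),       # default lab credentials; use your KibanaPassword
    |     timeout=10,
    | )
    | print(response.status_code)  # expect 200 once the proxy can reach Kibana
    | ```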
268 |
269 | The Nginx configuration is complete. The next step is to configure Kibana.
270 |
271 | ## Kibana Configuration
272 | 1. Access Kibana via the Nginx proxy IP address. To protect your proxy server, we leverage basic HTTP authentication, so you will be challenged for a username and password. Enter the username **admin** (lowercase) and the password you specified in the parameters section of the CloudFormation template. If you used the default values, the password is **admin123**.
273 |
274 | 
275 |
276 | 2. The Kibana dashboard will load.
277 |
278 | 
279 |
280 | 3. Create the index pattern in Kibana. Go to the **Management** section in Kibana and click **Index Patterns**.
281 |
282 | 
283 |
284 | 4. Enter **cloudfront*** (lowercase) in the **Index pattern** text box.
285 |
286 | 
287 |
288 | 5. Click **Next**, choose **@timestamp** as the **Time Filter field name**, and click the **Create index pattern** button.
289 |
290 | 
291 |
292 | 6. You can verify that the index used the correct index template for mapping. For example, if you browse through the fields, you will see a new field named **geoip.location** which is mapped as the geo_point data type.
293 |
294 | 
295 |
296 | Kibana has now been configured. Let us move to the final part of this lab, where we will create visualizations.
297 |
298 | ## Kibana Visualization
299 | Now we are ready to create visualizations. You can create visualizations manually or import predefined visualizations as JSON templates into your dashboard. We will go over both cases.
300 |
301 | ### Use Case #1 (User agent vs. Error code)
302 | This visualization will show whether customers are experiencing errors, and from which specific device types.
303 |
304 | 1. Go to Kibana dashboard.
305 |
306 | 2. Select **Visualize** from the left-side menu and click the **+** in the Visualize section.
307 |
308 | 
309 |
310 | 3. Select **Heat Map** under **Basic Charts** in **Select visualization type**.
311 |
312 | 
313 |
314 | 4. Select **cloudfront*** in the **From a New Search, Select Index** section.
315 |
316 | 
317 |
318 | 5. Change the time range for the visualization from the last 15 minutes to **Last 60 days**.
319 |
320 | 
321 |
322 | 6. Configure the settings under **Bucket** as follows, and then click the **Apply changes** button (the play button at the top):
323 |
324 | | | **Aggregation**|**Field** |
325 | | ----------|:--------------:| :----- |
326 | | **X-Axis**| Terms | useragent.device.keyword |
327 | | **Y-Axis**| Terms | sc_status |
328 |
329 |
330 | 
331 |
332 | 7. You will see the graph/visualization. Save the visualization as **User agent-status-code-heatmap**.
333 |
334 | 
335 |
336 | ### Use Case #2 (Avg or Max Latency per city)
337 | You can build a geo-spatial visualization using a coordinate map. We will show how to import the visualization from a predefined template.
338 |
339 | 1. Save the [kibanamaxlatencypercity.json](https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/master/lab2-elk-cloudfront-log-analysis/kibanamaxlatencypercity.json) file to your local computer, either by downloading it or by copying and pasting it into a JSON file.
340 |
341 | 2. Go to **Management** -> **Saved Objects**. Click **Import** and import the downloaded **kibanamaxlatencypercity.json**. This visualization shows the max(time_taken) for each city.
342 |
343 | 
344 |
345 | 3. Click **Yes, overwrite all objects** if the **Automatically overwrite all saved objects** dialog box appears.
346 |
347 | 4. Click **Confirm all changes** in the **Index Pattern Conflicts** dialog box.
348 |
349 | 5. You should now see the following visualization under the **Visualization** tab.
350 |
351 | 
352 |
353 | 6. Click **Visualization** in the Kibana dashboard menu and select **Max-Latency-percity** to see the visualization.
354 |
355 | 
356 |
357 | 
358 |
359 | ### Use Case #3 (Number of requests per geo-region or popular regions)
360 | In this case, we will create a geo-spatial visualization using a regional map. This visualization shows the distribution of the number of requests per city. This kind of visualization can be used for analyzing traffic patterns as well as for marketing purposes.
361 |
362 | 1. Save the [kibanageorequests.json](https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/master/lab2-elk-cloudfront-log-analysis/kibanageorequests.json) file to your local computer, either by downloading it or by copying and pasting it into a JSON file.
363 |
364 | 2. Follow **Steps 2 - 6** from Use Case #2 and import the downloaded file (**kibanageorequests.json**).
365 |
366 | 3. Once complete, you will see the final visualization for the number of requests per geo-region.
367 |
368 | 
369 |
370 | 
371 |
372 | ## Completion
373 | You have successfully completed this lab. Please proceed with the cleanup to make sure running resources do not incur unnecessary billing.
374 |
375 | ## Clean up
376 | 1. Delete the S3 buckets created in this lab in the step [**Verify CloudFront access logs in S3 bucket**](https://github.com/aws-samples/amazon-cloudfront-log-analysis/tree/master/lab2-elk-cloudfront-log-analysis#verify-cloudfront--access-logs-in-s3-bucket).
377 |
378 | 2. Go to the CloudFormation console: http://console.aws.amazon.com/cloudformation/
379 |
380 | 3. Click the checkbox next to the stack you created.
381 |
382 | 4. Click the **Actions** button and select **Delete Stack** to delete the stack.
383 |
384 | 
385 |
386 |
--------------------------------------------------------------------------------
/lab2-elk-cloudfront-log-analysis/assets/Cf1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/Cf1.png
--------------------------------------------------------------------------------
/lab2-elk-cloudfront-log-analysis/assets/Cf2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/Cf2.png
--------------------------------------------------------------------------------
/lab2-elk-cloudfront-log-analysis/assets/Cf3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/Cf3.png
--------------------------------------------------------------------------------
/lab2-elk-cloudfront-log-analysis/assets/Cf4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/Cf4.png
--------------------------------------------------------------------------------
/lab2-elk-cloudfront-log-analysis/assets/Cf5png.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/Cf5png.png
--------------------------------------------------------------------------------
/lab2-elk-cloudfront-log-analysis/assets/architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/architecture.png
--------------------------------------------------------------------------------
/lab2-elk-cloudfront-log-analysis/assets/asset.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/asset.txt
--------------------------------------------------------------------------------
/lab2-elk-cloudfront-log-analysis/assets/cleanup1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/cleanup1.png
--------------------------------------------------------------------------------
/lab2-elk-cloudfront-log-analysis/assets/esDomain1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/esDomain1.png
--------------------------------------------------------------------------------
/lab2-elk-cloudfront-log-analysis/assets/esDomain2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/esDomain2.png
--------------------------------------------------------------------------------
/lab2-elk-cloudfront-log-analysis/assets/esDomain3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/esDomain3.png
--------------------------------------------------------------------------------
/lab2-elk-cloudfront-log-analysis/assets/esIndices1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/esIndices1.png
--------------------------------------------------------------------------------
/lab2-elk-cloudfront-log-analysis/assets/esIndices2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/esIndices2.png
--------------------------------------------------------------------------------
/lab2-elk-cloudfront-log-analysis/assets/esIndices3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/esIndices3.png
--------------------------------------------------------------------------------
/lab2-elk-cloudfront-log-analysis/assets/keyPair1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/keyPair1.png
--------------------------------------------------------------------------------
/lab2-elk-cloudfront-log-analysis/assets/keyPair2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/keyPair2.png
--------------------------------------------------------------------------------
/lab2-elk-cloudfront-log-analysis/assets/keyPair3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/keyPair3.png
--------------------------------------------------------------------------------
/lab2-elk-cloudfront-log-analysis/assets/kibana1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/kibana1.png
--------------------------------------------------------------------------------
/lab2-elk-cloudfront-log-analysis/assets/kibana10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/kibana10.png
--------------------------------------------------------------------------------
/lab2-elk-cloudfront-log-analysis/assets/kibana11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/kibana11.png
--------------------------------------------------------------------------------
/lab2-elk-cloudfront-log-analysis/assets/kibana12.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/kibana12.png
--------------------------------------------------------------------------------
/lab2-elk-cloudfront-log-analysis/assets/kibana13.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/kibana13.png
--------------------------------------------------------------------------------
/lab2-elk-cloudfront-log-analysis/assets/kibana14.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/kibana14.png
--------------------------------------------------------------------------------
/lab2-elk-cloudfront-log-analysis/assets/kibana15.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/kibana15.png
--------------------------------------------------------------------------------
/lab2-elk-cloudfront-log-analysis/assets/kibana16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/kibana16.png
--------------------------------------------------------------------------------
/lab2-elk-cloudfront-log-analysis/assets/kibana17.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/kibana17.png
--------------------------------------------------------------------------------
/lab2-elk-cloudfront-log-analysis/assets/kibana18.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/kibana18.png
--------------------------------------------------------------------------------
/lab2-elk-cloudfront-log-analysis/assets/kibana2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/kibana2.png
--------------------------------------------------------------------------------
/lab2-elk-cloudfront-log-analysis/assets/kibana3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/kibana3.png
--------------------------------------------------------------------------------
/lab2-elk-cloudfront-log-analysis/assets/kibana4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/kibana4.png
--------------------------------------------------------------------------------
/lab2-elk-cloudfront-log-analysis/assets/kibana5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/kibana5.png
--------------------------------------------------------------------------------
/lab2-elk-cloudfront-log-analysis/assets/kibana6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/kibana6.png
--------------------------------------------------------------------------------
/lab2-elk-cloudfront-log-analysis/assets/kibana7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/kibana7.png
--------------------------------------------------------------------------------
/lab2-elk-cloudfront-log-analysis/assets/kibana8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/kibana8.png
--------------------------------------------------------------------------------
/lab2-elk-cloudfront-log-analysis/assets/kibana9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/kibana9.png
--------------------------------------------------------------------------------
/lab2-elk-cloudfront-log-analysis/assets/s3bucket1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/s3bucket1.png
--------------------------------------------------------------------------------
/lab2-elk-cloudfront-log-analysis/assets/s3bucket2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-cloudfront-log-analysis/49e88273eada4f1480d12a6c40c7bc61b96893bd/lab2-elk-cloudfront-log-analysis/assets/s3bucket2.png
--------------------------------------------------------------------------------
/lab2-elk-cloudfront-log-analysis/config/CloudFront-Analysis-ELK-Lab.json:
--------------------------------------------------------------------------------
1 | {
2 | "AWSTemplateFormatVersion": "2010-09-09",
3 | "Description": "CloudFormation template for creating ELK stack for CloudFront Log Analysis Lab.You will be billed for the AWS resources used if you create a stack from this template",
4 | "Parameters": {
5 | "EC2KeyPair": {
6 | "Description": "Amazon EC2 Key Pair",
7 | "Type": "AWS::EC2::KeyPair::KeyName"
8 | },
9 | "VpcCIDR": {
10 | "Description": "Please enter the IP range (CIDR notation) for this VPC",
11 | "Type": "String",
12 | "Default": "10.192.0.0/16",
13 | "AllowedPattern": "(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})/(\\d{1,2})"
14 | },
15 | "PublicSubnet1CIDR": {
16 | "Description": "Please enter the IP range (CIDR notation) for the public subnet in the first Availability Zone",
17 | "Type": "String",
18 | "Default": "10.192.10.0/24",
19 | "AllowedPattern": "(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})/(\\d{1,2})"
20 | },
21 | "PublicSubnet2CIDR": {
22 | "Description": "Please enter the IP range (CIDR notation) for the public subnet in the second Availability Zone",
23 | "Type": "String",
24 | "Default": "10.192.11.0/24",
25 | "AllowedPattern": "(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})/(\\d{1,2})"
26 | },
27 | "LogstashInstanceType": {
28 | "Type": "String",
29 | "Description": "Amazon EC2 instance type for the Logstash Instance",
30 | "Default": "t3.medium",
31 | "AllowedValues": [
32 | "t3.medium",
33 | "t3.large",
34 | "m5.large",
35 | "m4.large"
36 | ]
37 | },
38 | "NginxInstanceType": {
39 | "Type": "String",
40 | "Description": "Amazon EC2 instance type for the Nginx proxy Instance",
41 | "Default": "t3.medium",
42 | "AllowedValues": [
43 | "t3.medium",
44 | "t3.large",
45 | "m5.large",
46 | "m4.large"
47 | ]
48 | },
49 | "ESDomainDataInstanceType": {
50 | "Type": "String",
51 | "Description": "Instance Type for the Elasticsearch Domain",
52 | "Default": "m4.large.elasticsearch",
53 | "AllowedValues": [
54 | "m4.large.elasticsearch",
55 | "m4.medium.elasticsearch",
56 | "c4.large.elasticsearch",
57 | "r4.large.elasticsearch"
58 | ]
59 | },
60 | "KibanaPassword": {
61 | "Default": "admin123",
62 | "NoEcho": "true",
63 | "Description": "Enter password for Kibana user: admin",
64 | "Type": "String",
65 | "MinLength": "8",
66 | "MaxLength": "41",
67 | "AllowedPattern": "[a-zA-Z0-9]*",
68 | "ConstraintDescription": "must contain only alphanumeric characters with minimum of 8 characters."
69 | },
70 | "ClientIP": {
71 | "Description": "The IP address range that can be used to connect to the RDS instances from your local machine.It must be a valid IP CIDR range of the form x.x.x.x/x.Pls get your address using checkip.amazonaws.com or whatsmyip.org",
72 | "Type": "String",
73 | "MinLength": "9",
74 | "MaxLength": "18",
75 | "Default": "0.0.0.0/0",
76 | "AllowedPattern": "(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})/(\\d{1,2})",
77 | "ConstraintDescription": "It must be a valid IP CIDR range of the form x.x.x.x/x. Suggest to enable access to your IP address only. Pls get your address using checkip.amazonaws.com or whatsmyip.org."
78 | }
79 | },
80 | "Conditions": {
81 | "AttachKeyPair": {
82 | "Fn::Not": [
83 | {
84 | "Fn::Equals": [
85 | {
86 | "Ref": "EC2KeyPair"
87 | },
88 | "None"
89 | ]
90 | }
91 | ]
92 | }
93 | },
94 | "Metadata": {
95 | "AWS::CloudFormation::Interface": {
96 | "ParameterGroups": [
97 | {
98 | "Label": {
99 | "default": "VPC Configurations"
100 | },
101 | "Parameters": [
102 | "EC2KeyPair",
103 | "VpcCIDR",
104 | "PublicSubnet1CIDR",
105 | "PublicSubnet2CIDR"
106 | ]
107 | },
108 | {
109 | "Label": {
110 | "default": " ELK Instance Configurations"
111 | },
112 | "Parameters": [
113 | "LogstashInstanceType",
114 | "NginxInstanceType",
115 | "ESDomainDataInstanceType",
116 | "KibanaPassword"
117 | ]
118 | },
119 | {
120 | "Label": {
121 | "default": "Enter IP address for the Security group Configuration"
122 | },
123 | "Parameters": [
124 | "ClientIP"
125 | ]
126 | }
127 | ]
128 | }
129 | },
130 | "Mappings": {
131 | "RegionMap": {
132 | "us-east-1": {
133 | "AMI": "ami-0ff8a91507f77f867"
134 | },
135 | "us-east-2": {
136 | "AMI": "ami-0b59bfac6be064b78"
137 | },
138 | "us-west-1": {
139 | "AMI": "ami-0bdb828fd58c52235"
140 | },
141 | "us-west-2": {
142 | "AMI": "ami-a0cfeed8"
143 | },
144 | "eu-west-1": {
145 | "AMI": "ami-047bb4163c506cd98"
146 | },
147 | "sa-east-1": {
148 | "AMI": "ami-07b14488da8ea02a0"
149 | },
150 | "ap-southeast-1": {
151 | "AMI": "ami-08569b978cc4dfa10"
152 | },
153 | "ap-southeast-2": {
154 | "AMI": "ami-09b42976632b27e9b"
155 | },
156 | "ap-northeast-1": {
157 | "AMI": "ami-06cd52961ce9f0d85"
158 | }
159 | }
160 | },
161 | "Resources": {
162 | "VPC": {
163 | "Type": "AWS::EC2::VPC",
164 | "Properties": {
165 | "CidrBlock": {
166 | "Ref": "VpcCIDR"
167 | },
168 | "Tags": [
169 | {
170 | "Key": "Name",
171 | "Value": {
172 | "Ref": "AWS::StackName"
173 | }
174 | }
175 | ]
176 | }
177 | },
178 | "InternetGateway": {
179 | "Type": "AWS::EC2::InternetGateway",
180 | "Properties": {
181 | "Tags": [
182 | {
183 | "Key": "Name",
184 | "Value": {
185 | "Ref": "AWS::StackName"
186 | }
187 | }
188 | ]
189 | }
190 | },
191 | "InternetGatewayAttachment": {
192 | "Type": "AWS::EC2::VPCGatewayAttachment",
193 | "Properties": {
194 | "InternetGatewayId": {
195 | "Ref": "InternetGateway"
196 | },
197 | "VpcId": {
198 | "Ref": "VPC"
199 | }
200 | }
201 | },
202 | "PublicSubnet1": {
203 | "Type": "AWS::EC2::Subnet",
204 | "Properties": {
205 | "VpcId": {
206 | "Ref": "VPC"
207 | },
208 | "AvailabilityZone": {
209 | "Fn::Select": [
210 | "0",
211 | {
212 | "Fn::GetAZs": ""
213 | }
214 | ]
215 | },
216 | "CidrBlock": {
217 | "Ref": "PublicSubnet1CIDR"
218 | },
219 | "MapPublicIpOnLaunch": true,
220 | "Tags": [
221 | {
222 | "Key": "Name",
223 | "Value": {
224 | "Fn::Sub": "${AWS::StackName} Public Subnet (AZ1)"
225 | }
226 | }
227 | ]
228 | }
229 | },
230 | "PublicSubnet2": {
231 | "Type": "AWS::EC2::Subnet",
232 | "Properties": {
233 | "VpcId": {
234 | "Ref": "VPC"
235 | },
236 | "AvailabilityZone": {
237 | "Fn::Select": [
238 | "1",
239 | {
240 | "Fn::GetAZs": ""
241 | }
242 | ]
243 | },
244 | "CidrBlock": {
245 | "Ref": "PublicSubnet2CIDR"
246 | },
247 | "MapPublicIpOnLaunch": true,
248 | "Tags": [
249 | {
250 | "Key": "Name",
251 | "Value": {
252 | "Fn::Sub": "${AWS::StackName} Public Subnet (AZ2)"
253 | }
254 | }
255 | ]
256 | }
257 | },
258 | "PublicRouteTable": {
259 | "Type": "AWS::EC2::RouteTable",
260 | "Properties": {
261 | "VpcId": {
262 | "Ref": "VPC"
263 | },
264 | "Tags": [
265 | {
266 | "Key": "Name",
267 | "Value": {
268 | "Fn::Sub": "${AWS::StackName} Public Routes"
269 | }
270 | }
271 | ]
272 | }
273 | },
274 | "DefaultPublicRoute": {
275 | "Type": "AWS::EC2::Route",
276 | "DependsOn": "InternetGatewayAttachment",
277 | "Properties": {
278 | "RouteTableId": {
279 | "Ref": "PublicRouteTable"
280 | },
281 | "DestinationCidrBlock": "0.0.0.0/0",
282 | "GatewayId": {
283 | "Ref": "InternetGateway"
284 | }
285 | }
286 | },
287 | "PublicSubnet1RouteTableAssociation": {
288 | "Type": "AWS::EC2::SubnetRouteTableAssociation",
289 | "Properties": {
290 | "RouteTableId": {
291 | "Ref": "PublicRouteTable"
292 | },
293 | "SubnetId": {
294 | "Ref": "PublicSubnet1"
295 | }
296 | }
297 | },
298 | "PublicSubnet2RouteTableAssociation": {
299 | "Type": "AWS::EC2::SubnetRouteTableAssociation",
300 | "Properties": {
301 | "RouteTableId": {
302 | "Ref": "PublicRouteTable"
303 | },
304 | "SubnetId": {
305 | "Ref": "PublicSubnet2"
306 | }
307 | }
308 | },
309 | "SGCFLabPublicEC2Access": {
310 | "Type": "AWS::EC2::SecurityGroup",
311 | "Properties": {
312 | "GroupDescription": "Security group for EC2 SSH and Proxy access",
313 | "SecurityGroupIngress": [
314 | {
315 | "IpProtocol": "tcp",
316 | "FromPort": "22",
317 | "ToPort": "22",
318 | "CidrIp": {
319 | "Ref": "ClientIP"
320 | }
321 | },
322 | {
323 | "IpProtocol": "tcp",
324 | "FromPort": "80",
325 | "ToPort": "80",
326 | "CidrIp": {
327 | "Ref": "ClientIP"
328 | }
329 | }
330 | ],
331 | "VpcId": {
332 | "Ref": "VPC"
333 | }
334 | }
335 | },
336 | "EC2InstanceRole": {
337 | "Type": "AWS::IAM::Role",
338 | "Properties": {
339 | "ManagedPolicyArns": [
340 | "arn:aws:iam::aws:policy/AmazonS3FullAccess"
341 | ],
342 | "AssumeRolePolicyDocument": {
343 | "Version": "2012-10-17",
344 | "Statement": [
345 | {
346 | "Sid": "",
347 | "Effect": "Allow",
348 | "Principal": {
349 | "Service": [
350 | "ec2.amazonaws.com"
351 | ]
352 | },
353 | "Action": [
354 | "sts:AssumeRole"
355 | ]
356 | }
357 | ]
358 | },
359 | "Path": "/"
360 | }
361 | },
362 | "IAMlogstashInstanceProfile": {
363 | "Type": "AWS::IAM::InstanceProfile",
364 | "Properties": {
365 | "Path": "/",
366 | "Roles": [
367 | {
368 | "Ref": "EC2InstanceRole"
369 | }
370 | ]
371 | }
372 | },
373 | "CFLogBucket": {
374 | "Type": "AWS::S3::Bucket"
375 | },
376 | "LogstashEC2Instance": {
377 | "Type": "AWS::EC2::Instance",
378 | "DependsOn": "CFLogBucket",
379 | "Properties": {
380 | "ImageId": {
381 | "Fn::FindInMap": [
382 | "RegionMap",
383 | {
384 | "Ref": "AWS::Region"
385 | },
386 | "AMI"
387 | ]
388 | },
389 | "InstanceType": {
390 | "Ref": "LogstashInstanceType"
391 | },
392 | "SecurityGroupIds": [
393 | {
394 | "Ref": "SGCFLabPublicEC2Access"
395 | }
396 | ],
397 | "KeyName": {
398 | "Ref": "EC2KeyPair"
399 | },
400 | "IamInstanceProfile": {
401 | "Ref": "IAMlogstashInstanceProfile"
402 | },
403 | "SubnetId": {
404 | "Ref": "PublicSubnet1"
405 | },
406 | "Tags": [
407 | {
408 | "Key": "Name",
409 | "Value": "Logstash EC2 server"
410 | }
411 | ],
412 | "UserData": {
413 | "Fn::Base64": {
414 | "Fn::Sub": "#!/bin/bash\necho \"${CFLogBucket}\" > /home/ec2-user/s3bucket.txt\naws s3 sync s3://eu-west-1.data-analytics/cflogworkshop/raw/cf-accesslogs s3://${CFLogBucket}\nmkdir /home/ec2-user/templates/\naws s3 sync s3://us-east-1.data-analytics/labcontent/reInvent2018content-ctd410/lab2/templates/ /home/ec2-user/templates/\nsudo mkdir /elk\ncd /elk\nsudo wget https://artifacts.elastic.co/downloads/logstash/logstash-6.4.2.tar.gz /elk/\nsudo gunzip logstash-6.4.2.tar.gz\nsudo tar -xvf logstash-6.4.2.tar\nsudo /elk/logstash-6.4.2/bin/logstash-plugin install logstash-output-amazon_es\nsudo sudo yum -y install java-1.8.*\ntmp_javapath=`alternatives --display java|grep priority|grep 1.8|awk '{print $1}'`\necho $tmp_javapath > /home/ec2-user/javapath.txt\nsudo alternatives --set java `cat /home/ec2-user/javapath.txt`\n"
415 | }
416 | }
417 | }
418 | },
419 | "LogstashElasticIP": {
420 | "Type": "AWS::EC2::EIP",
421 | "DependsOn": "LogstashEC2Instance",
422 | "Properties": {
423 | "InstanceId": {
424 | "Ref": "LogstashEC2Instance"
425 | },
426 | "Domain": "vpc"
427 | }
428 | },
429 | "NginxEC2Instance": {
430 | "Type": "AWS::EC2::Instance",
431 | "Properties": {
432 | "ImageId": {
433 | "Fn::FindInMap": [
434 | "RegionMap",
435 | {
436 | "Ref": "AWS::Region"
437 | },
438 | "AMI"
439 | ]
440 | },
441 | "InstanceType": {
442 | "Ref": "NginxInstanceType"
443 | },
444 | "SecurityGroupIds": [
445 | {
446 | "Ref": "SGCFLabPublicEC2Access"
447 | }
448 | ],
449 | "KeyName": {
450 | "Ref": "EC2KeyPair"
451 | },
452 | "SubnetId": {
453 | "Ref": "PublicSubnet2"
454 | },
455 | "Tags": [
456 | {
457 | "Key": "Name",
458 | "Value": "Nginx Proxy EC2 server"
459 | }
460 | ],
461 | "UserData": {
462 | "Fn::Base64": {
463 | "Fn::Sub": "#!/bin/bash\nmkdir /home/ec2-user/templates/\n cd /home/ec2-user/templates/\n wget https://s3-eu-west-1.amazonaws.com/eu-west-1.data-analytics/labcontent/reInvent2018-ctd410/lab2/templates/lab2-nginx.conf\nsudo yum -y install nginx\nsudo yum -y install httpd-tools\nsudo service nginx start\nsudo htpasswd -b -c /etc/nginx/.secrets_kibana admin ${KibanaPassword}\n"
464 | }
465 | }
466 | }
467 | },
468 | "NginxElasticIP": {
469 | "Type": "AWS::EC2::EIP",
470 | "DependsOn": "NginxEC2Instance",
471 | "Properties": {
472 | "InstanceId": {
473 | "Ref": "NginxEC2Instance"
474 | },
475 | "Domain": "vpc"
476 | }
477 | },
478 | "ElasticsearchDomain": {
479 | "Type": "AWS::Elasticsearch::Domain",
480 | "DependsOn": "LogstashElasticIP",
481 | "DependsOn": "NginxElasticIP",
482 | "Properties": {
483 | "ElasticsearchVersion": "6.3",
484 | "ElasticsearchClusterConfig": {
485 | "InstanceCount": "2",
486 | "ZoneAwarenessEnabled": "true",
487 | "InstanceType": {
488 | "Ref": "ESDomainDataInstanceType"
489 | }
490 | },
491 | "EBSOptions": {
492 | "EBSEnabled": true,
493 | "Iops": 0,
494 | "VolumeSize": 50,
495 | "VolumeType": "gp2"
496 | },
497 | "SnapshotOptions": {
498 | "AutomatedSnapshotStartHour": "0"
499 | },
500 | "Tags": [
501 | {
502 | "Key": "Name",
503 | "Value": {
504 | "Fn::Sub": "${AWS::StackName} ES Domain"
505 | }
506 | }
507 | ],
508 | "AccessPolicies": {
509 | "Version": "2012-10-17",
510 | "Statement": [
511 | {
512 | "Effect": "Allow",
513 | "Principal": {
514 | "AWS": "*"
515 | },
516 | "Action": "es:*",
517 | "Resource": {
518 | "Fn::Sub": "arn:aws:es:${AWS::Region}:${AWS::AccountId}:domain/*"
519 | },
520 | "Condition": {
521 | "IpAddress": {
522 | "aws:SourceIp": [
523 | {
524 | "Ref": "LogstashElasticIP"
525 | },
526 | {
527 | "Ref": "NginxElasticIP"
528 | }
529 | ]
530 | }
531 | }
532 | }
533 | ]
534 | },
535 | "AdvancedOptions": {
536 | "rest.action.multi.allow_explicit_index": "true"
537 | }
538 | }
539 | }
540 | },
541 | "Outputs": {
542 |
543 | "ESDomainEndpoint": {
544 | "Description": "ElasticSearch Domain Endpoint",
545 | "Value": {
546 | "Fn::Join": [
547 | "",
548 | ["https://",
549 | {
550 | "Fn::GetAtt": [
551 | "ElasticsearchDomain",
552 | "DomainEndpoint"
553 | ]
554 | }
555 | ]
556 | ]
557 | }
558 |
559 | },
560 |
561 | "ESKibanaEndpoint": {
562 | "Description": "ElasticSearch Kibana Endpoint",
563 | "Value": {
564 | "Fn::Join": [
565 | "",
566 | ["https://",
567 | {
568 | "Fn::GetAtt": [
569 | "ElasticsearchDomain",
570 | "DomainEndpoint"
571 | ]
572 | },
573 | "/_plugin/kibana/"
574 | ]
575 | ]
576 | }
577 |
578 |
579 | },
580 | "S3bucketName": {
581 | "Description": "S3 bucket to store CloudFront access logs",
582 | "Value": {
583 | "Ref": "CFLogBucket"
584 | }
585 | },
586 | "LogstashEC2Instance": {
587 | "Description": "IP for SSH access to Logstash server",
588 | "Value": {
589 | "Ref": "LogstashElasticIP"
590 | }
591 | },
592 | "NginxEC2Instance": {
593 | "Description": "IP for SSH access to Nginx proxy server",
594 | "Value": {
595 | "Ref": "NginxElasticIP"
596 | }
597 | },
598 | "ESKibanaProxyEndpoint": {
599 | "Description": "Kibana Proxy Endpoint",
600 | "Value": {
601 | "Fn::Join": [
602 | "",
603 | [
604 | "http://",
605 | {
606 | "Ref": "NginxElasticIP"
607 | }
608 | ]
609 | ]
610 | }
611 | }
612 | }
613 | }
614 |
--------------------------------------------------------------------------------
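A note on the template above: it is normally launched from the CloudFormation console, but it can also be deployed programmatically. Below is a minimal boto3 sketch, assuming hypothetical values for the stack name, key pair, client IP, and Kibana password; CAPABILITY_IAM is required because the template creates an IAM role and instance profile.

    # deploy_elk_stack.py -- minimal sketch; all parameter values below are
    # hypothetical placeholders.
    import boto3

    cfn = boto3.client("cloudformation", region_name="eu-west-1")  # any region in the RegionMap

    with open("CloudFront-Analysis-ELK-Lab.json") as f:
        template_body = f.read()

    cfn.create_stack(
        StackName="cf-log-analysis-elk",
        TemplateBody=template_body,
        Parameters=[
            {"ParameterKey": "EC2KeyPair", "ParameterValue": "my-lab-keypair"},   # existing EC2 key pair
            {"ParameterKey": "ClientIP", "ParameterValue": "203.0.113.7/32"},     # your IP, /32
            {"ParameterKey": "KibanaPassword", "ParameterValue": "Replace8Chars"},
        ],
        Capabilities=["CAPABILITY_IAM"],  # the template creates an IAM role and instance profile
    )
    cfn.get_waiter("stack_create_complete").wait(StackName="cf-log-analysis-elk")
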
/lab2-elk-cloudfront-log-analysis/config/cloudfront.conf:
--------------------------------------------------------------------------------
1 | #cloudfront.conf
2 | input {
3 | s3 {
4 |     #Enter the S3 bucket name that has the CloudFront access logs. You can copy it from the CloudFormation stack output "S3bucketName"
5 | bucket => ""
6 | #No change needed for "prefix"
7 | prefix => ""
8 | #Point "region" to your AWS Region.
9 | region => ""
10 | }
11 | }
12 |
13 |
14 | filter {
15 | grok {
16 | match => { "message" => "%{DATE_EU:date}\t%{TIME:time}\t%{GREEDYDATA:x_edge_location}\t(?:%{NUMBER:sc_bytes:int}|-)\t%{IPORHOST:c_ip}\t%{WORD:cs_method}\t%{HOSTNAME:cs_host}\t%{NOTSPACE:cs_uri_stem}\t%{NUMBER:sc_status:int}\t%{GREEDYDATA:referrer}\t%{GREEDYDATA:User_Agent}\t%{GREEDYDATA:cs-uri-query}\t%{GREEDYDATA:cookies}\t%{WORD:x_edge_result_type}\t%{NOTSPACE:x_edge_request_id}\t%{HOSTNAME:x_host_header}\t%{URIPROTO:cs_protocol}\t%{INT:cs_bytes:int}\t%{NUMBER:time_taken:float}\t%{GREEDYDATA:x_forwarded_for}\t%{GREEDYDATA:ssl_protocol}\t%{GREEDYDATA:ssl_cipher}\t%{GREEDYDATA:x_edge_response_result_type}\t%{GREEDYDATA:cs-protocol-version}\t%{GREEDYDATA:fle-status}\t%{GREEDYDATA:fle-encrypted-fields}" }
17 | }
18 |
19 | mutate {
20 | add_field => [ "listener_timestamp", "%{date} %{time}" ]
21 | }
22 |
23 | date {
24 | match => [ "listener_timestamp", "yy-MM-dd HH:mm:ss" ]
25 | target => "@timestamp"
26 | }
27 |
28 | geoip {
29 | source => "c_ip"
30 | }
31 |
32 | useragent {
33 | source => "User_Agent"
34 | target => "useragent"
35 | }
36 |
37 | mutate {
38 | remove_field => ["date", "time", "listener_timestamp", "cloudfront_version", "message", "cloudfront_fields", "User_Agent"]
39 | }
40 | }
41 |
42 | output {
43 | amazon_es {
44 |     #Enter the Elasticsearch domain endpoint WITHOUT https://. You can copy it from the CloudFormation stack output "ESDomainEndpoint"
45 |     hosts => [""]
46 |     #Point "region" to the AWS Region you created the CloudFormation stack in
47 | region => ""
48 | index => "cloudfront-logs-%{+YYYY.MM.dd}"
49 | }
50 | }
51 |
52 |
--------------------------------------------------------------------------------
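The grok pattern in the filter above depends on the exact tab-separated field order of a CloudFront web-distribution access log line. A quick way to sanity-check a raw line against that order is to split it on tabs and pair the values with the pattern's 26 field names, as in this small sketch (the sample line is fabricated and truncated for illustration):

    # check_cf_fields.py -- pairs a tab-separated CloudFront log line with the
    # field names used by the grok pattern above; the sample line is made up.
    FIELDS = [
        "date", "time", "x_edge_location", "sc_bytes", "c_ip", "cs_method",
        "cs_host", "cs_uri_stem", "sc_status", "referrer", "User_Agent",
        "cs-uri-query", "cookies", "x_edge_result_type", "x_edge_request_id",
        "x_host_header", "cs_protocol", "cs_bytes", "time_taken",
        "x_forwarded_for", "ssl_protocol", "ssl_cipher",
        "x_edge_response_result_type", "cs-protocol-version", "fle-status",
        "fle-encrypted-fields",
    ]

    sample = "2018-10-16\t19:20:00\tIAD79-C1\t2390\t198.51.100.10\tGET"  # truncated

    # zip() stops at the shorter sequence, so a truncated sample still works
    for name, value in zip(FIELDS, sample.split("\t")):
        print(f"{name:30s} {value}")
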
/lab2-elk-cloudfront-log-analysis/config/indextemplate.json:
--------------------------------------------------------------------------------
1 | {
2 | "index_patterns": ["cloudfront*"],
3 | "settings": {
4 | "number_of_shards": 2 },
5 |
6 | "mappings" : {
7 | "doc" : {
8 | "dynamic_templates" : [ {
9 | "string_fields" : {
10 | "match" : "*",
11 | "match_mapping_type" : "string",
12 | "mapping" : {
13 | "type" : "text", "norms" : false,
14 | "fields" : {
15 | "keyword" : { "type": "keyword", "ignore_above": 256 }
16 | }
17 | }
18 | }
19 | } ],
20 | "properties" : {
21 | "@timestamp": { "type": "date"},
22 | "@version": { "type": "keyword"},
23 | "geoip" : {
24 | "dynamic": true,
25 | "properties" : {
26 | "ip": { "type": "ip" },
27 | "location" : { "type" : "geo_point" },
28 | "latitude" : { "type" : "half_float" },
29 | "longitude" : { "type" : "half_float" }
30 | }
31 | }
32 | }
33 | }
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
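This index template needs to be installed on the Elasticsearch domain before Logstash writes the first cloudfront-* index; otherwise geoip.location is not mapped as a geo_point and the map visualizations below will not work. A minimal sketch using the Elasticsearch 6.x index template API follows; the domain endpoint is a hypothetical placeholder, and the request is unsigned, which only succeeds because the domain's access policy whitelists the instance IPs.

    # put_index_template.py -- minimal sketch; the endpoint is a placeholder.
    import json
    import requests

    ES_ENDPOINT = "https://search-example.eu-west-1.es.amazonaws.com"  # hypothetical

    with open("indextemplate.json") as f:
        template = json.load(f)

    # Elasticsearch 6.x index template API: PUT /_template/<name>
    resp = requests.put(f"{ES_ENDPOINT}/_template/cloudfront", json=template)
    resp.raise_for_status()
    print(resp.json())
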
/lab2-elk-cloudfront-log-analysis/config/lab2-nginx.conf:
--------------------------------------------------------------------------------
1 | events {
2 | worker_connections 1024;
3 | }
4 |
5 | http {
6 |
7 |
8 | server {
9 | listen 80;
10 | # error logging
11 | error_log /var/log/nginx/elasticsearch_error.log;
12 |
13 | # authentication: kibana
14 | auth_basic "Kibana Auth";
15 | auth_basic_user_file /etc/nginx/.secrets_kibana;
16 |
17 |
18 | location / {
19 | # ES Domain name WITHOUT https://
20 | proxy_set_header Host ;
21 | #IP of Nginx EC2 Instance
22 | proxy_set_header X-Real-IP ;
23 | proxy_buffer_size 128k;
24 | proxy_buffers 4 256k;
25 | proxy_busy_buffers_size 256k;
26 | proxy_set_header Connection "Keep-Alive";
27 | proxy_set_header Proxy-Connection "Keep-Alive";
28 | proxy_http_version 1.1;
29 | proxy_set_header Authorization "";
30 | #Elasticsearch Kibana endpoint
31 | proxy_pass https:///_plugin/kibana/;
32 | #Elasticsearch Kibana endpoint and IP of Nginx EC2 Instance
33 | proxy_redirect https:///_plugin/kibana/ http://;
34 | }
35 |
36 | location ~ (/app/kibana|/app/timelion|/bundles|/es_admin|/plugins|/api|/ui|/elasticsearch) {
37 | #Elasticsearch Domain endpoint
38 | proxy_pass https://;
39 | proxy_set_header Host $host;
40 | proxy_set_header X-Real-IP $remote_addr;
41 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
42 | proxy_set_header X-Forwarded-Proto $scheme;
43 | proxy_set_header X-Forwarded-Host $http_host;
44 | proxy_set_header Authorization "";
45 | }
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
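Once the placeholders in the proxy configuration above are filled in and nginx is reloaded, the proxy can be smoke-tested from the whitelisted client IP. A small sketch, assuming the admin user created by the htpasswd command in the template's UserData and a hypothetical Elastic IP:

    # check_kibana_proxy.py -- sketch only; the IP and password are placeholders.
    import requests

    NGINX_IP = "203.0.113.20"  # hypothetical NginxElasticIP from the stack outputs

    resp = requests.get(f"http://{NGINX_IP}/", auth=("admin", "Replace8Chars"))
    print(resp.status_code)  # expect 200 once proxy_pass points at Kibana
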
/lab2-elk-cloudfront-log-analysis/kibanageorequests.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "_id": "595e9c70-d305-11e8-aa33-3358ecf94586",
4 | "_type": "visualization",
5 | "_source": {
6 | "title": "Geo-requests-distribution",
7 | "visState": "{\"title\":\"Geo-requests-distribution\",\"type\":\"region_map\",\"params\":{\"legendPosition\":\"bottomright\",\"addTooltip\":true,\"colorSchema\":\"Yellow to Red\",\"selectedLayer\":{\"attribution\":\"Made with NaturalEarth|Elastic Maps Service
\",\"weight\":1,\"name\":\"World Countries\",\"url\":\"https://vector.maps.elastic.co/blob/5659313586569216?elastic_tile_service_tos=agree&my_app_version=6.3.1\",\"format\":{\"type\":\"geojson\"},\"fields\":[{\"name\":\"iso2\",\"description\":\"Two letter abbreviation\"},{\"name\":\"name\",\"description\":\"Country name\"},{\"name\":\"iso3\",\"description\":\"Three letter abbreviation\"}],\"created_at\":\"2017-04-26T17:12:15.978370\",\"tags\":[],\"id\":5659313586569216,\"layerId\":\"elastic_maps_service.World Countries\"},\"selectedJoinField\":{\"name\":\"name\",\"description\":\"Country name\"},\"isDisplayWarning\":true,\"wms\":{\"enabled\":true,\"options\":{\"format\":\"image/png\",\"transparent\":true},\"baseLayersAreLoaded\":{},\"tmsLayers\":[{\"minZoom\":0,\"maxZoom\":10,\"attribution\":\"\",\"url\":\"https://example.com/v1/default/{z}/{x}/{y}.png\",\"id\":\"TMS in config/kibana.yml\"},{\"id\":\"road_map\",\"url\":\"https://tiles.maps.elastic.co/v2/default/{z}/{x}/{y}.png?elastic_tile_service_tos=agree&my_app_name=kibana&my_app_version=6.3.1\",\"minZoom\":0,\"maxZoom\":10,\"attribution\":\"© OpenStreetMap contributors | Elastic Maps Service
\",\"subdomains\":[]}],\"selectedTmsLayer\":{\"minZoom\":0,\"maxZoom\":10,\"attribution\":\"\",\"url\":\"https://example.com/v1/default/{z}/{x}/{y}.png\",\"id\":\"TMS in config/kibana.yml\"}},\"mapZoom\":2,\"mapCenter\":[0,0],\"outlineWeight\":1,\"showAllShapes\":true},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{\"customLabel\":\"requests-distribution\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"geoip.country_name.keyword\",\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\",\"customLabel\":\"Geo-requests-Distribution\"}}]}",
8 | "uiStateJSON": "{\"mapZoom\":2,\"mapCenter\":[6.926426847059551,15.292968750000002]}",
9 | "description": "",
10 | "version": 1,
11 | "kibanaSavedObjectMeta": {
12 | "searchSourceJSON": "{\"index\":\"3b754c00-d2fa-11e8-9179-51a584345c01\",\"filter\":[],\"query\":{\"query\":\"\",\"language\":\"kuery\"}}"
13 | }
14 | }
15 | }
16 | ]
--------------------------------------------------------------------------------
/lab2-elk-cloudfront-log-analysis/kibanamaxlatencypercity.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "_id": "63df0410-d304-11e8-aa33-3358ecf94586",
4 | "_type": "visualization",
5 | "_source": {
6 | "title": "Max-Latency-percity",
7 | "visState": "{\"title\":\"Max-Latency-percity\",\"type\":\"tile_map\",\"params\":{\"mapType\":\"Scaled Circle Markers\",\"isDesaturated\":true,\"addTooltip\":true,\"heatClusterSize\":1.5,\"legendPosition\":\"bottomright\",\"mapZoom\":2,\"mapCenter\":[0,0],\"wms\":{\"enabled\":true,\"options\":{\"format\":\"image/png\",\"transparent\":true,\"layers\":\"http://ows-tile.terrestris.de/osm-basemap/service?\"},\"baseLayersAreLoaded\":{},\"tmsLayers\":[{\"minZoom\":0,\"maxZoom\":10,\"attribution\":\"\",\"url\":\"https://example.com/v1/default/{z}/{x}/{y}.png\",\"id\":\"TMS in config/kibana.yml\"},{\"id\":\"road_map\",\"url\":\"https://tiles.maps.elastic.co/v2/default/{z}/{x}/{y}.png?elastic_tile_service_tos=agree&my_app_name=kibana&my_app_version=6.3.1\",\"minZoom\":0,\"maxZoom\":10,\"attribution\":\"© OpenStreetMap contributors | Elastic Maps Service
\",\"subdomains\":[]}],\"selectedTmsLayer\":{\"minZoom\":0,\"maxZoom\":10,\"attribution\":\"\",\"url\":\"https://example.com/v1/default/{z}/{x}/{y}.png\",\"id\":\"TMS in config/kibana.yml\"},\"url\":\"https://maps.wikimedia.org/osm-intl/{z}/{x}/{y}.png\"}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"max\",\"schema\":\"metric\",\"params\":{\"field\":\"time_taken\",\"customLabel\":\"Maximum-Last-Byte-Latency\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"geohash_grid\",\"schema\":\"segment\",\"params\":{\"field\":\"geoip.location\",\"autoPrecision\":true,\"isFilteredByCollar\":true,\"useGeocentroid\":true,\"precision\":2,\"customLabel\":\"City\"}}]}",
8 | "uiStateJSON": "{}",
9 | "description": "",
10 | "version": 1,
11 | "kibanaSavedObjectMeta": {
12 | "searchSourceJSON": "{\"index\":\"3b754c00-d2fa-11e8-9179-51a584345c01\",\"filter\":[],\"query\":{\"query\":\"\",\"language\":\"kuery\"}}"
13 | }
14 | }
15 | }
16 | ]
--------------------------------------------------------------------------------
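Both files above are saved-object exports in the array format Kibana 6.x produces, and the lab imports them through Management > Saved Objects in the Kibana UI. As an alternative, here is a sketch of a programmatic import via the Kibana 6.x saved-objects API, run from one of the IP-whitelisted instances so no request signing is needed; the endpoint is a hypothetical placeholder, and the index pattern referenced in searchSourceJSON must already exist for the visualizations to render.

    # import_visualizations.py -- sketch; the Kibana endpoint is a placeholder.
    import json
    import requests

    KIBANA = "https://search-example.eu-west-1.es.amazonaws.com/_plugin/kibana"  # hypothetical

    for path in ("kibanageorequests.json", "kibanamaxlatencypercity.json"):
        with open(path) as f:
            objects = json.load(f)
        for obj in objects:
            # Kibana 6.x saved-objects API: POST /api/saved_objects/<type>/<id>
            resp = requests.post(
                f"{KIBANA}/api/saved_objects/{obj['_type']}/{obj['_id']}?overwrite=true",
                json={"attributes": obj["_source"]},
                headers={"kbn-xsrf": "true"},  # Kibana rejects API writes without this header
            )
            resp.raise_for_status()
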