├── LICENSE
├── README.md
└── monitor
    ├── Dockerfile
    ├── aws_usage_report_to_slack.py
    ├── azure_usage_report_to_slack.py
    ├── ec2_management
        ├── README.md
        ├── aws_delete_snapshot.py
        ├── aws_ec2_report_to_slack.py
        └── aws_report_snapshot.py
    ├── ecr_management
        ├── aws_ecr_lambda_report_to_slack.py
        ├── get_ecr_object.py
        ├── get_lambda_object.py
        ├── readme.md
        └── slack_utils.py
    ├── gcp_usage_report_to_slack.py
    ├── s3_management
        ├── README.md
        ├── aws_auto_s3_archiving.py
        ├── aws_s3_archiving_report.py
        ├── aws_s3_standard_report.py
        ├── usage_created_archiving_instance.py
        └── usage_terminated_archiving_instance.py
    └── spot_management
        ├── README.md
        └── aws_daily_instance_usage_report.py


/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2022 Distributed Data Processing Systems Lab.
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # cloud-usage
2 | 
3 | A repository of source code for monitoring cloud costs and tracking issues related to cloud usage.
4 | 


--------------------------------------------------------------------------------
/monitor/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM amazonlinux:latest
 2 | 
 3 | RUN yum install -y python3 python3-pip gcc
 4 | 
 5 | RUN pip3 install virtualenv
 6 | 
 7 | RUN mkdir /build
 8 | 
 9 | WORKDIR /build
10 | 
11 | RUN python3 -m venv env
12 | 
13 | ENV VIRTUAL_ENV=/build/env
14 | ENV PATH="$VIRTUAL_ENV/bin:$PATH"
15 | 
16 | RUN pip3 install google-cloud-bigquery google-auth google-auth-oauthlib
17 | 


--------------------------------------------------------------------------------
/monitor/aws_usage_report_to_slack.py:
--------------------------------------------------------------------------------
 1 | import boto3
 2 | import urllib.request, urllib.parse, json
 3 | from datetime import datetime, timedelta
 4 | import operator
 5 | 
 6 | query_start_day = 2
 7 | query_end_day = query_start_day - 1
 8 | account_name = "ddps@ddps.cloud"  # should be updated
 9 | slack_hook_url = ""  # should be updated
10 | 
def get_usage():
    """Fetch unblended cost grouped by service and usage type from Cost Explorer.

    The query window runs from ``query_start_day`` days ago to
    ``query_end_day`` days ago (module-level settings) at daily granularity.
    Every page of the response is read by following ``NextPageToken``.

    Returns:
        dict: service name -> list of single-entry dicts
        ``{usage_type: amount}``, where ``amount`` is the ``Amount`` value
        exactly as returned by the API.
    """
    ce_client = boto3.client("ce")

    # Read the clock once so Start and End cannot straddle midnight.
    now = datetime.now()
    request = {
        "TimePeriod": {
            "Start": (now - timedelta(query_start_day)).strftime('%Y-%m-%d'),
            "End": (now - timedelta(query_end_day)).strftime('%Y-%m-%d'),
        },
        "Granularity": "DAILY",
        "Metrics": ["UnblendedCost"],
        "GroupBy": [
            {'Type': 'DIMENSION', 'Key': 'SERVICE'},
            {'Type': 'DIMENSION', 'Key': 'USAGE_TYPE'},
        ],
    }

    result = {}
    while True:
        response = ce_client.get_cost_and_usage(**request)
        for day in response.get('ResultsByTime', []):
            for group in day.get('Groups', []):
                service = group['Keys'][0]
                usage_type = group['Keys'][1]
                amount = group['Metrics']['UnblendedCost']['Amount']
                result.setdefault(service, []).append({usage_type: amount})

        # Grouped results can span several pages; keep going until the API
        # stops handing back a continuation token.
        next_token = response.get('NextPageToken')
        if not next_token:
            break
        request['NextPageToken'] = next_token

    return result
34 | 
def generate_slack_message(result):
    """Render the usage mapping as the text of the daily Slack report.

    Args:
        result: service name -> list of single-entry dicts
            ``{usage_type: amount}``; each amount must be accepted by
            ``float``.

    Returns:
        str: a header with the account name and grand total, followed by one
        section per service (most expensive first) listing its usage types
        (most expensive first). Entries whose cost is exactly 0 are omitted.
    """
    def cost_of(detail):
        # Each detail dict holds a single {usage_type: amount} entry.
        return float(list(detail.values())[0])

    # Grand total over every usage type of every service.
    total_price = sum(cost_of(detail) for details in result.values() for detail in details)

    # Per-service subtotal, ranked from most to least expensive.
    ranked_services = sorted(
        ((service, sum(cost_of(detail) for detail in details))
         for service, details in result.items()),
        key=lambda pair: pair[1],
        reverse=True,
    )

    # NOTE: the "Acount" spelling is emitted at runtime and kept unchanged.
    chunks = ["Acount: " + account_name + "\nDaily Total : $" + str(total_price) + "\n"]

    for service, price in ranked_services:
        if price == 0:
            continue
        chunks.append(f"{service}: ${price}\n")
        # Usage types of this service, most expensive first.
        for detail in sorted(result[service], key=cost_of, reverse=True):
            detail_price = cost_of(detail)
            if detail_price == 0:
                continue
            detail_name = list(detail.keys())[0]
            chunks.append(f"        {detail_name}: ${detail_price}\n")
        chunks.append("\n\n")

    return "".join(chunks)
64 | 
def generate_curl_message(message):
    """Return *message* wrapped as ``{"text": message}``, JSON-serialized to UTF-8 bytes."""
    return json.dumps({"text": message}).encode("utf-8")
68 | 
def post_message(url, data):
    """POST *data* to *url* with a JSON content type.

    Args:
        url: target URL.
        data: request body as bytes.

    Returns:
        The response object returned by ``urllib.request.urlopen``.
    """
    request = urllib.request.Request(url, headers={"Content-Type": "application/json"})
    return urllib.request.urlopen(request, data)
73 | 
def lambda_handler(event, context):
    """Lambda entry point: build the usage report and post it to Slack.

    *event* and *context* are not used.

    Returns:
        The ``status`` attribute of the webhook response.
    """
    hook_url = slack_hook_url
    payload = generate_curl_message(generate_slack_message(get_usage()))
    return post_message(hook_url, payload).status
81 |     
82 | 


--------------------------------------------------------------------------------
/monitor/azure_usage_report_to_slack.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import adal
  3 | import requests
  4 | import boto3
  5 | import os
  6 | from base64 import b64decode
  7 | from datetime import datetime, timezone, timedelta
  8 | 
  9 | encrypted_clinentId = os.environ['clientId']
 10 | encrypted_tenantId = os.environ['tenantId']
 11 | encrypted_clientSecret = os.environ['clientSecret']
 12 | encrypted_subscriptionId = os.environ['subscriptionId']
 13 | 
 14 | decrypt_clientId = boto3.client('kms').decrypt(
 15 |     CiphertextBlob=b64decode(encrypted_clinentId),
 16 |     EncryptionContext={'LambdaFunctionName': os.environ['AWS_LAMBDA_FUNCTION_NAME']}
 17 | )['Plaintext'].decode('utf-8')
 18 | 
 19 | decrypt_tenantId = boto3.client('kms').decrypt(
 20 |     CiphertextBlob=b64decode(encrypted_tenantId),
 21 |     EncryptionContext={'LambdaFunctionName': os.environ['AWS_LAMBDA_FUNCTION_NAME']}
 22 | )['Plaintext'].decode('utf-8')
 23 | 
 24 | decrypt_clientSecret = boto3.client('kms').decrypt(
 25 |     CiphertextBlob=b64decode(encrypted_clientSecret),
 26 |     EncryptionContext={'LambdaFunctionName': os.environ['AWS_LAMBDA_FUNCTION_NAME']}
 27 | )['Plaintext'].decode('utf-8')
 28 | 
 29 | decrypt_subscriptionId = boto3.client('kms').decrypt(
 30 |     CiphertextBlob=b64decode(encrypted_subscriptionId),
 31 |     EncryptionContext={'LambdaFunctionName': os.environ['AWS_LAMBDA_FUNCTION_NAME']}
 32 | )['Plaintext'].decode('utf-8')
 33 | 
 34 | 
 35 | class AzureUsage:
 36 | 
 37 |     def __init__(self):
 38 |         scope = "subscriptions/" + decrypt_subscriptionId
 39 |         self.costmanagementUrl = "https://management.azure.com/" + scope + "/providers/Microsoft.CostManagement/query?api-version=2019-11-01"
 40 |         authority_uri = os.environ['activeDirectoryEndpointUrl'] + "/" + decrypt_tenantId
 41 |         context = adal.AuthenticationContext(authority_uri)
 42 |         token = context.acquire_token_with_client_credentials(
 43 |             os.environ["resourceManagerEndpointUrl"],
 44 |             decrypt_clientId,
 45 |             decrypt_clientSecret)
 46 |         bearer = "bearer " + token.get("accessToken")
 47 |         self.headers = {"Authorization": bearer, "Content-Type": "application/json"}
 48 |         self.usagedata = []
 49 | 
 50 |     def run(self, date, grain="Monthly"):
 51 | 
 52 |         payload = {
 53 |             "type": "ActualCost",
 54 |             "dataSet": {
 55 |                 "granularity": grain,
 56 |                 "aggregation": {
 57 |                     "totalCost": {
 58 |                         "name": "PreTaxCost",
 59 |                         "function": "Sum"
 60 |                     },
 61 |                     "totalCostUSD": {
 62 |                         "name": "PreTaxCostUSD",
 63 |                         "function": "Sum"
 64 |                     }
 65 |                 }
 66 |             },
 67 |             "timeframe": "Custom",
 68 |             "timePeriod": {
 69 |                 "from": date,
 70 |                 "to": date
 71 |             }
 72 |         }
 73 | 
 74 |         payload['dataSet']['grouping'] = [{
 75 |             "type": "Dimension",
 76 |             "name": "ResourceGroupName"
 77 |         },
 78 |             {
 79 |                 "type": "Dimension",
 80 |                 "name": "ServiceName"
 81 |             },
 82 |             {
 83 |                 "type": "Dimension",
 84 |                 "name": "Meter"
 85 |             }
 86 |         ]
 87 | 
 88 |         payloadjson = json.dumps(payload)
 89 |         self.usagedata = []
 90 |         response = requests.post(self.costmanagementUrl, data=payloadjson, headers=self.headers)
 91 |         if response.status_code == 200:
 92 |             self.transform(payloadjson, response.text)
 93 |         else:
 94 |             print("error")
 95 |             print("error " + response.text)
 96 | 
 97 |         return self.usagedata
 98 | 
 99 |     def transform(self, payloadjson, response):
100 |         result = json.loads(response)
101 |         for record in result["properties"]["rows"]:
102 |             usageRecord = {}
103 |             for index, val in enumerate(record):
104 |                 columnName = result["properties"]["columns"][index]
105 |                 if columnName["type"] == "number":
106 |                     usageRecord[columnName["name"]] = float(val)
107 |                 else:
108 |                     usageRecord[columnName["name"]] = val
109 | 
110 |             self.usagedata.append(usageRecord)
111 | 
112 |         nextLink = result["properties"]["nextLink"]
113 |         if nextLink != None:
114 |             nextLinkResponse = requests.post(nextLink, data=payloadjson, headers=self.headers)
115 |             if nextLinkResponse.status_code == 200:
116 |                 self.transform(payloadjson, nextLinkResponse.text)
117 |             else:
118 |                 print("error in fetching next page " + nextLink)
119 |                 print("error " + nextLinkResponse.text)
120 | 
121 | 
def run_example():
    """Query yesterday's (UTC) usage at "daily" granularity and post it to Slack."""
    yesterday = datetime.now(timezone.utc).date() - timedelta(days=1)
    query_date = yesterday.strftime('%Y/%m/%d')
    usage_rows = AzureUsage().run(query_date, "daily")
    post_message(usage_rows)
127 | 
128 | 
def post_message(usageResult):
    """Format usage rows as a report and send it to Slack.

    Args:
        usageResult: list of dicts with the keys ``ResourceGroupName``,
            ``ServiceName``, ``Meter`` and ``PreTaxCostUSD``.

    The report lists the rows from most to least expensive, preceded by a
    header and the daily total.
    """
    ranked_rows = sorted(usageResult, key=lambda row: row['PreTaxCostUSD'], reverse=True)

    total = 0.0
    lines = []
    for row in ranked_rows:
        lines.append(f"{row['ResourceGroupName']} : {row['ServiceName']} : {row['Meter']} = {row['PreTaxCostUSD']}\n")
        total += float(row['PreTaxCostUSD'])

    # NOTE: the "Acount" spelling is emitted at runtime and kept unchanged.
    header = "Azure usage\nAcount: KMU@ddpslab.onmicrosoft.com\n"
    send_to_slack(header + f"Daily Total = {total}\n" + "".join(lines))
139 | 
140 | 
def send_to_slack(msg):
    """POST *msg* as the ``text`` field of a JSON body to the ``web_hook`` URL from the environment."""
    requests.post(url=os.environ['web_hook'], json={'text': msg})
145 | 
146 | 
def lambda_handler(event, context):
    """Lambda entry point; *event* and *context* are not used."""
    run_example()
149 | 
150 | 


--------------------------------------------------------------------------------
/monitor/ec2_management/README.md:
--------------------------------------------------------------------------------
 1 | # EC2 and EBS management manual
 2 | ### 이 문서는 aws의 instance 및 ebs 자원을 관리하기 위한 설명서입니다.
 3 | 
 4 | 
 5 | ## 1. 파일 소개
 6 | ### aws_ec2_report_to_slack.py
 7 | - 서울 리전의 람다 함수인 `usage_ec2_report` 의 파일입니다.
 8 | - 서울 리전의 이벤트 브릿지인 `usage-EC2-morning` 과 `usage-EC2-night` 가 트리거로 존재합니다.
 9 | - 매일 오전 8시 32분과 오후 10시에 실행 중이거나 정지해 둔 인스턴스의 목록을 슬랙으로 전송합니다.
10 | - 또한 인스턴스와 밀접한 관계가 있는 볼륨 중 인스턴스와 연결되어 있지 않은 목록을 추려내어 슬랙으로 전송합니다.
11 | - 하루에 2번씩 실행되는 함수이기에 에러가 발생할 시 빠른 디버깅을 위해 `slack_msg_sender` 의 `send_slack_message` 를 이용하여 에러메세지를 전송합니다.
12 | 
13 | 
14 | ### aws_delete_snapshot.py
15 | - 서울 리전의 람다 함수인 `usage_snapshot_cleaning` 의 파일입니다.
16 | - 이 파일은 수동으로 실행됩니다. (자세한 작동 방법은 이 설명서의 `2. 파일 실행` 을 확인하시길 바랍니다.)
17 | - 계정에 존재하는 특이점 없는 스냅샷을 모두 삭제합니다.
18 | - 볼륨과 연결되어 있는 스냅샷, AMI 구성을 위한 스냅샷, 백업을 위해 Name Tag를 붙여둔 스냅샷을 제외하고 모두 삭제되는 것에 유의하십시오.
19 | - 정리한 내용을 슬랙으로 전송한 후 종료합니다.
20 | 
21 | 
22 | ## 2. 파일 실행
23 | ### a. usage_ec2_report
24 | 1. 정상적인 실행
25 |     - 제대로 실행될 경우 슬랙으로 현재 실행 중인 인스턴스와 정지해둔 인스턴스의 목록을 확인할 수 있습니다.
26 | 2. 비정상적인 실행
27 |     - 실행 중 오류가 생길 경우 슬랙으로 에러 메세지를 전송합니다.
28 |     - 이때 처음 에러가 발생한 곳부터 여러 에러 메세지가 전송되었을 수 있습니다.
29 |     - 제일 처음 전송된 에러 메세지부터 확인하며 디버깅합니다.
30 | 
31 | 
32 | ### b. usage_snapshot_cleaning
33 | 1. 함수 실행 전
34 |     - 함수를 실행하기 전 삭제하길 원치 않는 스냅샷에 모두 Name Tag를 설정하십시오.
35 |     - 이 함수를 실행하면, 특이점 없는 일반 스냅샷이 모든 리전에서 사라진다는 것을 명심하십시오.
36 |     - 함수 실행 시 삭제된 스냅샷을 복원하는 것이 어렵습니다.
37 | 
38 | 2. 함수 실행 방법
39 |     - aws 계정 내에 서울 리전의 lambda 목록을 확인합니다.
40 |     - `usage_snapshot_cleaning` 을 찾아 클릭합니다.
41 |     - Code source 창에서 `test` 버튼을 눌러 함수를 실행합니다.
42 | 
43 | 3. 함수 실행 후
44 |     - 환경 변수로 설정해둔 슬랙으로 메세지가 전송되어야 합니다.
45 |     - 실행한 창에서 성공적으로 끝마치었다는 메세지를 확인합니다.
46 |     - 메세지를 확인하지 못하였다면 작업에 이상이 생겼을 수 있으니 실행 로그를 확인하시길 바랍니다.
47 | 
48 | 
49 | 
50 | # 주의 사항
51 | 1. 설정해둔 aws 내의 함수에 변경 사항이 생기면 실행이 원활하지 않을 수 있습니다.
52 | 2. 스냅샷 정리 파일은 수동으로 실행하는 파일입니다. 언제든 이벤트 브릿지를 설정하여 자동화할 수 있습니다.


--------------------------------------------------------------------------------
/monitor/ec2_management/aws_delete_snapshot.py:
--------------------------------------------------------------------------------
  1 | import boto3
  2 | import urllib.request, json, os
  3 | from datetime import datetime, timedelta, timezone
  4 | from slack_msg_sender import send_slack_message
  5 | 
  6 | SLACK_URL = os.environ['SLACK_URL']
  7 |     
  8 | sts_client = boto3.client('sts')
  9 | response = sts_client.get_caller_identity()
 10 | ACCOUNT_ID = response['Account']
 11 | 
 12 | 
 13 | # remove_ami : available 상태가 아닌 모든 AMI 삭제
 14 | def remove_ami(ebs):
 15 |     ami_count = 0
 16 |     ami_dic = {"available":[], "disabled":[]}
 17 |     all_ami = ebs.describe_images(
 18 |         Owners=[ACCOUNT_ID], 
 19 |         Filters=[{'Name': 'is-public','Values': ['false']}],
 20 |         IncludeDeprecated=True,
 21 |         IncludeDisabled=True,
 22 |     )
 23 |     for ami in all_ami.get("Images"):
 24 |         storage = ami.get("BlockDeviceMappings")[0].get("Ebs")
 25 |         if ami.get("State") != "available":
 26 |             try:
 27 |                 ebs.deregister_image(ImageId=ami.get("ImageId"))
 28 |                 ami_dic["disabled"].append(storage.get("SnapshotId"))
 29 |                 ami_count += 1
 30 |             except Exception as e:
 31 |                 send_slack_message(f"AMI 삭제 실패 :\n{e}")
 32 |         else:
 33 |             ami_dic["available"].append(storage.get("SnapshotId")) 
 34 | 
 35 |     return ami_count, ami_dic
 36 | 
 37 | 
 38 | # remove_snapshot : tag 를 부여하지 않은 모든 스냅샷 삭제
 39 | def remove_snapshot(ebs, ami_dic):
 40 |     snapshot_count = 0
 41 |     snapshots = ebs.describe_snapshots(OwnerIds=[ACCOUNT_ID])
 42 |     # snapshot list in region
 43 |     for snapshot in snapshots["Snapshots"]:
 44 |         try:
 45 |             if snapshot["SnapshotId"] not in ami_dic["available"] or snapshot["SnapshotId"] in ami_dic["disabled"]:
 46 |                 ebs.delete_snapshot(SnapshotId=snapshot["SnapshotId"])
 47 |                 snapshot_count += 1
 48 |             if snapshot.get("Tags"):
 49 |                 continue
 50 |         except Exception as e:
 51 |                 send_slack_message(f"스냅샷 삭제 실패 :\n{e}")
 52 |     
 53 |     return snapshot_count
 54 | 
 55 | 
 56 | # created message : 슬랙에 알릴 내용을 메세지로 생성
 57 | def created_message(result):
 58 |     message = ""
 59 |     for region in result:
 60 |         item = result[region]
 61 |         if item.get("ami_count") > 0 or item.get("snapshot_count") > 0:
 62 |             message += f"{region}\n"
 63 |         if item.get("ami_count") > 0:
 64 |             message += f"> AMI {item.get("ami_count")}개 삭제 \n"
 65 |         if item.get("snapshot_count") > 0:
 66 |             message += f"> 스냅샷 {item.get("snapshot_count")}개 삭제 \n"
 67 |     
 68 |     if message == "":
 69 |         message = "> There is no snapshot list to delete."
 70 |     return message
 71 | 
 72 | 
 73 | # slack message : 생성한 메세지를 슬랙으로 전달
 74 | def slack_message(message):
 75 |     payload = {"text": message}
 76 |     data = json.dumps(payload).encode("utf-8")
 77 | 
 78 |     req = urllib.request.Request(SLACK_URL)
 79 |     req.add_header("Content-Type", "application/json")
 80 |     return urllib.request.urlopen(req, data)
 81 | 
 82 | 
 83 | # lambda handler : 람다 실행
 84 | def lambda_handler(event, context):
 85 |     # Record code execution time.
 86 |     utc_time = datetime.now(timezone.utc)
 87 |     korea_time = (utc_time + timedelta(hours=9)).strftime("%Y-%m-%d %H:%M")
 88 | 
 89 |     # Get a region list from ec2.
 90 |     client = boto3.client('ec2')
 91 |     regions = [ region['RegionName'] for region in client.describe_regions()['Regions'] ]
 92 | 
 93 |     # Set a list value for recording snapshot list by region.
 94 |     result = {}
 95 | 
 96 |     # Get snapshot lists by get_snapshot_list function.
 97 |     for region in regions:
 98 |         ebs = boto3.client('ec2', region_name=region)
 99 |         ami_count, ami_list = remove_ami(ebs)
100 |         snapshot_count = remove_snapshot(ebs, ami_list)
101 |         if ami_count > 0 or snapshot_count > 0:
102 |             result[region] = {"ami_count":ami_count, "snapshot_count":snapshot_count}
103 |         
104 |     # Write a message to send to slack
105 |     head_message = f"*snapshot management* ({korea_time})\n"
106 |     message = created_message(result)
107 |     
108 |     # Send a message to slack
109 |     slack_message(head_message)
110 |     slack_message(message)
111 |     
112 |     return "The snapshot is deleted succesfully. Check the Slack message."


--------------------------------------------------------------------------------
/monitor/ec2_management/aws_ec2_report_to_slack.py:
--------------------------------------------------------------------------------
  1 | import boto3, re, os
  2 | import urllib.request, urllib.parse, json
  3 | from datetime import datetime, timezone, timedelta
  4 | from slack_msg_sender import send_slack_message
  5 | 
  6 | 
  7 | SLACK_URL = os.environ['SLACK_DDPS']
  8 | 
  9 | # get instance items : 모든 리전의 인스턴스 탐색 후 리스트 반환
 10 | def get_instance_items(regions):
 11 |     try:
 12 |         running_instances = []
 13 |         stopped_instances = []
 14 | 
 15 |         # 리전에 존재하는 모든 인스턴스 탐색
 16 |         for ec2_region in regions:
 17 |             ec2_list = boto3.client('ec2', region_name=ec2_region)
 18 |             instances_data = ec2_list.describe_instances(Filters=[{'Name': 'instance-state-name', 'Values': ['running', 'stopped']}]).get('Reservations')
 19 |             
 20 |             if not instances_data:
 21 |                 continue
 22 |             
 23 |             current_time = datetime.now(timezone.utc)
 24 | 
 25 |             # 한 리전의 인스턴스 정보 추출
 26 |             for instances in instances_data:
 27 |                 if instances.get('Instances') is None:
 28 |                     continue
 29 | 
 30 |                 for instance in instances.get('Instances'):
 31 |                     # 인스턴스 탐색
 32 |                     key_name = instance.get('KeyName')
 33 | 
 34 |                     instance_info = instance.get('InstanceId')
 35 |                     if instance.get('Tags') is not None:
 36 |                         for tag in instance.get('Tags'):
 37 |                                 if tag.get('Key') == 'Name':
 38 |                                     instance_info = tag.get('Value')
 39 |                                     break
 40 |                         
 41 |                     instance_type = instance.get('InstanceType')
 42 |                     instance_state = instance['State']['Name']
 43 |                     if instance_state == 'running':
 44 |                         launch_time = instance['LaunchTime'].replace(tzinfo=timezone.utc)
 45 |                         instance_time = current_time - launch_time
 46 |                     else:
 47 |                         stopped_time = re.findall(r'.*\((.*)\)', instance['StateTransitionReason'])[0][:-4]
 48 |                         instance_time = current_time - datetime.strptime(stopped_time, '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc)
 49 |                     days = instance_time.days
 50 |                     hours = instance_time.seconds // 3600
 51 |                     minutes = (instance_time.seconds % 3600) // 60
 52 | 
 53 |                     # 인스턴스의 볼륨ID 확인
 54 |                     for mapping in instance['BlockDeviceMappings']:
 55 |                         volume_id = mapping['Ebs']['VolumeId']
 56 | 
 57 |                     # 인스턴스 저장
 58 |                     instance_dsc = {'region':ec2_region, 'key_name':key_name, 'info':instance_info, 'type':instance_type, 'volume':volume_id, 'time_days':days, 'time_hours':hours, 'time_minutes':minutes}
 59 |                     if instance_state == 'running':
 60 |                         running_instances.append(instance_dsc)
 61 |                     else:
 62 |                         stopped_instances.append(instance_dsc)
 63 | 
 64 |         # 인스턴스 항목 내림차순 정렬
 65 |         sorted_running_instances = sorted(running_instances, key=lambda x: (x['time_days'], x['time_hours'], x['time_minutes']), reverse=True)
 66 |         sorted_stopped_instances = sorted(stopped_instances, key=lambda x: (x['time_days'], x['time_hours'], x['time_minutes']), reverse=True)
 67 | 
 68 |         return sorted_running_instances, sorted_stopped_instances
 69 |     except Exception as e:
 70 |         send_slack_message(f"인스턴스 조회 실패\n{e}")
 71 | 
 72 | 
 73 | # get volume items : 모든 리전의 볼륨 탐색 후 리스트 반환
 74 | def get_volume_items(regions):
 75 |     try:
 76 |         orphaned_volumes = []
 77 | 
 78 |         # 리전 별로 volume 탐색
 79 |         for volume_region in regions:
 80 |             volume_list = boto3.client('ec2', region_name=volume_region)
 81 |             volumes = volume_list.describe_volumes(Filters=[{'Name': 'status', 'Values': ['available']}])
 82 |             
 83 |             current_time = datetime.now(timezone.utc)
 84 |             # 하나의 EBS 볼륨 확인
 85 |             if volumes.get('Volumes') is not None:
 86 |                 for volume in volumes.get('Volumes'):
 87 |                     volume_id = volume.get('VolumeId')
 88 |                     size_gb = volume.get('Size')
 89 |                     volume_type = volume.get('VolumeType')
 90 |                     snapshot_id = volume.get('SnapshotId')
 91 |                     created_time = current_time - volume.get('CreateTime').replace(tzinfo=timezone.utc)
 92 |                     
 93 |                     # check the callisto volume
 94 |                     callisto_volume = False
 95 |                     if volume.get('Tags') is not None:
 96 |                         for key in volume['Tags']:
 97 |                             if "kubernetes.io" in key.get('Key'):
 98 |                                 callisto_volume = True
 99 |                                 break
100 |                         if callisto_volume and created_time.days <= 14:
101 |                             continue
102 |                     orphaned_volumes.append({'region': volume_region, 'id':volume_id, 'type':volume_type, 'size':size_gb, 'snapshot':snapshot_id, 'time':created_time.days, 'callisto':callisto_volume})
103 | 
104 |         sorted_orphaned_volumes = sorted(orphaned_volumes, key=lambda x: (x['time']), reverse=True)
105 |         return sorted_orphaned_volumes
106 |     except Exception as e:
107 |         send_slack_message(f"볼륨 조회 실패\n{e}")
108 | 
109 | 
# created message : turn the instance and volume lists into a message
def created_message(head_message, running_list, stopped_list, volume_list):
    """Build the Slack report text for instances and orphaned volumes.

    Args:
        head_message: text placed at the start of the report.
        running_list: running-instance dicts from ``get_instance_items``.
        stopped_list: stopped-instance dicts from ``get_instance_items``.
        volume_list: volume dicts from ``get_volume_items``.

    Returns:
        str: the report; when all three lists are empty, the header followed
        by a fixed notice. On any error the error is reported through
        ``send_slack_message`` and None is returned.
    """
    try:
        parts = []

        if len(running_list) > 0:
            parts.append(f"\n[Running EC2 Instances] ({len(running_list)})\n")
            for item in running_list:
                line = f"{item['region']} / {item['info']}({item['type']}) / {item['key_name']} / {item['volume']} ~ {item['time_days']}일 {item['time_hours']}시간 {item['time_minutes']}분간"
                # Green for under one day or more than three days; red for
                # one to three days.
                if item['time_days'] == 0 or item['time_days'] > 3:
                    icon = ":large_green_circle:"
                else:
                    icon = ":red_circle:"
                parts.append(f"{line} 실행 중 {icon}\n")

        if len(stopped_list) > 0:
            parts.append(f"\n[Stopped EC2 Instances] ({len(stopped_list)})\n")
            for item in stopped_list:
                line = f"{item['region']} / {item['info']}({item['type']}) / {item['key_name']} / {item['volume']} ~ {item['time_days']}일 {item['time_hours']}시간 {item['time_minutes']}분간"
                # The marker changes at 7 and at 13 days.
                if item['time_days'] < 7:
                    icon = ":white_circle:"
                elif item['time_days'] < 13:
                    icon = ":large_yellow_circle:"
                else:
                    icon = ":large_brown_circle:"
                parts.append(f"{line} 정지 중 {icon}\n")

        if len(volume_list) > 0:
            parts.append(f"\n[Orphaned Volumes] ({len(volume_list)})\n")
            for volume in volume_list:
                icon = ":comet:" if volume['callisto'] else ":warning:"
                parts.append(f"{volume['region']} / {volume['id']} / {volume['type']} / {volume['size']} / {volume['snapshot']} ~ {volume['time']}일 동안 존재 {icon}\n")

        # Nothing was added after the header.
        if not parts:
            parts.append("No instances have been running or stopped.")

        return head_message + "".join(parts)
    except Exception as e:
        send_slack_message(f"메세지 생성 실패\n{e}")
150 | 
151 | 
# slack message : deliver the generated message to Slack
def slack_message(message):
    """POST *message* to ``SLACK_URL`` as a JSON ``{"text": ...}`` body.

    Returns:
        The response object returned by ``urllib.request.urlopen``.
    """
    body = json.dumps({"text": message}).encode("utf-8")
    request = urllib.request.Request(SLACK_URL, headers={"Content-Type": "application/json"})
    return urllib.request.urlopen(request, body)
160 | 
161 | 
# lambda handler : Lambda entry point
def lambda_handler(event, context):
    """Report running/stopped instances and orphaned volumes to Slack.

    *event* and *context* are not used.

    Returns:
        str: a fixed success message, or a fixed failure message when
        building or sending the report raised.
    """
    # Timestamp of this run, shifted from UTC by +9 hours for display.
    korea_time = (datetime.now(timezone.utc) + timedelta(hours=9)).strftime("%Y-%m-%d %H:%M")
    head_message = f"Account: {os.environ['EMAIL']}\n" + korea_time + "\n"

    # Placeholders that stay in place when the collection step fails.
    running_instances, stopped_instances, orphaned_volumes = 0, 0, 0

    try:
        # Region list taken from the default-region EC2 client.
        region_names = [
            entry['RegionName']
            for entry in boto3.client('ec2').describe_regions()['Regions']
        ]
        running_instances, stopped_instances = get_instance_items(region_names)
        orphaned_volumes = get_volume_items(region_names)
    except Exception as e:
        send_slack_message(f"인스턴스 관리가 정상적으로 이루어지지 않았습니다.\n{e}")

    try:
        report = created_message(head_message, running_instances, stopped_instances, orphaned_volumes)
        slack_message(report)
    except Exception as e:
        send_slack_message(f"메세지 생성이 원활하게 이루어지지 않았습니다.\n{e}")
        return "This instance management was failed. Check the Code or instances in aws."
    return "The Instance List was successfully sent in a Slack. Check the Slack message."


--------------------------------------------------------------------------------
/monitor/ec2_management/aws_report_snapshot.py:
--------------------------------------------------------------------------------
  1 | import boto3
  2 | import urllib.request, json, os
  3 | from datetime import datetime, timedelta, timezone
  4 | from slack_msg_sender import send_slack_message
  5 | 
  6 | SLACK_URL = os.environ['SLACK_URL']
  7 |     
  8 | sts_client = boto3.client('sts')
  9 | response = sts_client.get_caller_identity()
 10 | ACCOUNT_ID = response['Account']
 11 | 
 12 | 
 13 | # remove_ami : available 상태가 아닌 모든 AMI 삭제
 14 | def remove_ami(ebs):
 15 |     ami_count = 0
 16 |     ami_dic = {"available":[], "disabled":[]}
 17 |     all_ami = ebs.describe_images(
 18 |         Owners=[ACCOUNT_ID], 
 19 |         Filters=[{'Name': 'is-public','Values': ['false']}],
 20 |         IncludeDeprecated=True,
 21 |         IncludeDisabled=True,
 22 |     )
 23 |     for ami in all_ami.get("Images"):
 24 |         storage = ami.get("BlockDeviceMappings")[0].get("Ebs")
 25 |         if ami.get("State") != "available":
 26 |             try:
 27 |                 ami_dic["disabled"].append(storage.get("SnapshotId"))
 28 |                 ami_count += 1
 29 |             except Exception as e:
 30 |                 send_slack_message(f"AMI 삭제 실패 :\n{e}")
 31 |         else:
 32 |             ami_dic["available"].append(storage.get("SnapshotId")) 
 33 | 
 34 |     return ami_count, ami_dic
 35 | 
 36 | 
 37 | # remove_snapshot : tag 를 부여하지 않은 모든 스냅샷 삭제
 38 | def remove_snapshot(ebs, ami_dic):
 39 |     snapshot_count = 0
 40 |     snapshots = ebs.describe_snapshots(OwnerIds=[ACCOUNT_ID])
 41 |     # snapshot list in region
 42 |     for snapshot in snapshots["Snapshots"]:
 43 |         try:
 44 |             if snapshot["SnapshotId"] not in ami_dic["available"] or snapshot["SnapshotId"] in ami_dic["disabled"]:
 45 |                 snapshot_count += 1
 46 |             if snapshot.get("Tags"):
 47 |                 continue
 48 |         except Exception as e:
 49 |                 send_slack_message(f"스냅샷 삭제 실패 :\n{e}")
 50 |     
 51 |     return snapshot_count
 52 | 
 53 | 
 54 | # created message : 슬랙에 알릴 내용을 메세지로 생성
 55 | def created_message(result):
 56 |     message = ""
 57 |     for region in result:
 58 |         item = result[region]
 59 |         if item.get("ami_count") > 0 or item.get("snapshot_count") > 0:
 60 |             message += f"{region}\n"
 61 |         if item.get("ami_count") > 0:
 62 |             message += f"> AMI {item.get("ami_count")}개 삭제 예정 \n"
 63 |         if item.get("snapshot_count") > 0:
 64 |             message += f"> 스냅샷 {item.get("snapshot_count")}개 삭제 예정 \n"
 65 |     
 66 |     if message == "":
 67 |         message = "> There is no snapshot list to delete."
 68 |     return message
 69 | 
 70 | 
 71 | # slack message : 생성한 메세지를 슬랙으로 전달
 72 | def slack_message(message):
 73 |     payload = {"text": message}
 74 |     data = json.dumps(payload).encode("utf-8")
 75 | 
 76 |     req = urllib.request.Request(SLACK_URL)
 77 |     req.add_header("Content-Type", "application/json")
 78 |     return urllib.request.urlopen(req, data)
 79 | 
 80 | 
 81 | # lambda handler : 람다 실행
 82 | def lambda_handler(event, context):
 83 |     # Record code execution time.
 84 |     utc_time = datetime.now(timezone.utc)
 85 |     korea_time = (utc_time + timedelta(hours=9)).strftime("%Y-%m-%d %H:%M")
 86 | 
 87 |     # Get a region list from ec2.
 88 |     client = boto3.client('ec2')
 89 |     regions = [ region['RegionName'] for region in client.describe_regions()['Regions'] ]
 90 | 
 91 |     # Set a list value for recording snapshot list by region.
 92 |     result = {}
 93 | 
 94 |     # Get snapshot lists by get_snapshot_list function.
 95 |     for region in regions:
 96 |         ebs = boto3.client('ec2', region_name=region)
 97 |         ami_count, ami_list = remove_ami(ebs)
 98 |         snapshot_count = remove_snapshot(ebs, ami_list)
 99 |         if ami_count > 0 or snapshot_count > 0:
100 |             result[region] = {"ami_count":ami_count, "snapshot_count":snapshot_count}
101 |         
102 |     # Write a message to send to slack
103 |     head_message = f"*snapshot management* ({korea_time})\n"
104 |     message = created_message(result)
105 |     
106 |     # Send a message to slack
107 |     slack_message(head_message)
108 |     slack_message(message)
109 |     
110 |     return "The snapshot is deleted succesfully. Check the Slack message."


--------------------------------------------------------------------------------
/monitor/ecr_management/aws_ecr_lambda_report_to_slack.py:
--------------------------------------------------------------------------------
  1 | import boto3
  2 | import get_lambda_object
  3 | import get_ecr_object
  4 | import os
  5 | import json
  6 | from datetime import datetime, timedelta, timezone
  7 | from slack_utils import send_message_to_slack, send_error_message_to_slack
  8 | 
# strftime pattern used for every timestamp printed in the report.
time_string_format = "%Y-%m-%d %H:%M"
# Both values are read at import time; a missing variable raises KeyError.
SLACK_URL = os.environ['SLACK_DDPS']
EMAIL = os.environ['EMAIL']
 12 | 
 13 | def get_last_execution_time(client, log_group_name):
 14 |     try:
 15 |         # CloudWatch Logs 그룹에서 최근 로그 스트림 가져오기
 16 |         response_logs = client.describe_log_streams(
 17 |             logGroupName=log_group_name,
 18 |             orderBy='LastEventTime',
 19 |             descending=True
 20 |         )
 21 |     except client.exceptions.ResourceNotFoundException as e:
 22 |         return "No Log Group"
 23 |     except Exception as e:
 24 |         return "error in get_last_execution_time() : " + str(e)
 25 |     
 26 |     dt_utc = datetime.utcfromtimestamp(int(response_logs['logStreams'][0]['lastEventTimestamp'] / 1000.0))
 27 |     dt_korea = dt_utc + timedelta(hours=9)
 28 |     return dt_korea
 29 |     
 30 | def get_repository_string(client, ecr_repository_object, lambda_region_object):
 31 |     ret = f"repository name : {ecr_repository_object['repositoryName']} / "
 32 |     ret += f"repository size : {ecr_repository_object['totalSizeGB']:.3f} GB / "
 33 |     korea_utc_time = ecr_repository_object['lastPushedDate'].astimezone(timezone(timedelta(hours=9)))
 34 |     korea_utc_time = korea_utc_time.strftime(time_string_format)
 35 |     ret += f"last pushed date : {korea_utc_time} / "
 36 |     cur_use_lambda = []
 37 |     
 38 |     if lambda_region_object != None:
 39 |         for image in ecr_repository_object['images']:
 40 |             for imageUri in image['imageUris']:
 41 |                 for func in lambda_region_object:
 42 |                     if func['PackageType'] != 'Image':
 43 |                         continue
 44 |                     if func['ImageUri'] == imageUri:
 45 |                         cur_use_lambda.append(func)
 46 |                     
 47 |     if len(cur_use_lambda) <= 0:
 48 |         ret += f"current using lambda : None :red_circle:\n"
 49 |     else:
 50 |         ret += f"current using lambda : :large_green_circle:\n"
 51 |         for func in cur_use_lambda:
 52 |             name = func['FunctionName']
 53 |             last_execution_time = get_last_execution_time(client, func['LogGroupName'])
 54 |             if (type(last_execution_time) == type(datetime.now())):
 55 |                 last_execution_time.strftime(time_string_format)
 56 |             ret += f"\t- function name : {name} / "
 57 |             ret += f"last execution time : {last_execution_time}\n"
 58 |             
 59 |     return ret
 60 | 
 61 | def get_region_string(session, region, ecr_region_object, lambda_region_object):
 62 |     try:
 63 |         client = session.client('logs', region_name=region)
 64 |     except Exception as e:
 65 |         print(f"get_region_string(). error : {e}")
 66 |         return None
 67 |     
 68 |     ret = f"\n====== REGION {region} Total Size : {ecr_region_object['totalSizeGB']:.3f} GB ======\n"
 69 |     for repository_object in ecr_region_object['repositories']:
 70 |         repository_string = get_repository_string(client, repository_object, lambda_region_object)
 71 |         ret += repository_string
 72 |     
 73 |     return ret
 74 | 
 75 | def get_total_string(session, ecr_object, lambda_object):
 76 |     ret = ""
 77 |     repositoryNames_sorted_less = [(ecr_object[region]['totalSizeGB'], region) for region in ecr_object.keys()]
 78 |     repositoryNames_sorted_less.sort(reverse=True)
 79 |     for size, region in repositoryNames_sorted_less:
 80 |         region_string = get_region_string(session, region, ecr_object[region], lambda_object.get(region))
 81 |         ret += region_string
 82 |     return ret
 83 | 
def lambda_handler(event, context):
    """Lambda entry point: build the ECR usage report and post it to Slack.

    Each stage (collect ECR data, collect Lambda data, render the text, send
    it) has its own try/except. On failure the error is printed, forwarded to
    Slack and returned as a JSON string of the form {"message": <error>}.
    On success the ``status`` attribute of the send response is returned.
    ``event`` and ``context`` are not used.
    """
    session = boto3.Session()

    # Stage 1: ECR repositories and images for every region.
    try:
        ecr_object = get_ecr_object.get_region_ecr_object_dic(session)
    except Exception as e:
        print(e)
        send_error_message_to_slack(f"ECR 객체를 생성하는데 실패했습니다. error : {str(e)}")
        return json.dumps({'message' : str(e)})

    # Stage 2: Lambda functions for every region.
    try:
        lambda_object = get_lambda_object.get_region_lambda_object_dic(session)
    except Exception as e:
        print(e)
        send_error_message_to_slack(f"람다 객체를 생성하는데 실패했습니다. error : {str(e)}")
        return json.dumps({'message' : str(e)})

    # Current time at UTC+9, shown in the report header.
    cur_korea_utc = datetime.now(timezone(timedelta(hours=9)))

    # Header: title, account, timestamp and a legend for the two status icons.
    total_string = "[ECR repository 사용 현황]\n"
    total_string += f"Account: {EMAIL}\n"
    total_string += cur_korea_utc.strftime(time_string_format) + "\n"
    total_string += "해당 이미지와 연결된 람다 함수가 있는 경우 : :large_green_circle:\n"
    total_string += "해당 이미지와 연결된 람다 함수가 없는 경우 : :red_circle:\n"

    # Stage 3: render the per-region sections.
    try:
        total_string += get_total_string(session, ecr_object, lambda_object)
    except Exception as e:
        print(e)
        send_error_message_to_slack(f"ECR, 람다 객체 문자열화에 실패했습니다. error : {str(e)}")
        return json.dumps({'message' : str(e)})

    # Stage 4: send the finished report.
    try:
        response = send_message_to_slack(total_string)
        return response.status
    except Exception as e:
        print(e)
        send_error_message_to_slack(f"ECR, 람다 객체 문자열의 슬랙 전송에 실패했습니다. error : {str(e)}")
        return json.dumps({'message' : str(e)})
123 |     
# Local run: invoke the handler without a Lambda event/context and print
# whatever it returns.
if __name__ == "__main__":
    response = lambda_handler(None, None)
    print(response)


--------------------------------------------------------------------------------
/monitor/ecr_management/get_ecr_object.py:
--------------------------------------------------------------------------------
 1 | import boto3
 2 | from datetime import datetime, timezone, timedelta
 3 | 
# Module-level error logs filled in by the functions below:
# (region, message) tuples for failed repository listings and
# (repository name, message) tuples for failed image listings.
ecr_repository_errors = []
ecr_image_errors = []
# Fixed UTC+9 offset used for the placeholder "last pushed" date.
korea_utc_timezone_info = timezone(timedelta(hours=9))
 7 | 
 8 | def get_region_ecr_object(client, region):
 9 |     print(f"get {region} region's ecr object")
10 |     ret = { 'repositories': [], 'totalSizeGB': 0 }
11 |     try:
12 |         response = client.describe_repositories()
13 |     except Exception as e:
14 |         ecr_repository_errors.append((region, str(e)))
15 |         return ret
16 | 
17 |     repositoryInfo = [(repo['repositoryName'], repo['repositoryUri']) for repo in response['repositories']]
18 | 
19 |     for repositoryName, repositoryUri in repositoryInfo:
20 |         repository_object = get_repository_object(client, repositoryName, repositoryUri)
21 |         ret['repositories'].append(repository_object)
22 |         ret['totalSizeGB'] += repository_object['totalSizeGB']
23 |     
24 |     return ret
25 | 
def get_repository_object(client, repositoryName, repositoryUri):
    """Collect the images of one ECR repository.

    Args:
        client: ECR client for the repository's region.
        repositoryName: name of the repository.
        repositoryUri: URI of the repository, used to build image URIs.

    Returns:
        A dict with 'repositoryName', 'repositoryUri', 'images',
        'totalSizeGB' and 'lastPushedDate'. Each image entry holds
        'imageTags', 'imageSizeGB', 'imagePushedAt' and 'imageUris'.
        When the images cannot be listed, the error is appended to
        ``ecr_image_errors`` and the object is returned with no images.
    """
    ret = {
        'repositoryName': repositoryName,
        'images': [],
        'totalSizeGB': 0,
        'repositoryUri': repositoryUri,
        # Placeholder older than any real push. It is set before the API call
        # so the key exists on the error path too; other code in this project
        # reads it without checking.
        'lastPushedDate': datetime(1111, 1, 1, 1, 1, 1, tzinfo=korea_utc_timezone_info),
    }
    try:
        # describe_images returns a limited number of results per call;
        # follow the pages so sizes and push dates cover every image.
        imageDetails = []
        for page in client.get_paginator('describe_images').paginate(repositoryName=repositoryName):
            imageDetails.extend(page['imageDetails'])
    except Exception as e:
        ecr_image_errors.append((repositoryName, str(e)))
        return ret

    for image in imageDetails:
        # Untagged images get the placeholder tag '-'.
        imageTags = image['imageTags'] if 'imageTags' in image else ['-']
        imageSizeGB = image['imageSizeInBytes'] / 1000000000.0
        imagePushedAt = image['imagePushedAt']
        ret['lastPushedDate'] = max(ret['lastPushedDate'], imagePushedAt)

        # Tagged images are addressed as "<uri>:<tag>", untagged ones by
        # digest as "<uri>@<digest>".
        uris = [
            f"{repositoryUri}:{tag}" if tag != '-' else f"{repositoryUri}@{image['imageDigest']}"
            for tag in imageTags
        ]
        ret['images'].append({
            'imageTags': imageTags,
            'imageSizeGB': imageSizeGB,
            'imagePushedAt': imagePushedAt,
            'imageUris': uris,
        })
        ret['totalSizeGB'] += imageSizeGB

    return ret
54 | 
55 | 
def get_region_ecr_object_dic(session):
    """Collect the ECR region objects of every region that has repositories.

    Args:
        session: boto3 session used to list regions and create ECR clients.

    Returns:
        A dict mapping region name to its region object. Regions without
        repositories are left out. Within a region, repositories are sorted
        by size and then by last push date, both descending.
    """
    # Fallback for repository objects that carry no 'lastPushedDate', so the
    # sort key never raises KeyError.
    oldest = datetime.min.replace(tzinfo=timezone.utc)

    ret = {}
    for region in session.get_available_regions('ecr'):
        client = session.client('ecr', region_name=region)
        region_object = get_region_ecr_object(client, region)
        if not region_object['repositories']:
            continue
        region_object['repositories'] = sorted(
            region_object['repositories'],
            key=lambda x: (x['totalSizeGB'], x.get('lastPushedDate', oldest)),
            reverse=True,
        )
        ret[region] = region_object
    return ret
68 |     
69 | 
70 | 
# Local run: collect the ECR objects with the default session and print
# every image entry of every repository in every region.
if __name__ == "__main__":
    session = boto3.Session()
    region_ecr_object_dic = get_region_ecr_object_dic(session)
    for region in region_ecr_object_dic.keys():
        region_ecr_object = region_ecr_object_dic[region]
        repositories = region_ecr_object['repositories']
        for repository_object in repositories:
            images = repository_object['images']
            for image in images:
                print(image)
81 | 


--------------------------------------------------------------------------------
/monitor/ecr_management/get_lambda_object.py:
--------------------------------------------------------------------------------
 1 | import boto3
 2 | from datetime import datetime, timezone, timedelta
 3 | 
# Module-level error log: (region or function name, message) tuples appended
# by get_region_lambda_object when an API call fails.
error_list = []
 5 | 
 6 | def get_region_lambda_object(client, region):
 7 |     ret = []
 8 |     functions = []
 9 |     marker = None
10 |     try:
11 |         while True:
12 |             if marker:
13 |                 response = client.list_functions(Marker=marker)
14 |             else:
15 |                 response = client.list_functions()
16 |             functions.extend(response['Functions'])
17 |             marker = response.get('NextMarker')
18 |             if not marker:
19 |                 break
20 |     except Exception as e:
21 |         error_list.append((region, str(e)))
22 |         return ret
23 | 
24 |     for function_object in functions:
25 |         func = {
26 |             'FunctionName': function_object['FunctionName'],
27 |             'MemorySize': function_object.get('MemorySize'), 
28 |             'LastModified': function_object['LastModified'], 
29 |             'PackageType': function_object['PackageType'],
30 |             'ImageUri': None,
31 |             'Description': function_object.get('Description'),
32 |             'LogGroupName': None
33 |         }
34 |         try:
35 |             response_func = client.get_function(FunctionName=function_object['FunctionName'])
36 |         except Exception as e:
37 |             error_list.append((function_object['FunctionName'], str(e)))
38 |             print(e)
39 |             ret.append(func)
40 |             continue
41 |         
42 |         if function_object['PackageType'] == 'Image':
43 |             func['ImageUri'] = response_func['Code']['ImageUri']
44 |         if 'LoggingConfig' in response_func['Configuration']:
45 |             func['LogGroupName'] = response_func['Configuration']['LoggingConfig']['LogGroup']
46 |         else:
47 |             func['LogGroupName'] = f"/aws/lambda/{function_object['FunctionName']}"
48 |         ret.append(func)
49 |     return ret
50 | 
51 | 
def get_region_lambda_object_dic(session):
    """Map each region that has Lambda functions to its function list.

    Visits every region reported by ``session.get_available_regions('lambda')``
    and keeps only the regions for which get_region_lambda_object returns a
    non-empty list.
    """
    by_region = {}
    for region_name in session.get_available_regions('lambda'):
        print(f"get {region_name} region's lambda_function_objects")
        lambda_client = session.client('lambda', region_name=region_name)
        functions = get_region_lambda_object(lambda_client, region_name)
        if len(functions) > 0:
            by_region[region_name] = functions
    return by_region
63 | 
64 | 
# Local run: collect the Lambda objects with the default session, print a
# one-line summary per function grouped by region, then print every error
# recorded while collecting.
if __name__ == "__main__":
    session = boto3.Session()
    regions = session.get_available_regions('lambda')
    region_lambda_object_dic = get_region_lambda_object_dic(session)
    
    for region in region_lambda_object_dic.keys():
        print(f"====== REGION {region} ======")
        functions = region_lambda_object_dic[region]
        for function_object in functions:
            print(f"function name : {function_object['FunctionName']} / "\
                    f"PackageType : {function_object['PackageType']} / "\
                    f"ImageUri : {function_object['ImageUri']}")

    for error in error_list:
        print(error)
80 | 


--------------------------------------------------------------------------------
/monitor/ecr_management/readme.md:
--------------------------------------------------------------------------------
  1 | # ecr management manual
  2 | 이 문서는 aws의 Elastic Container Registry 자원을 관리하기 위한 설명서입니다.
  3 | 
  4 | ## 1. 파일 소개
  5 | ### aws_ecr_lambda_report_to_slack.py
  6 | - 실행하게 되는 메인 파일입니다.
  7 | - 사용자 지정 ECR 객체, 람다 객체를 받아와 조인후 슬랙에 전송합니다.
  8 | - 실행 중 오류가 생길 경우 에러 메세지를 슬랙에 전송합니다.
  9 | ### get_ecr_object.py
 10 | - 사용자 지정 ECR 객체를 받아오는데 사용하는 파일입니다.
 11 | ### get_lambda_object.py
 12 | - 사용자 지정 lambda 객체를 받아오는데 사용하는 파일입니다.
 13 | ### slack_utils.py
 14 | - 슬랙으로 메시지를 전송하는 함수들이 있는 파일입니다.
 15 | 
 16 | ## 2. 환경설정
 17 | 1. aws lambda에서 실행할 시
 18 | 	- `aws_ecr_lambda_report_to_slack.py`가 최종 lambda_function을 실행하는 파일입니다.
 19 | 	- 핸들러 파일 이름을 위 파일으로 설정하거나, 위 파일의 이름을 `lambda_function.py`로 변경합니다.
 20 | 	- 나머지 `get_ecr_object.py`, `get_lambda_object.py`, `slack_utils.py` 파일들도 함께 올려줍니다. 해당 파일들은 이름을 동일하게 해주어야 합니다.
 21 | 	- Configuration -> Environment variable에 Key, Value 설정
 22 | 		- key1 = EMAIL, value1 = 원하는 이메일
 23 | 		- key2 = SLACK_DDPS, value2 = 보내고 싶은 slack hook url
 24 | 	- ECR, Lambda, CloudWatch들에 대해 ReadOnly Access가 가능한 role을 할당해 줍니다.
 25 | 2. 로컬에서 실행시
 26 | 	- 필수적인 환경변수 설정
 27 | 		- export EMAIL=원하는 이메일
 28 | 		- export SLACK_DDPS=보내고 싶은 slack hook url
 29 | 	- AWS 자격증명하기
 30 | 		- 두 가지 방법
 31 | 		1. 직접 환경변수 설정하기
 32 | 			- export AWS_ACCESS_KEY_ID=YOUR_ACCESS_KEY
			- export AWS_SECRET_ACCESS_KEY=YOUR_SECRET_ACCESS_KEY
 34 | 			- export AWS_DEFAULT_REGION=AWS_REGION_INFO
 35 | 		2. credential file 설정
 36 | 			- ~/.aws/credentials 파일에 다음 내용 넣기
 37 | 			```
 38 | 			[ddps-usage]
 39 | 			aws_access_key_id = YOUR_ACCESS_KEY
			aws_secret_access_key = YOUR_SECRET_ACCESS_KEY
 41 | 			```
 42 | 			- credential file을 설정하는 방식을 사용했다면 `aws_ecr_lambda_report_to_slack.py`의 `lambda_handler(event, context)`함수에서 첫 번째 라인의 `session = boto3.Session()`을 `session = boto3.Session(profile_name='ddps-usage')`로 변경해주어야 합니다.
 43 | 
 44 | ## 3. 파일 설명
 45 | ### 사용자 지정 객체
 46 | 1. ECR 객체 형식
 47 | ```
 48 | {
 49 | 	REGION_NAME : {
 50 | 		'repositories' : [
 51 | 			{
 52 | 				'repositoryName' : string,
 53 | 				'images' : [
 54 | 					{
 55 | 						'imageTags' : [],
						'imageSizeGB' : float,
						'imagePushedAt' : datetime,
						'imageUris' : [string]
 59 | 					},
 60 | 				],
 61 | 				'totalSizeGB' : int,
 62 | 				'repositoryUri' : string,
 63 | 				'lastPushedDate' : datetime
 64 | 			},
 65 | 		],
 66 | 		'totalSizeGB' : int,
 67 | 	}
 68 | } 
 69 | ```
 70 | 2. 람다 객체 형식
 71 | ```
 72 | {
 73 | 	REGION_NAME : [
 74 | 		{
 75 | 			'FunctionName' : string,
 76 | 			'MemorySize' : int,
 77 | 			'LastModified' : string,
 78 | 			'PackageType' : string,
 79 | 			'ImageUri' : string,
 80 | 			'Description' : string,
 81 | 			'LogGroupName' : string
 82 | 		},
 83 | 	]
 84 | }
 85 | ```
 86 | 	
 87 | ### aws_ecr_lambda_report_to_slack.py
 88 | - `get_last_execution_time(client, log_group_name)`
 89 | 	- 해당 로그 그룹의 마지막 로그 시간을 반환합니다. 반환하는 형식은 한국 UTC 시간입니다.
 90 | 	- 함수 인자로 넣어주는 client는 CloudWatch의 boto3 client여야 합니다.
 91 | 	- 로그 그룹이 없다면 "No Log Group"을 반환합니다.
 92 | 	- 에러가 난다면 에러 메세지를 반환합니다.
 93 | - `get_repository_string(client, ecr_repository_object, lambda_region_object)`
 94 | 	- ECR 리포지토리 객체와 람다 리전 객체를 조인하여 결과를 반환합니다. 반환 형식은 string입니다.
 95 | - `get_region_string(session, region, ecr_region_object, lambda_region_object)`
 96 | 	- ECR 리전 객체와 람다 리전 객체를 조인하여 결과를 반환합니다. 반환 형식은 string입니다.
 97 | - `get_total_string(session, ecr_object, lambda_object)`
 98 | 	- ECR 객체의 모든 리전을 순회하며 문자열화 한 후 최종 문자열을 반환합니다. 반환 형식은 string입니다.
 99 | - `lambda_handler(event, context)`
100 | 	- 내부 함수들을 실행 후 슬랙에 결과를 전송합니다.
101 | ### get_ecr_object.py
102 | - `get_region_ecr_object(client, region)`
103 | 	- 리전에 맞는 ECR 리전 객체를 반환합니다. 반환 형식은 dictionary입니다.
104 | 	- 파라미터의 client는 ECR의 boto3 client여야 합니다.
	- 에러가 발생하면 (리전, 에러 메세지) 형식의 튜플을 `ecr_repository_errors` 리스트에 기록하고, 리포지토리가 비어 있는 리전 객체를 반환합니다.
106 | - `get_repository_object(client, repositoryName, repositoryUri)`
107 | 	- 리포지토리 이름에 맞는 ECR 리포지토리 객체를 반환합니다. 반환 형식은 dictionary 입니다.
108 | 	- 리포지토리 내부의 이미지들을 순회하며 이미지가 마지막으로 푸시된 시각을 리포지토리 객체에 저장합니다.
	- 에러가 발생하면 (리포지토리 이름, 에러 메세지) 형식의 튜플을 `ecr_image_errors` 리스트에 기록하고, 이미지가 비어 있는 리포지토리 객체를 반환합니다.
110 | - `get_region_ecr_object_dic(session)`
111 | 	- ECR 객체를 반환합니다. 반환 형식은 dictionary 입니다.
112 | ### get_lambda_object.py
113 | - `get_region_lambda_object(client, region)`
	- 리전에 맞는 람다 리전 객체를 반환합니다. 반환 형식은 list 입니다.
	- 함수 목록 조회 중 에러가 발생하면 (리전, 에러 메세지) 형식의 튜플을 `error_list`에 기록하고 빈 list를 반환합니다.
116 | - `get_region_lambda_object_dic(session)`
117 | 	- 람다 객체를 반환합니다. 반환 형식은 dictionary 입니다.
118 | ### slack_utils.py
119 | - `send_message_to_slack(message)`
120 | 	- 파라미터로 들어온 메세지를 슬랙에 전송합니다.
121 | 	- 슬랙URL은 환경변수 `SLACK_DDPS`에 저장되어있어야 합니다.
122 | 	- 반환 형식은 HTTP response 형태입니다.
123 | - `send_error_message_to_slack(message)`
124 | 	- 파라미터로 들어온 에러 메세지를 슬랙에 전송합니다.
125 | 	- 해당 함수는 석현님이 작성하신 코드를 살짝 변형하였습니다.
126 | 


--------------------------------------------------------------------------------
/monitor/ecr_management/slack_utils.py:
--------------------------------------------------------------------------------
 1 | import boto3
 2 | from datetime import datetime, timezone, timedelta
 3 | import json
 4 | import os
 5 | import urllib
 6 | import inspect
 7 | 
# strftime pattern for timestamps.
time_string_format = "%Y-%m-%d %H:%M"
# Durations of half a year (182.5 days) and one year (365 days).
six_month = timedelta(days=365/2)
one_year = timedelta(days=365)
# Fixed UTC+9 offset.
korea_utc_timezone_info = timezone(timedelta(hours=9))
# Both values are read at import time; a missing variable raises KeyError.
SLACK_URL = os.environ['SLACK_DDPS']
EMAIL = os.environ['EMAIL']
14 | 
def send_message_to_slack(message):
    """POST ``message`` to the Slack webhook stored in SLACK_URL.

    Args:
        message: text to send; it is wrapped in a JSON ``{"text": ...}`` body.

    Returns:
        The response object produced by ``urllib.request.urlopen``.
    """
    # `import urllib` alone does not load the `urllib.request` submodule; the
    # attribute only exists if some other module happened to import it first.
    # Import it explicitly so this function works on its own.
    import urllib.request

    payload = {
        "text": message
    }
    data = json.dumps(payload).encode("utf-8")

    req = urllib.request.Request(SLACK_URL)
    req.add_header("Content-Type", "application/json")
    return urllib.request.urlopen(req, data)
24 | 
def send_error_message_to_slack(message):
    """Send ``message`` to Slack, prefixed with the caller's location.

    The prefix names the file, line number and function of the code that
    called this function, taken from the call stack.
    """
    # Index 1 of the stack is the frame that called this function.
    caller = inspect.stack()[1]
    location = f"File \"{caller.filename}\", line {caller.lineno}, in {caller.function} :"
    return send_message_to_slack(f"{location}\n{message}")
33 |     
34 | 


--------------------------------------------------------------------------------
/monitor/gcp_usage_report_to_slack.py:
--------------------------------------------------------------------------------
 1 | from google.oauth2 import service_account
 2 | from google.cloud import bigquery
 3 | 
 4 | import os
 5 | import requests
 6 | import tempfile
 7 | import json
 8 | 
 9 | webhook_url = os.environ['webhook']
10 | gcp_key_json = os.environ['credential']
11 | 
12 | gcp_key_dict = json.loads(gcp_key_json)
13 | 
14 | with tempfile.NamedTemporaryFile(mode='w', delete=False) as temp:
15 |     json.dump(gcp_key_dict, temp)
16 |     temp_path = temp.name
17 | 
18 | credentials = service_account.Credentials.from_service_account_file(temp_path)
19 | os.remove(temp_path)
20 | 
# Run the billing query on BigQuery and return its result
def query_gcp_price():
    """Query yesterday's billing rows and return them with their total cost.

    Returns:
        A tuple ``(rows, total)``. ``rows`` is the query result with the DATE
        column removed and every value converted to ``str``, in the
        column -> {row index -> value} layout of ``DataFrame.to_dict()``.
        ``total`` is the sum of the COST column.
    """
    bq_client = bigquery.Client(credentials=credentials, project=credentials.project_id)
    saved_query = """
    SELECT
    project.id as PROJECT,
    location.region as REGION,
    service.description as SERVICE,
    CAST(export_time AS DATE) as DATE,
    sku.description as DESCRIPTION,
    SUM(cost) as COST
    FROM `tpu_billing_data.gcp_billing_export_v1_01C268_9BA8E9_8952A9`
    WHERE CAST(export_time AS DATE) =  DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY)
    GROUP BY DATE, SERVICE, DESCRIPTION, REGION, PROJECT
    ORDER BY PROJECT, COST DESC, SERVICE
;
    """
    frame = bq_client.query(saved_query).to_dataframe()  # Make an API request.
    frame['DATE'] = frame['DATE'].astype(str)
    frame = frame.reset_index()

    # Sum the cost while the column is still numeric, before everything is
    # converted to text.
    total_bill = frame['COST'].sum()

    as_text = frame.drop(['DATE','index'], axis = 1).astype(str)
    return as_text.to_dict(), total_bill
45 | 
# Render the query result as report text
def Query_result_converter(res_dict):
    """Turn a column -> {row index -> text} mapping into report lines.

    For every row index ``0 .. len(res_dict['PROJECT']) - 1`` the columns are
    emitted in the mapping's own order: COST ends the line, SERVICE is
    written as-is, DESCRIPTION is wrapped as "(...) = " and any other column
    is followed by " : ".
    """
    # (prefix, suffix) placed around the value of each special column.
    decorations = {
        'COST': ("", "\n"),
        'SERVICE': ("", ""),
        'DESCRIPTION': ("(", ") = "),
    }
    default_decoration = ("", " : ")

    pieces = []
    for row in range(len(res_dict['PROJECT'])):
        for column, values in res_dict.items():
            prefix, suffix = decorations.get(column, default_decoration)
            pieces.append(prefix + values[row] + suffix)
    return "".join(pieces)
60 | 
url = webhook_url

# Build the usage report and post it to the Slack webhook
def slack_bot():
    """Query the billing data, post the report to Slack and return its text.

    Prints whether the webhook answered with HTTP 200. The rendered usage
    text is returned in both cases.
    """
    raw_usage, bill = query_gcp_price()
    usage = Query_result_converter(raw_usage)

    body = json.dumps({'text': f"GCP Usage\nDaily Total: {bill}(KRW)\n{usage}"})
    response = requests.post(
        url=url,
        data=body,
        headers={'Content-Type': 'application/json'}
    )

    if response.status_code != 200:
        print('Error sending Slack message: {}'.format(response.text))
    else:
        print('Slack message sent successfully')
    return usage
83 | 
def lambda_handler(event, context):
    """Lambda entry point: run slack_bot and return its result.

    ``event`` and ``context`` are not used.
    """
    return slack_bot()
86 | 


--------------------------------------------------------------------------------
/monitor/s3_management/README.md:
--------------------------------------------------------------------------------
# s3 management manual
 2 | ### 이 문서는 aws의 s3 버킷을 관리하기 위한 설명서입니다.
 3 | 
 4 | 
 5 | ## 1. s3 management files
 6 | ### aws_s3_standard_report.py
 7 | - 서울 리전의 람다 함수, `usage_s3_standard_report` 의 파일입니다.
 8 | - 서울 리전의 이벤트 브릿지인 `usage_report_s3_bucket` 가 트리거로 존재합니다.
 9 | - 매월 1일 오전 10시마다 s3에 존재하는 standard class인 버킷의 정보를 슬랙으로 전송합니다.
10 | - 버킷의 이름, standard class의 크기, 최근 액세스한 날을 확인할 수 있습니다.
11 | 
12 | ### aws_s3_archiving_report.py
13 | - 서울 리전의 람다 함수, `usage_s3_archiving_report` 의 파일입니다.
14 | - 서울 리전의 이벤트 브릿지인 `usage_report_s3_archiving` 가 트리거로 존재합니다.
15 | - 짝수월 1일, 5일 오전 10시 5분마다 glacier class로 이동해야 하는 버킷의 정보를 슬랙으로 전송합니다.
16 | - 버킷의 이름, standard class의 크기, 최근 액세스한 날을 확인할 수 있습니다.
17 | - 기본 값으로 6개월이 지정되어 있어, 최근 6개월 간 액세스하지 않은 버킷이 glacier class로 이동할 버킷에 선정됩니다.
18 | 
19 | ### aws_auto_s3_archiving.py
20 | - 서울 리전의 `usage_s3_management` ami 안에 존재하는 `aws_auto_s3_archiving.py` 의 파일입니다.
21 | - 짝수월 6일 오전 10시에 ec2를 생성한 후 코드를 실행하여 s3 버킷을 관리합니다.
22 | - 지정된 ami를 통해 `auto_archiving_management` 인스턴스를 생성한 후 s3 버킷을 자동으로 관리합니다.
23 | - s3 버킷 관리 이후 ec2가 자동으로 삭제 됩니다.
24 | - 서울 리전의 람다 함수, `usage_created_archiving_instance` 와 `usage_terminated_archiving_instance` 로 ec2를 관리합니다.
25 | - 서울 리전의 이벤트 브릿지인 `usage_run_auto_archiving` 가 트리거로 존재합니다.
- IAM user `ddps-usage` 의 정보를 이용하여 실행 자격을 증명합니다.
27 | - 실행 시 최소 2분 정도의 시간이 소요될 수 있으며 파일 용량에 따라 실행 시간이 결정됩니다.
28 | 
29 | ### usage_created_archiving_instance.py
30 | - 서울 리전의 람다 함수, `usage_created_archiving_instance` 의 파일입니다.
31 | - 트리거인 `usage_run_auto_archiving` 에 의해 실행되어 서울 리전에 S3를 관리할 수 있는 인스턴스를 생성합니다.
32 | - t2.micro 타입의 one-time, terminate 설정의 스팟 인스턴스가 생성됩니다.
33 | 
34 | ### usage_terminated_archiving_instance.py
35 | - 서울 리전의 람다 함수, `usage_terminated_archiving_instance` 의 파일입니다.
36 | - `aws_auto_s3_archiving.py` 내 작업이 끝나면 이 파일의 람다 함수를 호출하여 자동으로 실행됩니다.
37 | - `usage_created_archiving_instance.py` 을 통해 생성된 인스턴스를 terminated 합니다.
38 | 
39 | 
40 | ## 2. Architecture
1. `usage_report_s3_bucket` 트리거로 `usage_s3_standard_report` 람다 함수 실행
    매월 1일 오전 10시에 standard class 버킷의 정보를 슬랙으로 전송
43 |     
2. `usage_report_s3_archiving` 트리거로 `usage_s3_archiving_report` 람다 함수 실행
45 |     짝수월 1일과 5일 오전 10시 5분에 glacier class 로 이동해야 하는 버킷의 정보를 슬랙으로 전송
46 | 
3-a. `usage_run_auto_archiving` 트리거로 `usage_created_archiving_instance` 람다 함수 실행
48 |     짝수월 6일 오전 10시에 ec2 를 생성하고 실행함
49 | 
50 | 3-b. `auto_archiving_management` 이름의 ec2가 생성되고 2번의 결과인 버킷을 glacier class로 이동
51 |     아카이빙 결과 및 5GB 가 넘어 수동으로 옮겨야 하는 파일에 대한 정보를 슬랙으로 전송
52 | 
53 | 3-c. ec2 내에 존재하는 invoke lambda 를 통해 `usage_terminated_archiving_instance` 람다 함수 실행
54 |     사용한 ec2를 종료시키고, 관리를 종료한다는 메세지를 슬랙으로 전송
55 | 
56 | 
57 | ## 3. 사용자 정의
58 | 모든 람다 함수 내에 각각 필요한 환경 변수가 지정되어 있습니다.
59 | 환경 변수는 사용자가 정할 수 있으며 수정 시 해당 변수를 사용하고 있는 모든 람다에 반영하여야 합니다.
60 | 아래는 현재 적용된 기본 값 혹은 예시입니다. 민감한 내용이나 수시로 변경될 가능성이 있는 정보는 비공개합니다.
61 | 
62 | ```
63 | AMI_ID = 'ami-00000'
64 | DEADLINE_MONTHS = 6
65 | PASS_LIST = ['bucket_name1', 'bucket_name2']
66 | RUN_REGION = 'ap-northeast-2'
67 | SLACK_DDPS = 'ddps-lab'
68 | ```
69 | 
70 | ### 환경변수 이름 및 포함 함수
71 | AMI_ID : usage_created_archiving_instance
72 | DEADLINE_MONTHS : usage_s3_archiving_report, usage_created_archiving_instance
73 | PASS_LIST : usage_s3_archiving_report, usage_created_archiving_instance
74 | RUN_REGION : usage_created_archiving_instance, usage_terminated_archiving_instance
75 | SLACK_DDPS : usage_s3_standard_report, usage_s3_archiving_report, usage_created_archiving_instance, usage_terminated_archiving_instance
76 | 
77 | 
78 | # 주의 사항
79 | 1. 실행에 필요한 함수와 트리거가 수정 및 삭제되지 않도록 유의하십시오.
80 | 2. IAM user(`ddps-usage`), IAM roles(`usage-EBS-roles`, `usage-S3-roles`)이 삭제되지 않도록 유의하십시오.
81 | 3. `auto_archiving_management` 인스턴스 내에 민감한 정보가 담긴 파일이 있으니 유출하지 않도록 유의하십시오.
82 | 4. `usage_s3_management` ami와 연결된 스냅샷이 삭제되지 않도록 유의하십시오.


--------------------------------------------------------------------------------
/monitor/s3_management/aws_auto_s3_archiving.py:
--------------------------------------------------------------------------------
  1 | import boto3
  2 | from botocore.exceptions import ClientError
  3 | import urllib.request, urllib.parse, json, configparser
  4 | from datetime import datetime,  timedelta, timezone
  5 | 
  6 | 
  7 | # auto_archiving - 아카이브할 버킷 탐색 및 아카이브 진행
  8 | def auto_archiving(session, DEADLINE_MONTHS, pass_list):
  9 |     s3_client = session.client('s3')
 10 |     bucket_list = s3_client.list_buckets()
 11 |     buckets = bucket_list['Buckets']
 12 | 
 13 |     archiving_list = []
 14 |     error_list = []
 15 |     deadline = datetime.now() - timedelta(days=DEADLINE_MONTHS*30)
 16 | 
 17 |     for bucket in buckets:
 18 |         bucket_name = bucket['Name']
 19 | 
 20 |         # 아카이빙하지 않을 목록은 스킵함
 21 |         if bucket_name in pass_list:
 22 |             continue
 23 |         
 24 |         standard_size = 0
 25 |         last_accessed_date = "N/A"
 26 |         archiving_bucket = True
 27 | 
 28 |         try:
 29 |             bucket_objects = s3_client.list_objects_v2(Bucket=bucket_name)
 30 |         except (ClientError, NameError) as e:
 31 |             continue
 32 | 
 33 |         if 'Contents' in bucket_objects:
 34 |             last_accessed = []
 35 |             for content in bucket_objects['Contents']:
 36 |                 last_accessed.append(content['LastModified'].strftime("%Y-%m-%d"))
 37 |             last_accessed_date = max(last_accessed)
 38 |             item_date = datetime.strptime(last_accessed_date, "%Y-%m-%d")
 39 |             if item_date > deadline:
 40 |                 archiving_bucket = False
 41 |         
 42 |             if archiving_bucket == True and last_accessed_date != "N/A":
 43 |                 error_bucket = []
 44 |                 for content in bucket_objects['Contents']:
 45 |                     if content['StorageClass'] == 'STANDARD':
 46 |                         standard_size += content['Size']
 47 |                         # 아카이빙을 진행하는 코드 (.copy_object)
 48 |                         try:
 49 |                             s3_client.copy_object(Bucket=bucket_name, CopySource={'Bucket': bucket_name, 'Key': content['Key']}, Key=content['Key'], StorageClass='GLACIER')
 50 |                         except ClientError as e:
 51 |                             error_bucket.append([content['Key']])
 52 |                 if len(error_bucket) > 0:
 53 |                     error_list.append([bucket_name, error_bucket])
 54 |                 archiving_list.append([bucket_name, standard_size])
 55 |                 
 56 |     if len(archiving_list) > 0:
 57 |         ordered_archiving_list = sorted(archiving_list, key=lambda x: x[1], reverse=True)
 58 |     else:
 59 |         ordered_archiving_list = []
 60 |     return ordered_archiving_list, error_list
 61 |                         
 62 | 
 63 | # created message - 아카이브 결과를 메세지로 생성
 64 | def created_message(now_time, archiving_list, error_list):
 65 |     message = f'*s3 archiving management* ({now_time})'
 66 |     if len(archiving_list) > 0:
 67 |         count = 1
 68 |         message += f"\n{len(archiving_list)}개의 버킷을 Glacier로 옮겼습니다.\n"
 69 |         for bucket in archiving_list:
 70 |             if bucket[1] >= 1000000000:
 71 |                 message += f"\n{count}.  {bucket[0]}    {round(bucket[1]/1000000000, 2)}GB"
 72 |             elif bucket[1] >= 1000000:
 73 |                 message += f"\n{count}.  {bucket[0]}    {round(bucket[1]/1000000, 2)}MB"
 74 |             elif bucket[1] >= 1000:
 75 |                 message += f"\n{count}.  {bucket[0]}    {round(bucket[1]/1000, 2)}KB"
 76 |             else:
 77 |                 message += f"\n{count}.  {bucket[0]}    {bucket[1]}B"
 78 |             count += 1
 79 |     else:
 80 |         message += "\nGlacier로 옮길 항목이 없습니다.\n"
 81 |     if len(error_list) > 0:
 82 |         message += f"\n---\n{len(error_list)}개의 버킷에 5GB가 넘는 항목이 존재합니다.\n"
 83 |         for bucket in error_list:
 84 |             message += f"\n버킷 이름 : {bucket[0]}\n목록 :"
 85 |             for key in bucket[1]:
 86 |                 message += f"\n- {key}"
 87 |     return message
 88 | 
 89 | 
 90 | # slack message : 생성된 메세지를 슬랙으로 전달
 91 | def slack_message(message, url):
 92 |     payload = {"text": message}
 93 |     data = json.dumps(payload).encode("utf-8")
 94 | 
 95 |     req = urllib.request.Request(url)
 96 |     req.add_header("Content-Type", "application/json")
 97 |     return urllib.request.urlopen(req, data)
 98 | 
 99 | 
def parse_pass_list(raw_value):
    """Convert the PASS_LIST config value into a list of bucket names.

    The value arrives as one string. Both a plain comma-separated form
    ("a, b") and a Python list repr ("['a', 'b']") are accepted.
    Surrounding brackets, quotes and whitespace are dropped, and empty
    entries are ignored.
    """
    names = []
    for part in raw_value.strip().strip('[]').split(','):
        name = part.strip().strip('\'"').strip()
        if name:
            names.append(name)
    return names


if __name__ == '__main__':
    aws_profile = 'ddps-usage'
    session = boto3.Session(profile_name=aws_profile)

    config = configparser.ConfigParser()
    config.read('/home/ubuntu/config.ini')

    DEADLINE_MONTHS = int(config.get('s3_setting', 'DEADLINE_MONTHS'))
    SLACK_URL = config.get('s3_setting', 'SLACK_URL')
    # config.get() returns a plain string; testing `bucket_name in <string>`
    # would be a substring match, so turn it into a real list of names first.
    pass_list = parse_pass_list(config.get('s3_setting', 'PASS_LIST'))

    # Report time is shown in Korea time (UTC+9).
    utc_time = datetime.now(timezone.utc)
    korea_time = (utc_time + timedelta(hours=9)).strftime("%Y-%m-%d %H:%M")

    archiving_list, error_list = auto_archiving(session, DEADLINE_MONTHS, pass_list)
    message = created_message(korea_time, archiving_list, error_list)
    response = slack_message(message, SLACK_URL)


--------------------------------------------------------------------------------
/monitor/s3_management/aws_s3_archiving_report.py:
--------------------------------------------------------------------------------
  1 | import boto3, os
  2 | from botocore.exceptions import ClientError
  3 | import urllib.request, urllib.parse, json
  4 | from datetime import datetime, timedelta, timezone
  5 | 
  6 | SLACK_URL = os.environ['SLACK_DDPS']
  7 | 
  8 | try:
  9 |     DEADLINE_MONTHS = int(os.environ['MONTHS'])
 10 | except KeyError:
 11 |     DEADLINE_MONTHS = 6
 12 | DEADLINE = datetime.now() - timedelta(days=DEADLINE_MONTHS*30)
 13 | 
 14 | 
 15 | # get archiving bucket : 아카이브행 버킷 탐색
 16 | def get_archiving_bucket(pass_list):
 17 |     s3_client = boto3.client('s3')
 18 |     bucket_list = s3_client.list_buckets()
 19 |     archiving_list = []
 20 |     bucket_name_max = 0
 21 |     
 22 |     for bucket in bucket_list['Buckets']:
 23 |         bucket_name = bucket["Name"]
 24 |         
 25 |         # 아카이빙 목록에서 제외해야 하는 버킷이라면 패스함
 26 |         if bucket_name in pass_list:
 27 |             continue
 28 |         
 29 |         bucket_name_len = len(bucket_name)
 30 |         bucket_name_max = max(bucket_name_max, bucket_name_len)
 31 | 
 32 |         try:
 33 |             bucket_objects = s3_client.list_objects(Bucket=bucket_name)
 34 |         except (ClientError, NameError) as e:
 35 |             continue
 36 |         bucket_size = 0
 37 |         last_accessed_date = "N/A"
 38 |         archiving_bucket = True
 39 |         
 40 |         if 'Contents' in bucket_objects:
 41 |             last_accessed = []
 42 |             for content in bucket_objects['Contents']:
 43 |                 last_accessed.append(content['LastModified'].strftime("%Y-%m-%d"))
 44 |             last_accessed_date = max(last_accessed)
 45 |             item_date = datetime.strptime(last_accessed_date, "%Y-%m-%d")
 46 |             if item_date > DEADLINE:
 47 |                 archiving_bucket = False
 48 |             
 49 |             if archiving_bucket == True and last_accessed_date != "N/A":
 50 |                 for content in bucket_objects['Contents']:
 51 |                     if content['StorageClass'] == "STANDARD":
 52 |                         bucket_size += content['Size']
 53 | 
 54 |         if archiving_bucket and last_accessed_date != "N/A":
 55 |             archiving_list.append([bucket_name, bucket_size, last_accessed_date]) 
 56 |   
 57 |     ordered_archiving_list = sorted(archiving_list, key=lambda x: x[1], reverse=True)
 58 |     return ordered_archiving_list, bucket_name_max
 59 |                 
 60 | 
 61 | # created message : 탐색된 아카이브행 버킷을 메세지로 생성
 62 | def created_message(now_time, archiving_list, bucket_name_max):
 63 |     messages = []
 64 |     header = "*S3 Bucket List to be Archived* - [" + str(len(archiving_list)) + " buckets]\n"
 65 |     header += (now_time+"\n")
 66 | 
 67 |     if len(archiving_list) > 0:
 68 |         header += "* 금월 6일에 Glacier로 이동할 버킷 리스트입니다.\n* 해당 버킷이 Glacier로 이동하길 원하지 않으시면, 백업 혹은 새로 액세스해주시길 바랍니다.\n"
 69 |         header += "비고) 액세스 방법은 s3 버킷 내에 업로드, 삭제, 수정 등의 변화가 일어나야 하는 점을 유의해주십시오.\n"
 70 |     
 71 |         message = f'{"No":>2}. {"Bucket Name":{bucket_name_max+2}} {"Size":12} {"Last Modified"}'
 72 |         count = 1
 73 |         for item in archiving_list:
 74 |             if item[1] >= 1000000000:
 75 |                 item[1] = str(round(item[1]/1000000000, 2)) + " GB"
 76 |             elif item[1] >= 1000000:
 77 |                 item[1] = str(round(item[1]/1000000, 2)) + " MB"
 78 |             elif item[1] >= 1000:
 79 |                 item[1] = str(round(item[1]/1000, 2)) + " KB"
 80 |             else:
 81 |                 item[1] = str(item[1]) + " B"
 82 |             message += f'\n{count:>2}. {item[0]:{bucket_name_max+2}} {item[1]:12} {item[2]}'
 83 |             count += 1
 84 |             if len(message) > 3800:
 85 |                 messages.append(message)
 86 |                 message = ""
 87 |         messages.append(message)
 88 |     else:
 89 |         header += "금월에 Glacier로 옮길 항목이 없습니다."
 90 |     return header, messages
 91 |     
 92 | 
 93 | # slack message : 생성한 메세지를 슬랙으로 전달
 94 | def slack_message(message, meg_type):
 95 |     if meg_type == True:
 96 |         payload = {"text": message}
 97 |     else:
 98 |         payload = {"text": f'```{message}```'}
 99 |     data = json.dumps(payload).encode("utf-8")
100 |     
101 |     req = urllib.request.Request(SLACK_URL)
102 |     req.add_header("Content-Type", "application/json")
103 |     return urllib.request.urlopen(req, data)
104 | 
105 | 
# lambda handler : Lambda entry point
def lambda_handler(event, context):
    """Find the buckets due for archiving and post the list to Slack.

    Returns a short status string describing what was sent.
    """
    # Bucket names to skip come from the PASS_LIST environment variable (comma separated).
    raw_pass_list = os.environ['PASS_LIST']
    pass_list = [name.strip() for name in raw_pass_list.split(',')]

    # The report timestamp is shown in Korea time (UTC+9).
    now_kst = datetime.now(timezone.utc) + timedelta(hours=9)
    korea_time = now_kst.strftime("%Y-%m-%d %H:%M")

    buckets, name_width = get_archiving_bucket(pass_list)
    header, messages = created_message(korea_time, buckets, name_width)

    slack_message(header, True)

    if not messages:
        return "There are no items to move to Glacier. Check the Slack message."

    # Table chunks are sent one by one as code-fenced text.
    for chunk in messages:
        slack_message(chunk, False)

    return "The Archive List was successfully sent in a Slack. Check the Slack message."


--------------------------------------------------------------------------------
/monitor/s3_management/aws_s3_standard_report.py:
--------------------------------------------------------------------------------
  1 | import boto3, os
  2 | from botocore.exceptions import ClientError
  3 | import urllib.request, urllib.parse, json
  4 | from datetime import datetime, timedelta, timezone
  5 | 
# Slack URL the report is posted to, read from the SLACK_DDPS environment
# variable; a missing variable raises KeyError when the module is loaded.
SLACK_URL = os.environ['SLACK_DDPS']
  7 | 
  8 | 
  9 | # get s3 bucket : s3 버킷 중 standard class 만 리스트 생성
 10 | def get_s3_bucket():
 11 |     s3_client = boto3.client('s3')
 12 |     bucket_list = s3_client.list_buckets()
 13 |     standard_list = []
 14 |     bucket_name_max = 0
 15 |     
 16 |     for bucket in bucket_list['Buckets']:
 17 |         bucket_name = bucket["Name"]
 18 |         bucket_name_len = len(bucket_name)
 19 |         bucket_name_max = max(bucket_name_max, bucket_name_len)
 20 | 
 21 |         try:
 22 |             bucket_objects = s3_client.list_objects(Bucket=bucket_name)
 23 |         except (ClientError, NameError) as e:
 24 |             continue
 25 |         bucket_size = 0
 26 |         last_accessed_date = "N/A"
 27 |         bucket_class = "STANDARD"
 28 |         
 29 |         if 'Contents' in bucket_objects:
 30 |             last_accessed = []
 31 |             for content in bucket_objects['Contents']:
 32 |                 last_accessed.append(content['LastModified'].strftime("%Y-%m-%d"))
 33 |             last_accessed_date = max(last_accessed)
 34 | 
 35 |             for content in bucket_objects['Contents']:
 36 |                 if content['StorageClass'] == "STANDARD":
 37 |                     bucket_size += content['Size']
 38 | 
 39 |         if bucket_size == 0:
 40 |             bucket_class = "GLACIER"
 41 |         
 42 |         if bucket_class == "STANDARD":
 43 |             standard_list.append([bucket_name, bucket_size, last_accessed_date])
 44 |         elif bucket_class == "GLACIER" and last_accessed_date == "N/A":
 45 |             standard_list.append([bucket_name, bucket_size, last_accessed_date])
 46 |             
 47 |     ordered_standard_list = sorted(standard_list, key=lambda x: x[1], reverse=True)
 48 |     return ordered_standard_list, bucket_name_max
 49 |                
 50 | 
 51 | # created message : standard bucket을 메세지로 생성
 52 | def created_message(now_time, standard_list, bucket_name_max):
 53 |     messages = []
 54 |     header = "*S3 Bucket List* - [" + str(len(standard_list)) + " buckets]\n"
 55 |     header += (now_time+"\n")
 56 |    
 57 |     message = f'{"No":>2}. {"Bucket Name":{bucket_name_max+2}} {"Size":12} {"Last Modified"}'
 58 |     count = 1
 59 |     for item in standard_list:
 60 |         if item[1] >= 1000000000:
 61 |             item[1] = str(round(item[1]/1000000000, 2)) + " GB"
 62 |         elif item[1] >= 1000000:
 63 |             item[1] = str(round(item[1]/1000000, 2)) + " MB"
 64 |         elif item[1] >= 1000:
 65 |             item[1] = str(round(item[1]/1000, 2)) + " KB"
 66 |         else:
 67 |             item[1] = str(item[1]) + " B"
 68 |         message += f'\n{count:>2}. {item[0]:{bucket_name_max+2}} {item[1]:12} {item[2]}'
 69 |         count += 1
 70 |         if len(message) > 3800:
 71 |             messages.append(message)
 72 |             message = ""
 73 |     messages.append(message)
 74 |     return header, messages
 75 |     
 76 | 
 77 | # slack message : 생성한 메세지를 슬랙으로 전달
 78 | def slack_message(message, meg_type):
 79 |     if meg_type == True:
 80 |         payload = {"text": message}
 81 |     else:
 82 |         payload = {"text": f'```{message}```'}
 83 |     data = json.dumps(payload).encode("utf-8")
 84 | 
 85 |     req = urllib.request.Request(SLACK_URL)
 86 |     req.add_header("Content-Type", "application/json")
 87 |     return urllib.request.urlopen(req, data)
 88 | 
 89 | 
 90 | # lambda_handler : 람다 실행
 91 | def lambda_handler(event, context):
 92 | 
 93 |     utc_time = datetime.now(timezone.utc)
 94 |     korea_time = (utc_time + timedelta(hours=9)).strftime("%Y-%m-%d %H:%M")
 95 | 
 96 |     bucket_standard_list, bucket_name_max = get_s3_bucket()
 97 |     header, messages = created_message(korea_time, bucket_standard_list, bucket_name_max)
 98 |     
 99 |     response = slack_message(header, True)
100 |     
101 |     for meg in messages:
102 |         response = slack_message(meg, False)
103 |         
104 |     return "All bucket list of s3 was sent in a slack. Check the Slack message."


--------------------------------------------------------------------------------
/monitor/s3_management/usage_created_archiving_instance.py:
--------------------------------------------------------------------------------
 1 | import boto3, os
 2 | 
 3 | def usage(user_data):
 4 |     region = os.environ['RUN_REGION']
 5 |     ami_id = os.environ['AMI_ID']
 6 | 
 7 |     ec2_client = boto3.client('ec2', region_name=region)
 8 | 
 9 |     response = ec2_client.run_instances(
10 |         ImageId=ami_id,
11 |         InstanceType='t2.micro',
12 |         UserData=user_data,
13 |         MinCount=1,
14 |         MaxCount=1,
15 |         InstanceMarketOptions={
16 |             'MarketType':'spot',
17 |             'SpotOptions':{
18 |                 'SpotInstanceType':'one-time',
19 |                 'InstanceInterruptionBehavior':'terminate'
20 |             }
21 |         }
22 |     )
23 | 
24 |     instance_id = response['Instances'][0]['InstanceId']
25 | 
26 |     ec2_client.create_tags(
27 |         Resources=[instance_id],
28 |         Tags=[
29 |             {'Key': 'Name', 'Value': 'auto_archiving_management'}
30 |         ]
31 |     )
32 |     return instance_id
33 | 
34 | 
def created_userdata():
    """Render the instance user-data script.

    The script writes /home/ubuntu/config.ini with an [s3_setting] section
    built from the DEADLINE_MONTHS, SLACK_DDPS and PASS_LIST environment
    variables, then runs /home/ubuntu/runfile.sh. Returns the script text.
    """
    DEADLINE_MONTHS = int(os.environ['DEADLINE_MONTHS'])
    SLACK_URL = os.environ['SLACK_DDPS']
    # PASS_LIST is comma separated; it is written out as a Python list repr.
    PASS_LIST = list(map(str.strip, os.environ['PASS_LIST'].split(',')))

    script = f"""#!/bin/bash
cat <<EOL > /home/ubuntu/config.ini
[s3_setting]
DEADLINE_MONTHS = {DEADLINE_MONTHS}
SLACK_URL = {SLACK_URL}
PASS_LIST = {PASS_LIST}
EOL
/home/ubuntu/runfile.sh
"""
    return script
50 | 
51 |     
def lambda_handler(event, context):
    """Launch the archiving instance and report its id in the return string."""
    instance_id = usage(created_userdata())

    return f"successfully create instance, id = {instance_id}"


--------------------------------------------------------------------------------
/monitor/s3_management/usage_terminated_archiving_instance.py:
--------------------------------------------------------------------------------
 1 | import boto3
 2 | import urllib.request, json, os
 3 | 
 4 | 
# Slack URL the closing notice is posted to, read from the SLACK_DDPS
# environment variable; a missing variable raises KeyError at import time.
SLACK_URL = os.environ['SLACK_DDPS']
 6 | 
 7 | # slack message : 생성한 메세지를 슬랙으로 전달
 8 | def slack_message(message, meg_type, url):
 9 |     if meg_type == True:
10 |         payload = {"text": message}
11 |     else:
12 |         payload = {"text": f'```{message}```'}
13 |     data = json.dumps(payload).encode("utf-8")
14 | 
15 |     req = urllib.request.Request(url)
16 |     req.add_header("Content-Type", "application/json")
17 |     return urllib.request.urlopen(req, data)
18 | 
# lambda handler : Lambda entry point
def lambda_handler(event, context):
    """Terminate the instance named by event['key'] and notify Slack.

    The EC2 region comes from the RUN_REGION environment variable.
    Returns a fixed status string.
    """
    url = SLACK_URL
    region = os.environ['RUN_REGION']

    instance_id = event['key']
    ec2_client = boto3.client('ec2', region_name=region)
    terminated = ec2_client.terminate_instances(InstanceIds=[instance_id])

    # One notice sentence is added per instance reported as terminating.
    meg = "".join(
        "S3 관리를 안전하게 마무리합니다. 사용 리소스를 정리하였습니다."
        for _ in terminated['TerminatingInstances']
    )

    slack_message(meg, True, url)
    return "close the auto archiving management."


--------------------------------------------------------------------------------
/monitor/spot_management/README.md:
--------------------------------------------------------------------------------
  1 | # 기능
  2 | 1. 현 계정의 모든 활성된 리전의 정보를 수집하고, 리스트로 만든다.
  3 | 2. 활성화된 모든 리전을 리스트를 차례로 돌아 인스턴스 사용량을 수집한다.
  4 | 3. 인스턴스의 정보를 이벤트 모드에 맞추어 수집하고, Dictionary로 저장한다.
  5 | 4. Dictionary 정보를 토대로 인스턴스 사용량 메세지를 생성한다.
  6 | 5. 활성화된 리전의 수만큼 3번과 4번을 반복한다.
  7 | 6. 생성된 메세지를 토대로 슬랙에 전달한다.
  8 | 
  9 | 
 10 | ## 설정해야 하는 관련 리소스
 11 | ### Lambda (필수)
 12 | - 환경변수 : 메세지 전송 URL
 13 | - TimeOut : 10min *권장
 14 | 
 15 | ### IAM Roles (필수)
 16 | - AmazonEC2ReadOnlyAccess
 17 | - AWSCloudTrail_ReadOnlyAccess
 18 | - AWSLambdaBasicExecutionRole
 19 | 
 20 | ### event bridge (선택)
 21 | - 매일 아침 8시 40분에 동작하게 이벤트 스케줄 생성
 22 | 
 23 | ### requests 모듈 (선택)
 24 | - slack_msg_sender 파일의 함수를 사용하기 위해 필요한 필수 모듈
 25 | - 사용 시 람다 레이어로 파이썬 버전에 맞게 설치 필요
 26 | 
 27 | 
 28 | # aws_daily_instance_usage_report.py
 29 | ## values
 30 | - 사전에 람다 서비스 환경변수에 저장해둔 URL을 코드에서 사용하기 위해 선언한다.
 31 | 
 32 | 
## def daily_instance_usage(region):
 34 |     : Collect instance information that 'run', 'start', 'terminate', and 'stop' for each region.
 35 | - 인스턴스의 이벤트에는 'run, start, stop, terminate'의 4가지 종류가 존재한다.
- 각 리전에서 이 4가지 종류의 검색을 search_instances() 함수를 통해 시도한다.
 37 | - 검색 횟수 : O(4n)
 38 | 
 39 | ### 종류 별로 검색을 각각 시도하는 이유
 40 | - 클라우드 트레일 서비스는 안타깝게도 AND 검색이나 OR 검색이 불가능하다.
 41 | - 즉 Run Instance 이벤트이면서 i-00000000 인 인스턴스를 찾는 것은 불가능하다.
 42 | - 또한, Run Instance 이거나 Start Instance 인 인스턴스를 찾는 것 역시 불가능하다.
 43 | - 이러한 점 때문에, 각 리전에 4가지 종류를 시도한다.
 44 | 
 45 | 
## def search_instances(cloudtrail, eventname, item, token, period, token_code):
 47 |     : Collect instance information and call the following functions.
 48 | - 클라우드 트레일 서비스의 검색을 위해 시작 시간과 종료 시간을 UTC TIME 에서 UNIX TIMESTAMP 으로 변환한다.
 49 | - 설정된 이벤트 모드에 따라 실제 클라우드 트레일 서비스 API를 통해 검색을 시도한다.
 50 | - 검색 결과는 한 번에 50개의 응답만 확인할 수 있으며, 50개 이상부터는 토큰을 통해 추가 검색 후 확인할 수 있다.
 51 | - 추가 검색의 필요를 판단하기 위해 토큰 유무에 대한 결과와 응답을 반환한다.
 52 | 
 53 | 
## def get_start_instances(mode, cloudtrail, response, all_daily_instance):
    : It stores the instance information of the 'run' and 'start' state.
 56 | - 'run'과 'start' 이벤트로써 검색된 목록은 모두 검색일 당일 인스턴스가 켜졌다는 것을 의미한다.
 57 | - 인스턴스 사용량을 저장하기 위하여 모든 인스턴스 아이디를 Dictionary의 Key로 저장한다.
 58 | - 'run'과 'start' 이벤트의 응답 결과의 양식이 조금씩 다르기 때문에 이벤트 모드에 따라 인스턴스 아이디를 수집한다.
 59 | 
 60 | ### 현재 이벤트의 인스턴스 아이디가 Key로 없는 경우
 61 | - 현재 이벤트가 인스턴스 사용량을 저장한 Dictionary에 없는 경우 사용량이 기록되지 않은 이벤트이기 때문에 이 이벤트에서 필요한 모든 정보를 추출한다.
 62 | - 이벤트 시작 시간을 일차적으로 저장한다.
 63 | - 이 외의 리전이나 인스턴스 유형(ex, t2.micro), 스팟 인스턴스 여부, Name 태그, Username은 이벤트 모드에 따라 함수를 호출하여 수집한다.
 64 |     - 리전이나 스팟 여부 등의 정보는 run instance event에서만 수집해올 수 있다.
 65 |     - 따라서 이미 검색한 이벤트 모드가 run instance 일 경우에는 get_run_instance_information() 함수를 호출하여 수집해온다.
    - start instance 일 경우에는 run instance를 검색하는 search_instance_information() 함수를 호출하여 현재 인스턴스 아이디와 일치하는 run instance 검색 결과에서 정보를 수집해온다.
 67 | 
 68 | ### 현재 이벤트의 인스턴스 아이디가 Key로 저장되어 있는 경우
 69 | - 이미 리전이나 스팟 여부 등의 정보를 수집해온 인스턴스 아이디가 Dictionary에 존재하기 때문에 현재 이벤트 발생 시간만을 기록한다.
 70 | - 이미 인스턴스 아이디가 있는데, RunInstance 인 것은 이벤트 오류로 일반적이지 않은 경우이기에 무시한다.
 71 | - 지금 이벤트 시간이 인스턴스 정보 중 최근 Start Time 시간으로 저장되어 있지 않는다면 현재 이벤트 시간을 최신으로 반영하여 저장한다.
 72 | 
 73 | 
## def get_stop_instances(mode, cloudtrail, response, all_daily_instance):
 75 |     : It stores the instance information of the 'terminate' and 'stop' state.
 76 | - 'stop'과 'terminate' 이벤트로 검색된 목록은 모두 검색일 당일 인스턴스가 꺼졌다는 것을 의미한다.
 77 | - 인스턴스 사용량을 저장하기 위하여 모든 인스턴스 아이디를 Dictionary의 Key로 저장한다.
 78 | - 'stop'과 'terminate' 이벤트의 응답 결과는 완전히 일치하기 때문에 인스턴스 아이디가 Dictionary에 존재하는지만 판단한다.
 79 | 
 80 | ### 현재 이벤트의 인스턴스 아이디가 Key로 저장되어 있는 경우
 81 | - 현재 이벤트가 Key로 저장되어 있다는 것은 검색하는 당일에 인스턴스가 켜졌다는 것을 의미한다.
 82 | - 검색 순서 상 'run' -> 'start' -> 'terminate' -> 'stop' 이기 때문에, 인스턴스 아이디가 존재한다는 것은 당일에 켜진 인스턴스가 존재한다는 의미이다.
 83 | - 따라서 현재 이벤트의 발생 시간을 Stop Time으로 저장하여 인스턴스의 종료 시간을 수집한다.
 84 |     - 현재 기록되어 있는 Start Time에 대응하는 Stop Time 값을 저장한다.
 85 |     - 특정한 조건을 통해 이벤트의 중복을 체크한다.
 86 |         1. 기록된 인스턴스의 시작 시간이 검색 시간과 동일하다.
 87 |         2. 인스턴스 사용량 기록이 단 한 개만 존재한다.
 88 |         - 이는 'stop' 이나 'terminate' 이벤트의 중복을 의미한다.
 89 |         - 기존에 존재하는 시간과 비교하여 더 빨리 실행된 이벤트의 시간을 Stop Time으로 기록한다.
 90 |     - Start Time 보다 Stop Time 이 더 늦게 저장되는지 확인하고 그 값을 기록한다.
 91 |     - Start Time 보다 빠른 Stop Time 값이 온다면, 이전에 저장한 이벤트의 중복이라는 의미임으로 이전에 저장한 Stop Time 값을 수정한다.
 92 | 
 93 | ### 현재 이벤트의 인스턴스 아이디가 Key로 없는 경우
 94 | - 검색일 보다 이전에 시작된 인스턴스가 존재하여 검색 당일에는 시작된 인스턴스가 없는 경우 종료 이벤트임에도 불구하고 Dictionary에서 Key를 찾을 수 없다.
 95 | - 이 경우 사용량 리스트에 새로운 인스턴스의 정보를 추가하는 함수 add_new_instance_information()를 호출하여 인스턴스 정보를 기록한다.
 96 | 
 97 | 
 98 | 
 99 | ## def get_instance_ids(events):
100 |     : Collect instance IDs to extract information for all instances in an event
101 | - 하나의 이벤트 값 안에 여러 개의 인스턴스가 실행된 경우, 모든 인스턴스의 아이디를 수집한다.
102 | - 콘솔에서 2개 이상의 인스턴스에 run, start, stop, terminate를 할 시 하나의 이벤트로 기록되어 시간 수집이 누락되는 경우가 있었다.
103 | - 이를 방지하고자 모든 인스턴스 아이디를 제대로 수집할 수 있도록 한다.
104 | 
105 | 
106 | ## def add_new_instance_information(cloudtrail, instance_id, all_daily_instance, event_time, END_DATE):
107 |     : Collect information when the input instance has new information
108 | - Run-Start-Terminate-Stop 순으로 이벤트를 수집하는데, Stop 또는 Terminate 이벤트를 실행할 때 검색 당일 Run 또는 Start 를 하지 않았을 경우 인스턴스의 정보 수집을 위해 실행한다.
109 | - 코드 재사용성을 높이기 위해 함수로 제작하였다.
110 |     - 같은 인스턴스에서 Terminate 후 중복으로 Terminate 되는 경우가 있다.
111 |     - 리스트의 인덱스를 잘못 지정하였을 때 생기는 에러를 방지하고자 Run이나 Start를 하지 않았을 경우에는 인스턴스 목록 자체를 날린다.
112 |     - 이때, 인스턴스 정보를 새로 생성하기 위한 코드를 실행해야 하기에, 재사용을 위해 함수로 제작하였다.
113 | 
114 | 
115 | ## def search_instance_information(cloudtrail, run_instance_id, daily_instances, END_DATE):
116 |     : Call other functions to get information about the 'run instance'.
117 | - 받아온 인스턴스 아이디를 토대로 인스턴스가 처음 생성되었을 때의 정보를 가지고 있는 run instance event를 찾는다.
118 | - get_run_instance()를 호출하여 이벤트를 검색하는데, 인스턴스의 이벤트가 50개 이내일 경우 단 한 번의 함수 호출로도 이벤트를 찾아낼 수 있다.
119 | - 50개 이내에 찾고자 했던 run instance event 정보가 있었다면 get_run_instance_information() 함수를 호출하여 인스턴스 정보를 수집한 후 이 정보를 반환한다.
120 | - 만약 50개 이상의 이벤트가 존재하여 한 번의 검색으로 찾을 수 없었다면, 그 다음 50개를 검색하기 위해 존재하는 token 값을 이용하여 다음 50개 항목 중에 run instance event를 찾아낸다.
121 |     - 이때, 인스턴스 아이디의 가장 첫번째 이벤트가 run instance event 라는 점을 이용하여 token 값이 없을 때까지 검색을 시도하고, token의 값이 없을 때의 검색 결과를 get_run_instance_information() 함수의 파라미터로 넣어 인스턴스의 정보를 알아낸다.
122 | 
123 | 
124 | ## def get_run_instance_information(events, run_instance_id, daily_instances):
125 |     : Store the necessary information from the extracted data.
126 | - 검색된 결과에서 인스턴스 아이디를 Key로 하여 Dictionary에 필요한 데이터를 저장한다.
127 | - run instance event에서만 알 수 있는 정보가 저장되며, 인스턴스 유형(ex. t2.micro), 스팟 인스턴스 여부 및 네임 태그 정보를 수집할 수 있다.
128 | 
129 | 
130 | ## def get_spot_requests_information(region, instance_id, search_date):
131 |     : Find the stop time recorded on spot request.
132 | - 스팟 리퀘스트 요청 시 캔슬 시간을 지정한 경우 Terminate event 가 기록되지 않는다.
133 | - 이를 찾기 위해 스팟 리퀘스트 요청에 포함된 캔슬 시간 정보를 검색해 온다.
134 | - 모든 과정에서 캔슬 시간을 찾지 못한 경우 캔슬 시간이 정의되어 있지 않은 것으로 판단되며, 값을 찾지 않는다.
135 | 
136 | 
137 | ## def create_message(all_daily_instance, search_date):
138 |     : Create a message to send to Slack.
139 | - 슬랙에 보낼 메세지를 생성한다.
140 | - message : 인스턴스 사용량을 저장한다.
141 | - count : 인스턴스 사용 횟수를 저장한다.
142 | - 인스턴스 사용량이 저장된 Dictionary를 순차적으로 돌며 메세지를 생성한다.
143 | 
144 | ### 인스턴스 사용량 시간 메세지 생성
145 | - 인스턴스 사용량은 일반적으로 시작부터 종료 시간까지를 기록한 후 종료 시간에서 시작 시간을 빼 계산한다.
146 | - 그러나 일부 특이한 케이스는 다르게 계산한다.
147 |     시작 시간은 존재하나 종료 시간이 없는 경우
148 |     - 인스턴스의 Start Time은 기록되어 있으나 매치되는 Stop Time이 없을 경우에 해당한다.
149 |     - 이 경우에는 인스턴스를 시작하였으나 종료를 하지 않았다는 것을 의미한다.
150 |     - 이는 주로 마지막에만 실행될 수 있는 케이스이기 때문에, 저장된 시간이 마지막이 아니라면 Start Instance Event 중복으로 간주하고 무시한다.
151 |     - 해당 사용량은 검색 기준으로 살펴보았을 때 인스턴스가 종료되지 않았다는 점을 참고하여 "인스턴스 실행 중" 이라고 확인된다.
152 | 
153 | ### KeyError
154 | - 검증되지 않은 케이스에서 KeyError가 발생할 수 있다. 이 경우 개발 과정에서 확인할 수 없었던 이벤트가 발생한 것임으로 인스턴스 정보를 수집하는 것과 관련된 함수를 찾아 디버그해야 한다.
155 | 
156 | 
157 | ## def push_slack(message):
158 |     : Push a message to Slack.
159 | - 슬랙에 메세지를 보낸다.
160 | 
161 | 
162 | ## def lambda_handler(event, context):
163 | ### 클라우드 트레일 서비스 검색 필터를 위한 DATE 정보 수집
164 | - 클라우드 트레일 서비스에서 정확한 기간동안 로그를 수집하기 위해 UTC TIME을 기반으로 검색일을 수집한다.
- 검색 일자를 header 변수에 저장하여, 슬랙에 전달한다.
166 | 
167 | ### 코드 동작 설명
168 | - 현재 계정에 활성화된 모든 리전을 검색하고 각 리전을 확인한다.
169 | - 인스턴스 사용량을 검색한다.
170 | - 각 리전 별로 인스턴스 사용량을 받아오고, 슬랙에 만들 메세지로 생성한다.
171 | - 실행한 인스턴스가 한 개 이상 존재하면 슬랙으로 메세지를 보낸다.
172 | - 실행한 인스턴스가 한 개도 없을 시 인스턴스를 사용하지 않았다는 메세지를 보낸다.
173 | - 대규모 실험으로 인해 Timeout과 같은 과부하가 발생하지 않도록 사용량 수집 시간을 4분 30초로 제한한다.
174 |     - 제한된 시간 안에 생성된 메세지는 슬랙으로 보내고, 이후 인스턴스 사용량이 많아 수집을 중단한다는 메세지를 덧붙인다.


--------------------------------------------------------------------------------
/monitor/spot_management/aws_daily_instance_usage_report.py:
--------------------------------------------------------------------------------
# This file reports daily instance usage collected through the CloudTrail service.
# The CloudTrail lookup is not friendly: it cannot process combined (AND/OR) searches.
# Therefore, please note that the code may be a little complicated and inefficient.
  4 | 
  5 | import boto3
  6 | import json, urllib.request, os
  7 | from datetime import datetime, timezone, timedelta
  8 | from slack_msg_sender import send_slack_message
  9 | 
 10 | 
# Slack URL read from the SLACK_DDPS environment variable; a missing
# variable raises KeyError when the module is loaded.
SLACK_URL = os.environ['SLACK_DDPS']
 12 | 
 13 | 
 14 | # daily_instance_usage() : Collect instance information that 'run', 'start', 'terminate', and 'stop' for each region.
 15 | def daily_instance_usage(region):
 16 |     all_daily_instance = {}
 17 |     search_modes = ["RunInstances", "StartInstances", "TerminateInstances", "StopInstances", "BidEvictedEvent"]
 18 |     check_mode = [True, True, False, False, False]
 19 |     
 20 |     # store cloud trail logs of all region
 21 |     for i, mode in enumerate(search_modes):
 22 |         try:
 23 |             cloudtrail = boto3.client('cloudtrail', region_name=region)
 24 | 
 25 |             response_list = []
 26 |             token, response = search_instances(cloudtrail, "EventName", mode, False, 0, None)
 27 |             response_list.append(response)
 28 | 
 29 |             while(token):
 30 |                 if response.get('NextToken') != None:
 31 |                     token_code = response['NextToken']
 32 | 
 33 |                 token, response = search_instances(cloudtrail, "EventName", mode, token, 0, token_code)
 34 |                 response_list.append(response)
 35 |         except:
 36 |             send_slack_message(f'An exception that occurred while getting the result of cloud trail query response about {mode} events in {region}')
 37 | 
 38 |         try:            
 39 |             # call the following functions according to the selected mode
 40 |             # parameter description : prevents duplicate searches, act the selected mode, and extracts data from results
 41 |             for response in response_list:
 42 |                 if check_mode[i]:
 43 |                     all_daily_instance = get_start_instances(mode, cloudtrail, response, all_daily_instance)
 44 |                 else:
 45 |                     all_daily_instance = get_stop_instances(mode, cloudtrail, response, all_daily_instance)
 46 | 
 47 |         except Exception as e:
 48 |             send_slack_message(f'An Exception that occurred while collecting instance usage information in {region}\n Check the error message: {e}')
 49 |     return all_daily_instance
 50 | 
 51 | 
 52 | # search_instances() : search the instance as cloud trail service.
 53 | def search_instances(cloudtrail, eventname, item, token, period, token_code):
 54 |     # search the instances
 55 |     response = []
 56 |     if token:
 57 |         response = cloudtrail.lookup_events(
 58 |             EndTime = end_datetime,
 59 |             LookupAttributes = [
 60 |                 {
 61 |                     "AttributeKey": eventname,
 62 |                     "AttributeValue": item
 63 |                 },
 64 |             ],
 65 |             StartTime = (start_datetime - timedelta(days=period)),
 66 |             NextToken = token_code
 67 |         )
 68 |     else:
 69 |         response = cloudtrail.lookup_events(
 70 |             EndTime = end_datetime,
 71 |             LookupAttributes = [
 72 |                 {
 73 |                     "AttributeKey": eventname,
 74 |                     "AttributeValue": item
 75 |                 },
 76 |             ],
 77 |             StartTime = (start_datetime - timedelta(days=period))
 78 |         )
 79 | 
 80 |     if response.get('NextToken') == None:
 81 |         token = False
 82 |     else:
 83 |         token = True
 84 |     return token, response
 85 | 
 86 | 
 87 | # get_start_instances() : It stores the instance information of the 'run' and 'start' state.
 88 | def get_start_instances(mode, cloudtrail, response, all_daily_instance):
 89 |     for events in response['Events']:
 90 |         instance_ids, event_time = get_instance_ids(events)
 91 | 
 92 |         try:
 93 |             if instance_ids == None:
 94 |                 event_informations = json.loads(events['CloudTrailEvent'])
 95 |                 if event_informations['responseElements'].get('omitted'):
 96 |                     request_number = (event_informations['requestParameters']['instancesSet'].get('items'))[0]['maxCount']
 97 |                     if 'SpotRquests' not in all_daily_instance:
 98 |                         all_daily_instance['SpotRquests'] = {'Number': request_number}
 99 |                     else:
100 |                         all_daily_instance['SpotRquests']['Number'] += request_number
101 |                 continue
102 |         except:
103 |             send_slack_message(f'An exception that occurred in the process of determining how many spot request requests there were when there was no instance_id')
104 | 
105 |         try:
106 |             for instance_id, spot_request_id in instance_ids:
107 |                 # store new instance information
108 |                 if instance_id not in all_daily_instance:
109 |                     try:
110 |                         all_daily_instance[instance_id] = {'state': [{'StartTime': event_time}], 'spot_request_id': spot_request_id}
111 |                         if mode == "RunInstances":
112 |                             all_daily_instance = get_run_instance_information(events, instance_id, all_daily_instance)
113 |                         else:
114 |                             all_daily_instance = search_instance_information(cloudtrail, instance_id, all_daily_instance)
115 |                     except:
116 |                         send_slack_message(f'An exception that occurred in storing the new instance information of {instance_id} during the "get_start_instances()" function.')
117 | 
118 |                 # add the start time information of instance to daily instance list
119 |                 else:
120 |                     # Ignore RunInstances event duplication
121 |                     if mode == "RunInstances":
122 |                         continue
123 |                     try:
124 |                         sequence = len(all_daily_instance[instance_id]['state']) - 1
125 |                         if event_time != all_daily_instance[instance_id]['state'][sequence]['StartTime']:
126 |                             all_daily_instance[instance_id]['state'].append({'StartTime': event_time})
127 |                     except:
128 |                         send_slack_message(f'An exception that occurred in storing the start instance event information of {instance_id}')
129 |         except ValueError as e:
130 |             send_slack_message(f'An exception that occurred in running "get_start_instances()" function.\nCheck the error message: {e}')
131 | 
132 |     return all_daily_instance
133 | 
134 | 
135 | # get_stop_instances() : It stores the instance information of the 'terminate' and 'stop' state.
def get_stop_instances(mode, cloudtrail, response, all_daily_instance):
    """Record stop / terminate / interruption times found in CloudTrail events.

    Args:
        mode: Name of the event type being processed. "BidEvictedEvent"
            selects get_interrupt_instance_ids() as the parser; for
            "TerminateInstances" no entry is created for an instance that is
            not tracked yet.
        cloudtrail: CloudTrail client, passed through to
            add_new_instance_information().
        response: Lookup result; only its 'Events' list is read.
        all_daily_instance: Mapping of instance id to collected information.
            It is updated in place.

    Returns:
        The updated all_daily_instance mapping.
    """
    for events in response['Events']:
        if mode == "BidEvictedEvent":
            instance_ids, event_time = get_interrupt_instance_ids(events)
        else:
            instance_ids, event_time = get_instance_ids(events)

        # The parsers return None for events that carry no usable instance list.
        if instance_ids is None:
            continue
        try:
            for instance_id, _ in instance_ids:
                # add the stop time information of instance to daily instance list
                if instance_id in all_daily_instance:
                    try:
                        states = all_daily_instance[instance_id]['state']
                        for sequence, instance_state in enumerate(states):
                            start_time = instance_state.get('StartTime')
                            # A single interval that starts exactly at start_datetime is the
                            # shape add_new_instance_information() creates; rebuild it when
                            # this event is earlier than the stop time stored so far.
                            if start_datetime == start_time and len(states) == 1:
                                if instance_state.get('StopTime') > event_time:
                                    del all_daily_instance[instance_id]
                                    add_new_instance_information(cloudtrail, instance_id, all_daily_instance, event_time)
                                continue

                            if start_time <= event_time:
                                instance_state['StopTime'] = event_time
                            else:
                                # The event predates this interval, so it may close the previous one.
                                # NOTE(review): for sequence == 0 the index -1 addresses the last
                                # interval of the list - confirm this is intended.
                                previous_state = states[sequence - 1]
                                previous_start_time = previous_state.get('StartTime')
                                previous_stop_time = previous_state.get('StopTime')
                                if previous_start_time is not None and previous_stop_time is not None and previous_start_time < event_time and previous_stop_time > event_time:
                                    previous_state['StopTime'] = event_time
                    except Exception:
                        send_slack_message(f'An exception that occurred in getting the stop time information of {instance_id}.')

                # store new instance information
                else:
                    try:
                        # Start only terminate
                        if mode == "TerminateInstances":
                            continue
                        all_daily_instance = add_new_instance_information(cloudtrail, instance_id, all_daily_instance, event_time)
                    except Exception:
                        send_slack_message(f'An exception that occurred in storing the new instance information of {instance_id} during the running "get_stop_instances()" function.')
        except ValueError as e:
            send_slack_message(f'An exception that occurred in running "get_stop_instances()" function.\nCheck the error message: {e}')
    return all_daily_instance
181 | 
182 | 
183 | # get_instance_ids() : Collect instance IDs to extract information for all instances in an event
def get_instance_ids(events):
    """Collect (instance id, spot request id) pairs from one CloudTrail event.

    Args:
        events: A single event record. Reads 'CloudTrailEvent' (JSON string),
            'EventTime' (datetime) and, when the payload has no
            'responseElements', the 'Resources' list.

    Returns:
        A tuple (instance_ids, event_time). instance_ids is a list of
        (instance_id, spot_request_id) tuples and event_time is 'EventTime'
        with tzinfo set to UTC. When the event cannot be used (no
        'Resources', an 'omitted' response, or a response without an
        instance list) the result is (None, 0) so callers can skip it.
    """
    # get instance id in result of cloud trail service
    event_informations = json.loads(events['CloudTrailEvent'])
    response_elements = event_informations.get('responseElements')

    if response_elements is None:
        # No response payload: fall back to the resources attached to the event.
        try:
            instance_ids = [
                (resource['ResourceName'], None)
                for resource in events['Resources']
                if resource['ResourceType'] == 'AWS::EC2::Instance'
            ]
        except KeyError:
            return None, 0
    else:
        if response_elements.get('omitted'):
            return None, 0
        # A response without an instance list is skipped like an omitted one,
        # rather than raising KeyError into callers that do not guard this call.
        instances = (response_elements.get('instancesSet') or {}).get('items')
        if instances is None:
            return None, 0
        instance_ids = [
            (instance.get('instanceId'), instance.get('spotInstanceRequestId', ''))
            for instance in instances
        ]

    event_time = events['EventTime'].replace(tzinfo=timezone.utc)

    return instance_ids, event_time
207 | 
208 | 
209 | # get_interrupt_instance_ids() : Collect information when the instance had interrupt events
def get_interrupt_instance_ids(events):
    """Extract the interrupted instance ids from one CloudTrail event.

    Reads 'serviceEventDetails' -> 'instanceIdSet' from the JSON stored in
    events['CloudTrailEvent'].

    Returns:
        A tuple (instance_ids, event_time): a list of (instance_id, None)
        pairs and events['EventTime'] with tzinfo set to UTC.
    """
    payload = json.loads(events['CloudTrailEvent'])
    interrupted = payload['serviceEventDetails']['instanceIdSet']

    # The second slot of each pair is always None here.
    instance_ids = []
    for interrupted_id in interrupted:
        instance_ids.append((interrupted_id, None))

    return instance_ids, events['EventTime'].replace(tzinfo=timezone.utc)
217 | 
218 | 
219 | # add_new_instance_information() : Collect information when the input instance has new information
def add_new_instance_information(cloudtrail, instance_id, all_daily_instance, event_time):
    """Create (or replace) the entry for instance_id with a single interval.

    The interval runs from the module-level start_datetime to event_time.
    The remaining fields are then filled in by search_instance_information().

    Returns:
        The mapping returned by search_instance_information().
    """
    interval = {'StartTime': start_datetime, 'StopTime': event_time}
    all_daily_instance[instance_id] = {'state': [interval]}
    return search_instance_information(cloudtrail, instance_id, all_daily_instance)
224 | 
225 | 
226 | # search_instance_information() : Call other functions to get information about the 'run instance'.
def search_instance_information(cloudtrail, run_instance_id, all_daily_instance):
    """Look up CloudTrail events for one instance and store its launch details.

    Calls search_instances() by "ResourceName" and keeps requesting while a
    token is returned. Only the events of the last response are inspected.

    Returns:
        The updated all_daily_instance mapping.
    """
    token, response = search_instances(cloudtrail, "ResourceName", run_instance_id, False, 88, None)
    while token:
        token, response = search_instances(cloudtrail, "ResourceName", run_instance_id, token, 88, response['NextToken'])

    found_events = response['Events']
    for found in found_events:
        if found.get('EventName') == 'RunInstances':
            all_daily_instance = get_run_instance_information(found, run_instance_id, all_daily_instance)

    # Without a RunInstances event, fall back to the user of the first
    # start / stop / terminate event in the response.
    record = all_daily_instance[run_instance_id]
    if record.get('UserName') is None:
        lifecycle_events = ['StartInstances', 'StopInstances', 'TerminateInstances']
        for found in found_events:
            if found.get('EventName') in lifecycle_events:
                record['UserName'] = found.get('Username')
                break

    return all_daily_instance
243 | 
244 | 
245 | # get_run_instance_information() : Store the necessary information from the extracted data.
def get_run_instance_information(events, run_instance_id, all_daily_instance):
    """Copy launch details of a RunInstances event into the instance entry.

    Stores 'InstanceType', 'UserName', 'Spot', 'NameTag' and, when the
    request has an 'instancesSet', 'KeyName' under
    all_daily_instance[run_instance_id].

    Returns:
        The same all_daily_instance mapping that was passed in.
    """
    details = json.loads(events.get('CloudTrailEvent'))
    request = details['requestParameters']
    instance_type = request.get('instanceType')

    record = all_daily_instance[run_instance_id]
    record['InstanceType'] = instance_type
    record['UserName'] = events.get('Username')

    launch_set = request.get('instancesSet')
    if launch_set is not None:
        record['KeyName'] = launch_set['items'][0].get('keyName')

    # The presence of instanceMarketOptions marks the launch as spot.
    record['Spot'] = request.get('instanceMarketOptions') is not None

    try:
        # The value of the first tag of the first tag specification is the display name.
        name_tag = request['tagSpecificationSet']['items'][0]['tags'][0]['value']
        name_tag = "spot fleet" if name_tag[:4] == "sfr-" else name_tag
        record['NameTag'] = name_tag
        # Long user names are replaced by the key pair name, if the event lists one.
        if len(record.get('UserName')) > 10:
            for resource in events.get('Resources'):
                if resource.get('ResourceType') == 'AWS::EC2::KeyPair':
                    record['UserName'] = resource.get('ResourceName')
    except Exception:
        # Any failure above: show the user name as the name tag and "aws" as the user.
        record['NameTag'] = record['UserName']
        record['UserName'] = "aws"

    return all_daily_instance
273 | 
274 | 
275 | # get_spot_requests_information() : Find the stop time recorded on spot request
def get_spot_requests_information(region, instance_id, request_id):
    """Find the stop time recorded on the spot request of an instance.

    Args:
        region: Region name used to create the CloudTrail client.
        instance_id: Instance whose spot request is looked up when
            request_id is None.
        request_id: Spot request id, or None to discover it from
            DescribeSpotInstanceRequests events looked up by 'Username'.

    Returns:
        The request's 'validUntil' as a UTC-aware datetime truncated to whole
        seconds, or None when it cannot be determined. Every failure is
        treated as "unknown" and yields None.
    """
    try:
        cloudtrail = boto3.client('cloudtrail', region_name=region)
        if request_id is None:
            _, response = search_instances(cloudtrail, 'Username', instance_id, False, 0, None)
            for events in response['Events']:
                if events.get('EventName') == 'DescribeSpotInstanceRequests':
                    event_informations = json.loads(events.get('CloudTrailEvent'))
                    request_id = event_informations['requestParameters']['spotInstanceRequestIdSet']['items'][0].get('spotInstanceRequestId')
        _, response = search_instances(cloudtrail, 'ResourceName', request_id, False, 0, None)

        first_event = response['Events'][0]
        if first_event['EventName'] != 'RequestSpotInstances':
            # Explicit "not found" instead of relying on an unbound local.
            return None

        event_informations = json.loads(first_event.get('CloudTrailEvent'))
        valid_until = event_informations['requestParameters'].get('validUntil')
        # validUntil is divided by 1000 to get seconds. Convert straight to UTC:
        # a naive fromtimestamp() gives local wall-clock time, and labelling
        # that as UTC is wrong on any host whose timezone is not UTC.
        return datetime.fromtimestamp(valid_until / 1000, tz=timezone.utc).replace(microsecond=0)
    except Exception:
        return None
293 |     
294 | 
295 | # create_message() : Create a message to send to Slack.
def create_message(region, all_daily_instance):
    """Create the Slack report text for one region.

    Args:
        region: Region name shown in the report header and in error notices.
        all_daily_instance: Mapping of instance id to collected information
            ('state' intervals, 'InstanceType', 'NameTag', 'UserName', 'Spot',
            ...). The special key 'SpotRquests' carries a 'Number' count
            instead of instance information.

    Returns:
        A list of message strings. The first starts with
        "<region> (<instance count>)". Further entries are started whenever
        adding a block would bring the current entry to 4000 characters or
        more.

    Notes:
        Runs shorter than 5 seconds are only counted, not listed. The test
        uses the full duration, so a run of one day plus a few seconds is
        listed. Negative durations are reported by their magnitude.
    """
    message = {'spot': [""], 'request': "", 'on_demand': [""]}
    instance_count = 0
    experiment_count = 0
    try:
        for instance_id, instance_info in all_daily_instance.items():
            if instance_id == 'SpotRquests':
                message['request'] += f"{' ':>12}이외 스팟리퀘스트 요청이 {instance_info['Number']}건 실행되었습니다.\n"
                continue

            states = instance_info['state']
            last_sequence = len(states) - 1
            for sequence, instance_state in enumerate(states):
                state_running = False
                # when time information about start and stop be in all daily instance
                try:
                    run_time = instance_state['StopTime'] - instance_state['StartTime']

                # when time information about start or stop not be in all daily instance
                except KeyError:
                    # Only the last interval is resolved; earlier incomplete ones are skipped.
                    if sequence != last_sequence:
                        continue
                    spot_request_id = instance_info.get('spot_request_id')
                    stop_time = get_spot_requests_information(region, instance_id, spot_request_id)
                    if stop_time is not None:
                        run_time = stop_time - instance_state['StartTime']
                    else:
                        run_time = end_datetime - instance_state['StartTime']
                        # 'KeyName' is only stored when the launch request carried an
                        # instancesSet, so a missing key is treated like None instead of
                        # raising KeyError and aborting the rest of the region.
                        if instance_info['UserName'] == "InstanceLaunch" and instance_info.get('KeyName') is None:
                            run_time = timedelta(days=0, seconds=0)
                        else:
                            state_running = True

                # Report the magnitude whenever the stop precedes the start.
                if run_time < timedelta(0):
                    run_time = -run_time

                # total_seconds() includes whole days; .seconds alone does not.
                if run_time.total_seconds() < 5:
                    instance_count += 1
                    experiment_count += 1
                    continue

                # create the message about instance usage
                if instance_info.get('InstanceType') is not None:
                    usage_message = f"{' ':>8}{instance_info['NameTag']} ({instance_info['UserName']}, {instance_id}) / {instance_info['InstanceType']} / "
                else:
                    usage_message = f"{' ':>8}{instance_info['UserName']} ({instance_info['UserName']}, {instance_id}) / Not-Found / "

                if state_running:
                    usage_message += f"인스턴스 실행 중 ({run_time})\n"
                else:
                    usage_message += f"{run_time} 간 실행\n"

                # add message depending on whether spot instance is enabled
                # (an unknown 'Spot' value is listed with the spot instances)
                bucket = 'spot' if instance_info.get('Spot') in [True, None] else 'on_demand'
                chunks = message[bucket]
                if len(chunks[-1]) < 3950:
                    chunks[-1] += usage_message
                else:
                    chunks.append(usage_message)
                instance_count += 1

    except KeyError:
        send_slack_message(f"create_message() : A problem collecting instance information. Related functions is get_run_instance_information() in {region}")
    except Exception as e:
        send_slack_message(f"create_message() : Exception in relation to <{e}> in {region}")

    report_message = [f'{region} ({instance_count})\n']
    for kind, content in message.items():
        if kind == 'request':
            report_message[-1] += content
            continue
        emoji = ":large_blue_diamond:" if kind == 'spot' else ":large_orange_diamond:"
        for sequence, chunk in enumerate(content):
            if chunk == "":
                continue
            # The section title is written once, in front of the first chunk.
            if sequence == 0:
                report_message[-1] += f"{' ':>8}{kind} {emoji}\n"
            message_block = f"```{chunk}```"
            if len(report_message[-1]) + len(message_block) < 4000:
                report_message[-1] += message_block
            else:
                report_message.append(message_block)
    if experiment_count > 0:
        report_message[-1] += f"{' ':4}실험을 위한 {experiment_count}개의 인스턴스가 3초 이내로 실행되었습니다.\n"

    return report_message
387 |     
388 | 
389 | # push_slack() : Push a message to Slack.
def push_slack(message):
    """POST message as a JSON {"text": ...} payload to the URL in SLACK_URL.

    Returns:
        The response object returned by urllib.request.urlopen().
    """
    body = json.dumps({"text": message}).encode("utf-8")
    request = urllib.request.Request(
        SLACK_URL,
        data=body,
        headers={"Content-Type": "application/json"},
    )
    return urllib.request.urlopen(request)
397 | 
398 | 
def lambda_handler(event, context):
    """Entry point: build the previous day's usage report and push it to Slack.

    Sets the module-level search_datetime, start_datetime and end_datetime,
    collects usage for every region returned by describe_regions(), and
    pushes a header followed by the per-region messages.

    Region processing stops once more than 270 seconds have elapsed. In that
    case a stop notice is pushed after whatever was collected, including when
    nothing was collected. "Instances not used." is only pushed when every
    region was scanned and none produced output.

    Args:
        event: Unused.
        context: Unused.

    Returns:
        A fixed status string.
    """
    # setting datetime informations for searching daily logs in cloud trail service
    global search_datetime, start_datetime, end_datetime
    utc_datetime = datetime.now(timezone.utc)
    search_datetime = utc_datetime + timedelta(days=-1, hours=9)
    # Same instant one day ago, expressed in UTC+9; the day window is cut from it.
    utc9_yesterday = (utc_datetime + timedelta(days=-1)).astimezone(timezone(timedelta(hours=9)))
    start_datetime = utc9_yesterday.replace(hour=0, minute=0, second=0, microsecond=0)
    end_datetime = utc9_yesterday.replace(hour=23, minute=59, second=59, microsecond=0)

    # creating head message
    header = f"*Daily Instances Usage Report (DATE: {search_datetime.strftime('%Y-%m-%d')})*"
    all_message = []
    stopped_early = False
    stop_notice = "*생성된 인스턴스의 수가 많아 인스턴스 사용량 전달을 중단합니다.*"

    # created region list and called main function
    ec2 = boto3.client('ec2')
    regions = [region['RegionName'] for region in ec2.describe_regions()['Regions']]
    for region in regions:
        # Time budget for the whole run (presumably kept below the Lambda timeout - confirm).
        if (datetime.now(timezone.utc) - utc_datetime).total_seconds() > 270:
            stopped_early = True
            break

        all_daily_instance = daily_instance_usage(region)

        # created message to slack and pushed to slack
        if len(all_daily_instance) != 0:
            all_message.append(create_message(region, all_daily_instance))

    push_slack(header)
    for region_message in all_message:
        for message in region_message:
            push_slack(message)
    if stopped_early:
        # An interrupted scan must not be reported as "no instances".
        push_slack(stop_notice)
    elif not all_message:
        push_slack("Instances not used.")

    return "perfect jobs. check the slack message, plz."


--------------------------------------------------------------------------------