├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── CWLogsToOpenSearch └── index.js ├── CWMetricsToOpenSearch ├── es_sink │ ├── __init__.py │ ├── __version__.py │ ├── descriptor.py │ ├── es_auth.py │ ├── es_transport.py │ ├── examples.py │ ├── flushing_buffer.py │ ├── line_buffer.py │ ├── sqs_transport.py │ ├── transport_exceptions.py │ ├── transport_result.py │ └── transport_utils.py ├── handler.py └── requirements.txt ├── LICENSE ├── README.md ├── app.py ├── bootstrap.sh ├── cdk.json ├── images ├── amazon_opensearch_service_monitor_framework.png ├── opensearch_collection_metrics.png ├── opensearch_collection_overview.png ├── opensearch_dashboards_list.png ├── opensearch_dashboards_login.png ├── opensearch_dashboards_select.png ├── opensearch_domain_metrics_at_a_glance.png ├── opensearch_domain_overview.png ├── opensearch_monitor_bootstrap.png ├── opensearch_monitor_bootstrap_cdk.png ├── opensearch_monitor_clone.png ├── opensearch_monitor_deploy.png ├── opensearch_monitor_destroy.png ├── opensearch_monitor_post_deploy.png └── opensearch_monitor_post_destroy.png ├── opensearch ├── __init__.py ├── create_alerts.sh ├── export_opensearch_dashboards_V1_0.ndjson ├── nginx_opensearch.conf ├── opensearch_monitor_stack.py └── setupCWSubscriptionFilter.py ├── requirements.txt └── setup.py /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. 
Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute to. Because our projects use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project, we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | -------------------------------------------------------------------------------- /CWLogsToOpenSearch/index.js: -------------------------------------------------------------------------------- 1 | 2 | // v1.1.2 3 | var https = require('https'); 4 | var zlib = require('zlib'); 5 | var crypto = require('crypto'); 6 | 7 | // Load the Amazon OpenSearch Service domain endpoint from an env variable 8 | var endpoint = process.env.DOMAIN_ENDPOINT; 9 | 10 | // Set this to true if you want to debug why data isn't making it to 11 | // your OpenSearch cluster. This will enable logging of failed items 12 | // to CloudWatch Logs.
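// For reference, a decoded CloudWatch Logs subscription event has roughly the
// following shape (an illustrative sketch -- the values are hypothetical, but the
// field names match what transform() below reads):
//
//   {
//     "messageType": "DATA_MESSAGE",
//     "owner": "123456789012",
//     "logGroup": "/aws/aes/domains/my-domain/application-logs",
//     "logStream": "my-domain-es-application-logs",
//     "logEvents": [
//       { "id": "361953...", "timestamp": 1590000000000, "message": "...", "extractedFields": null }
//     ]
//   }
//
// Lambda delivers it gzip-compressed and base64-encoded under input.awslogs.data,
// which is what the handler below decodes and decompresses.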
13 | var logFailedResponses = false; 14 | 15 | exports.handler = function(input, context) { 16 | // decode input from base64 17 | var zippedInput = Buffer.from(input.awslogs.data, 'base64'); 18 | 19 | // decompress the input 20 | zlib.gunzip(zippedInput, function(error, buffer) { 21 | if (error) { context.fail(error); return; } 22 | 23 | // parse the input from JSON 24 | var awslogsData = JSON.parse(buffer.toString('utf8')); 25 | 26 | // transform the input to OpenSearch documents 27 | var opensearchBulkData = transform(awslogsData); 28 | 29 | // skip control messages 30 | if (!opensearchBulkData) { 31 | console.log('Received a control message'); 32 | context.succeed('Control message handled successfully'); 33 | return; 34 | } 35 | 36 | // post documents to the Amazon OpenSearch Service 37 | post(opensearchBulkData, function(error, success, statusCode, failedItems) { 38 | console.log('Response: ' + JSON.stringify({ 39 | "statusCode": statusCode 40 | })); 41 | 42 | if (error) { 43 | logFailure(error, failedItems); 44 | context.fail(JSON.stringify(error)); 45 | } else { 46 | console.log('Success: ' + JSON.stringify(success)); 47 | context.succeed('Success'); 48 | } 49 | }); 50 | }); 51 | }; 52 | 53 | function transform(payload) { 54 | if (payload.messageType === 'CONTROL_MESSAGE') { 55 | return null; 56 | } 57 | 58 | var bulkRequestBody = ''; 59 | 60 | payload.logEvents.forEach(function(logEvent) { 61 | var timestamp = new Date(1 * logEvent.timestamp); 62 | 63 | // index name format: cwl-YYYY.MM.DD 64 | var indexName = [ 65 | 'cwl-' + timestamp.getUTCFullYear(), // year 66 | ('0' + (timestamp.getUTCMonth() + 1)).slice(-2), // month 67 | ('0' + timestamp.getUTCDate()).slice(-2) // day 68 | ].join('.'); 69 | 70 | var source = buildSource(logEvent.message, logEvent.extractedFields); 71 | source['@id'] = logEvent.id; 72 | source['@timestamp'] = new Date(1 * logEvent.timestamp).toISOString(); 73 | source['@message'] = logEvent.message; 74 | source['@owner'] = payload.owner; 75 | source['@log_group'] = payload.logGroup; 76 | source['@log_stream'] = payload.logStream; 77 | 78 | 79 | var action = { "index": {} }; 80 | action.index._index = indexName; 81 | action.index._id = logEvent.id; 82 | 83 | // Use an ingest pipeline for application logs and slow logs, as they do not arrive as JSON 84 | if (payload.logStream.includes("-es-application-logs")) { 85 | action.index.pipeline = "application-logs"; 86 | source['@cw_log_type'] = "application-logs"; 87 | } else if (payload.logStream.includes("-search-slow-logs")) { 88 | action.index.pipeline = "search-slow-logs"; 89 | source['@cw_log_type'] = "search-slow-logs"; 90 | } else if (payload.logStream.includes("-index-slow-logs")) { 91 | action.index.pipeline = "index-slow-logs"; 92 | source['@cw_log_type'] = "index-slow-logs"; 93 | } else if (payload.logStream.includes("-audit-logs")) { 94 | source['@cw_log_type'] = "audit-logs"; 95 | } 96 | 97 | // Extract the domain name from audit_cluster_name, which has the form accountId:domainName. For other logs, take the second-to-last element of the log group name, assuming the format /aws/aes/domains/cluster_name/log_group_name 98 | if (source["audit_cluster_name"]) { 99 | source["domain_name"] = source["audit_cluster_name"].substring(source["audit_cluster_name"].indexOf(':') + 1); 100 | } else { 101 | source["domain_name"] = source['@log_group'].split("/").reverse()[1]; 102 | } 103 | 104 | 105 | bulkRequestBody += [ 106 | JSON.stringify(action), 107 | JSON.stringify(source), 108 | ].join('\n') + '\n'; 109 | }); 110 | return
bulkRequestBody; 111 | } 112 | 113 | function buildSource(message, extractedFields) { 114 | if (extractedFields) { 115 | var source = {}; 116 | 117 | for (var key in extractedFields) { 118 | if (extractedFields.hasOwnProperty(key) && extractedFields[key]) { 119 | var value = extractedFields[key]; 120 | 121 | if (isNumeric(value)) { 122 | source[key] = 1 * value; 123 | continue; 124 | } 125 | 126 | var jsonSubString = extractJson(value); 127 | if (jsonSubString !== null) { 128 | source['$' + key] = JSON.parse(jsonSubString); 129 | } 130 | 131 | source[key] = value; 132 | } 133 | } 134 | return source; 135 | } 136 | 137 | var jsonSubString = extractJson(message); 138 | if (jsonSubString !== null) { 139 | return JSON.parse(jsonSubString); 140 | } 141 | 142 | return {}; 143 | } 144 | 145 | function extractJson(message) { 146 | var jsonStart = message.indexOf('{'); 147 | if (jsonStart < 0) return null; 148 | var jsonSubString = message.substring(jsonStart); 149 | return isValidJson(jsonSubString) ? jsonSubString : null; 150 | } 151 | 152 | function isValidJson(message) { 153 | try { 154 | JSON.parse(message); 155 | } catch (e) { return false; } 156 | return true; 157 | } 158 | 159 | function isNumeric(n) { 160 | return !isNaN(parseFloat(n)) && isFinite(n); 161 | } 162 | 163 | function post(body, callback) { 164 | var requestParams = buildRequest(endpoint, body); 165 | 166 | var request = https.request(requestParams, function(response) { 167 | var responseBody = ''; 168 | response.on('data', function(chunk) { 169 | responseBody += chunk; 170 | }); 171 | 172 | response.on('end', function() { 173 | var info = JSON.parse(responseBody); 174 | var failedItems; 175 | var success; 176 | var error; 177 | 178 | if (response.statusCode >= 200 && response.statusCode < 299) { 179 | failedItems = info.items.filter(function(x) { 180 | return x.index.status >= 300; 181 | }); 182 | 183 | success = { 184 | "attemptedItems": info.items.length, 185 | "successfulItems": info.items.length - failedItems.length, 186 | "failedItems": failedItems.length 187 | }; 188 | } 189 | 190 | if (response.statusCode !== 200 || info.errors === true) { 191 | // prevents logging of failed entries, but allows logging 192 | // of other errors such as access restrictions 193 | delete info.items; 194 | error = { 195 | statusCode: response.statusCode, 196 | responseBody: info 197 | }; 198 | } 199 | 200 | callback(error, success, response.statusCode, failedItems); 201 | }); 202 | }).on('error', function(e) { 203 | callback(e); 204 | }); 205 | request.end(requestParams.body); 206 | } 207 | 208 | function buildRequest(endpoint, body) { 209 | var endpointParts = endpoint.match(/^([^\.]+)\.?([^\.]*)\.?([^\.]*)\.amazonaws\.com$/); 210 | var region = endpointParts[2]; 211 | var service = endpointParts[3]; 212 | var datetime = (new Date()).toISOString().replace(/[:\-]|\.\d{3}/g, ''); 213 | var date = datetime.substr(0, 8); 214 | var kDate = hmac('AWS4' + process.env.AWS_SECRET_ACCESS_KEY, date); 215 | var kRegion = hmac(kDate, region); 216 | var kService = hmac(kRegion, service); 217 | var kSigning = hmac(kService, 'aws4_request'); 218 | 219 | var request = { 220 | host: endpoint, 221 | method: 'POST', 222 | path: '/_bulk', 223 | body: body, 224 | headers: { 225 | 'Content-Type': 'application/json', 226 | 'Host': endpoint, 227 | 'Content-Length': Buffer.byteLength(body), 228 | 'X-Amz-Security-Token': process.env.AWS_SESSION_TOKEN, 229 | 'X-Amz-Date': datetime 230 | } 231 | }; 232 | 233 | var canonicalHeaders = Object.keys(request.headers) 234 | 
.sort(function(a, b) { return a.toLowerCase() < b.toLowerCase() ? -1 : 1; }) 235 | .map(function(k) { return k.toLowerCase() + ':' + request.headers[k]; }) 236 | .join('\n'); 237 | 238 | var signedHeaders = Object.keys(request.headers) 239 | .map(function(k) { return k.toLowerCase(); }) 240 | .sort() 241 | .join(';'); 242 | 243 | var canonicalString = [ 244 | request.method, 245 | request.path, '', 246 | canonicalHeaders, '', 247 | signedHeaders, 248 | hash(request.body, 'hex'), 249 | ].join('\n'); 250 | 251 | var credentialString = [ date, region, service, 'aws4_request' ].join('/'); 252 | 253 | var stringToSign = [ 254 | 'AWS4-HMAC-SHA256', 255 | datetime, 256 | credentialString, 257 | hash(canonicalString, 'hex') 258 | ] .join('\n'); 259 | 260 | request.headers.Authorization = [ 261 | 'AWS4-HMAC-SHA256 Credential=' + process.env.AWS_ACCESS_KEY_ID + '/' + credentialString, 262 | 'SignedHeaders=' + signedHeaders, 263 | 'Signature=' + hmac(kSigning, stringToSign, 'hex') 264 | ].join(', '); 265 | 266 | return request; 267 | } 268 | 269 | function hmac(key, str, encoding) { 270 | return crypto.createHmac('sha256', key).update(str, 'utf8').digest(encoding); 271 | } 272 | 273 | function hash(str, encoding) { 274 | return crypto.createHash('sha256').update(str, 'utf8').digest(encoding); 275 | } 276 | 277 | function logFailure(error, failedItems) { 278 | if (logFailedResponses) { 279 | console.log('Error: ' + JSON.stringify(error, null, 2)); 280 | 281 | if (failedItems && failedItems.length > 0) { 282 | console.log("Failed Items: " + 283 | JSON.stringify(failedItems, null, 2)); 284 | } 285 | } 286 | } 287 | -------------------------------------------------------------------------------- /CWMetricsToOpenSearch/es_sink/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ['descriptor', 2 | 'es_transport', 3 | 'flushing_buffer', 4 | 'line_buffer', 5 | 'sqs_transport', 6 | 'transport_exceptions', 7 | 'transport_result', 8 | 'transport_utils'] -------------------------------------------------------------------------------- /CWMetricsToOpenSearch/es_sink/__version__.py: -------------------------------------------------------------------------------- 1 | VERSION = (0, 1, 2) 2 | 3 | __version__ = '.'.join(map(str, VERSION)) -------------------------------------------------------------------------------- /CWMetricsToOpenSearch/es_sink/descriptor.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright 2020, Amazon Web Services Inc. 3 | This code is licensed under MIT license (see LICENSE.txt for details) 4 | 5 | Python 3 6 | 7 | Provides a buffer object that holds log lines in Elasticsearch _bulk 8 | format. As each line is added, the buffer stores the control line 9 | as well as the log line. 10 | ''' 11 | 12 | 13 | from collections import namedtuple 14 | 15 | 16 | from es_sink.es_auth import ESAuth, ESNoAuth, ESSigV4Auth, ESHttpAuth 17 | from es_sink.transport_utils import now_pst 18 | 19 | 20 | class SQSDescriptor(): 21 | '''Description of an SQS queue. Enables generalization of sink targets''' 22 | def __init__(self, q_url, region): 23 | '''An SQS queue has a URL and a region''' 24 | self._sqs_url = q_url 25 | self._region = region 26 | 27 | @property 28 | def sqs_url(self): 29 | '''The target SQS URL''' 30 | return self. 
_sqs_url 31 | 32 | @property 33 | def region(self): 34 | '''The region of the queue''' 35 | return self._region 36 | 37 | 38 | # Describes handling of indexes for the ESDescriptor class 39 | # es_v7: Use ES V7 APIs (no _type, mostly) 40 | # es_index: For API calls that use an index 41 | # es_type: For ES V6 clusters and calls that use a _type 42 | # timestamped: For ES API calls, mostly writes, append _YY.MM.DD 43 | # to the index name 44 | IndexDescriptor = namedtuple('IndexDescriptor', ['es_index', 'es_type', 45 | 'es_v7', 'timestamped'], 46 | defaults=(None, None, True, True)) 47 | 48 | 49 | class ESDescriptor(): 50 | """Description of an Elasticsearch endpoint.""" 51 | 52 | def __init__(self, endpoint, index_descriptor, region=None, auth=None): 53 | """Describes an Elasticsearch sink. 54 | 55 | This could be refactored to be a little bit better. As of now, it 56 | supports Amazon ES endpoints as well as vanilla ES endpoints. It also 57 | supports ES V6 and ES V7 endpoints. These could be mixins. 58 | 59 | endpoint: The base url to send REST API calls 60 | region: For Amazon ES domains, the AWS region. E.g. 61 | us-west-2 62 | index_descriptor: An IndexDescriptor as above, specifying the 63 | index name, es type, v7 status, and whether to 64 | create indices with a timestamped name 65 | auth: An instance of an ESAuth subclass specifying how 66 | to handle authentication of requests sent to the 67 | Elasticsearch endpoint. 68 | """ 69 | self._endpoint = endpoint 70 | 71 | if not isinstance(index_descriptor, IndexDescriptor): 72 | raise TypeError('Wrong type for index_descriptor') 73 | self._indexing = index_descriptor 74 | 75 | self._auth = auth 76 | if not auth: 77 | self._auth = ESNoAuth() 78 | 79 | if not isinstance(self._auth, ESAuth): 80 | raise ValueError('You must use a child of the ESAuth class') 81 | 82 | 83 | if isinstance(self._auth, ESSigV4Auth) and not region: 84 | raise ValueError('You must specify a region to use SigV4 signing') 85 | self._region = region 86 | 87 | 88 | def user_password(self): 89 | '''Expose a method to retrieve the username/password.''' 90 | if not self._auth or not isinstance(self._auth, ESHttpAuth): 91 | raise ValueError("The descriptor's authentication is not HTTP") 92 | return self._auth.auth_creds() 93 | 94 | @property 95 | def region(self): 96 | '''The region of the Amazon ES domain''' 97 | return self._region 98 | 99 | def is_signed(self): 100 | '''Should requests be signed with AWS SigV4 signing?''' 101 | return isinstance(self._auth, ESSigV4Auth) 102 | 103 | def is_http_auth(self): 104 | '''Should requests use HTTP basic auth?''' 105 | return isinstance(self._auth, ESHttpAuth) 106 | 107 | def auth(self): 108 | '''Return the auth object passed in to init''' 109 | return self._auth 110 | 111 | def timestamped(self): 112 | '''Returns true when the index names should carry a timestamp''' 113 | return self._indexing.timestamped 114 | 115 | def _index_name(self): 116 | ''' Return es_index-YY.MM.DD. The date is US/Pacific wall-clock time (see now_pst) ''' 117 | if self.timestamped(): 118 | return "{}-{}".format(self._indexing.es_index, 119 | now_pst().strftime("%Y.%m.%d")) 120 | return self._indexing.es_index 121 | 122 | def base_url(self): 123 | ''' Returns the endpoint. Slash-terminated.''' 124 | if self._endpoint.endswith('/'): 125 | return self._endpoint 126 | return '{}/'.format(self._endpoint) 127 | 128 | def base_url_with_index(self): 129 | '''Returns the endpoint/index, slash terminated.
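        For example (illustrative values), with endpoint https://localhost:9200,
        es_index='logs', and timestamped=True, this returns something like
        https://localhost:9200/logs-2020.06.10/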
''' 130 | return '{}{}/'.format(self.base_url(), self._index_name()) 131 | 132 | def base_url_6(self): 133 | ''' Returns the endpoint/index/type. Slash-terminated. 134 | Set timestamped=True to add the YY.MM.DD to the index 135 | name.''' 136 | return '{}{}/{}/'.format(self.base_url(), self._index_name(), 137 | self._indexing.es_type) 138 | 139 | def base_url_7(self): 140 | ''' Returns the endpoint/index/. Slash-terminated. 141 | Set timestamped=True to add the YY.MM.DD to the index 142 | name.''' 143 | return '{}{}/'.format(self.base_url(), self._index_name()) 144 | 145 | def bulk_url(self): 146 | ''' d - an ESDescriptor. Returns the base url with _bulk. 147 | This assumes that you do not want index embedded. 148 | Set timestamped=True to add the YY.MM.DD to the index 149 | name.''' 150 | return '{}{}/_bulk'.format(self.base_url(), self._index_name()) 151 | 152 | def _es_v7(self): 153 | return self._indexing.es_v7 154 | 155 | def search_url(self): 156 | ''' d - an ESDescriptor. Returns the base url with 157 | //_search handles es v7 by removing the 158 | type. Set timestamped=True to add the YY.MM.DD to the index 159 | name.''' 160 | if self._es_v7(): 161 | return '{}{}/_search'.format(self.base_url(), 162 | self._index_name()) 163 | 164 | return '{}{}/{}/_search'.format(self.base_url(), 165 | self._index_name(), 166 | self._indexing.es_type) 167 | 168 | ACTION_LINE_6 = '{{"index" : {{ "_index" : "{}", "_type": "{}" }} }}' 169 | ACTION_LINE_7 = '{{"index" : {{ "_index" : "{}" }} }}' 170 | def bulk_control_line(self): 171 | ''' Strictly, this shouldn't go in this class. It's not really 172 | part of a description. OTOH, all the info is here and it will 173 | save lots of duplicated code. 174 | Returns the "control" line for a _bulk request. ''' 175 | if self._es_v7(): 176 | return self.ACTION_LINE_7.format(self._index_name()) 177 | 178 | return self.ACTION_LINE_6.format(self._index_name(), 179 | self._indexing.es_type) 180 | -------------------------------------------------------------------------------- /CWMetricsToOpenSearch/es_sink/es_auth.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright 2020, Amazon Web Services Inc. 3 | This code is licensed under MIT license (see LICENSE.txt for details) 4 | 5 | Python 3 6 | 7 | Provides a class hierarchy to specify authentication method for the 8 | ESDescriptor. 9 | 10 | I'm not clear that the class hierarchy is buying me anything here. I 11 | thought about a dict or namedtuple for these as well. The one thing 12 | I get is in the transport layer, I can dispatch on the auth type. This 13 | way is also more self-documenting. 14 | 15 | ''' 16 | 17 | 18 | from abc import ABC, abstractmethod 19 | 20 | 21 | class ESAuth(ABC): 22 | ''' Base class for the hierarchy. Nothing to do here.''' 23 | @abstractmethod 24 | def __init__(self): 25 | pass 26 | 27 | @abstractmethod 28 | def auth_creds(self): 29 | pass 30 | 31 | 32 | class ESNoAuth(ESAuth): 33 | ''' Use to specify that no authentication will be added to the low-level 34 | transport.''' 35 | def __init__(self): 36 | super(ESNoAuth, self).__init__() 37 | 38 | def auth_creds(self): 39 | return None 40 | 41 | 42 | class ESSigV4Auth(ESAuth): 43 | ''' Use this to have the transport layer grab credentials via Boto. ''' 44 | # Placeholder - eventually should support all of the different auth methods 45 | # of specifying access/secret and tokens. 
46 | # Possibly this could do something like: boto3.Session().get_credentials() 47 | 48 | # TODO: Move the aws region into this class. Add a test case for region=None 49 | def __init__(self): 50 | super(ESSigV4Auth, self).__init__() 51 | 52 | def auth_creds(self): 53 | '''Placeholder... this should implement boto-like determination of AWS 54 | creds.''' 55 | return None 56 | 57 | 58 | class ESHttpAuth(ESAuth): 59 | ''' Use with username/password for auth ''' 60 | def __init__(self, user, password): 61 | super(ESHttpAuth, self).__init__() 62 | self._user = user 63 | self._password = password 64 | 65 | @property 66 | def user(self): 67 | return self._user 68 | 69 | @property 70 | def password(self): 71 | return self._password 72 | 73 | def auth_creds(self): 74 | return (self._user, self._password) 75 | 76 | 77 | -------------------------------------------------------------------------------- /CWMetricsToOpenSearch/es_sink/es_transport.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright 2020, Amazon Web Services Inc. 3 | This code is licensed under MIT license (see LICENSE.txt for details) 4 | 5 | Python 3 6 | 7 | ESTransport class 8 | Wrapper around the requests library that supports sending requests to 9 | Elasticsearch. 10 | 11 | Use the signed initializer to control whether requests are signed with 12 | sigV4 auth (via the requests_aws4auth library). When requests are signed, 13 | Transport gets credentials from the environment via Boto. 14 | ''' 15 | 16 | import boto3 17 | import requests 18 | from requests_aws4auth import AWS4Auth 19 | 20 | from es_sink.transport_result import TransportResult 21 | from es_sink.transport_exceptions import BadAuth, BadHTTPMethod 22 | from es_sink.transport_utils import wall_time, valid_request_body 23 | from es_sink.es_auth import ESAuth 24 | 25 | 26 | def _get_requests_function(method): 27 | ''' Pull the right method from requests. ''' 28 | try: 29 | func = getattr(requests, method) 30 | return func 31 | except AttributeError: 32 | msg = "{} not a recognized HTTP method".format(method) 33 | raise BadHTTPMethod(msg) 34 | 35 | 36 | def _send_signed(method, url, service='es', region='us-west-2', body=None): 37 | '''Internal method that uses sigV4 signing to send the request.''' 38 | credentials = boto3.Session().get_credentials() 39 | auth = AWS4Auth(credentials.access_key, credentials.secret_key, region, 40 | service, session_token=credentials.token) 41 | func = _get_requests_function(method) 42 | (result, took_time) = \ 43 | wall_time(func, url, auth=auth, data=valid_request_body(body), 44 | headers={"Content-Type":"application/json"}) 45 | return TransportResult(status=int(result.status_code), 46 | result_text=result.text, took_s=took_time, 47 | size=len(body)) 48 | 49 | 50 | def _send_unsigned(method, url, body=None, http_auth=None): 51 | ''' Internal method to pass the request through.
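        http_auth, when provided, is a (user, password) tuple, e.g. the value
        returned by ESHttpAuth.auth_creds() in es_auth.py.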
''' 52 | body = valid_request_body(body) 53 | func = _get_requests_function(method) 54 | if http_auth: 55 | (result, took_time) = \ 56 | wall_time(func, url, data=body, 57 | headers={"Content-Type":"application/json"}, 58 | auth=http_auth, 59 | verify=False) 60 | return TransportResult(status=int(result.status_code), 61 | result_text=result.text, took_s=took_time, 62 | size=len(body)) 63 | (result, took_time) = \ 64 | wall_time(func, url, data=body, 65 | headers={"Content-Type":"application/json"}, 66 | verify=False) 67 | return TransportResult(status=int(result.status_code), 68 | result_text=result.text, took_s=took_time, 69 | size=len(body)) 70 | 71 | 72 | class ESTransport(): 73 | ''' Transport class, wrapping the requests library to add auth when needed 74 | and to provide a facade for Amazon ES domains and local Elasticsearch 75 | instances.''' 76 | 77 | def __init__(self, descriptor): 78 | '''A transport object to send requests to Elasticsearch. Since the class 79 | supports both Amazon ES domains and vanilla ES clusters, this needs 80 | to provide request signing as well as HTTP auth. The ESDescriptor 81 | specifies which of these to use. At present, there's no way to 82 | add http auth AND sign requests. 83 | TODO: implement lower-level request signing for signed HTTP auth 84 | 85 | descriptor.signed: Set True to use SigV4 signing only 86 | Set False for HTTP Auth or no auth 87 | descriptor.http_auth: User name, password tuple ''' 88 | 89 | self._descriptor = descriptor 90 | 91 | if descriptor.is_signed() and descriptor.is_http_auth(): 92 | raise BadAuth('You can\'t specify both HTTP auth and signed requests') 93 | 94 | if descriptor.is_signed() and not descriptor.region: 95 | raise ValueError('If you specify signed requests, you must also specify region') 96 | 97 | @property 98 | def is_signed(self): 99 | ''' Tracks whether to send signed requests ''' 100 | return self._descriptor.is_signed() 101 | 102 | def send(self, method, url, service='es', body=''): 103 | '''Public method to dispatch between signed and unsigned. 104 | 105 | Specify the full URL, including endpoint. 106 | TODO: make the endpoint implicit, as determined by 107 | descriptor.base_url(). 
This might be easier, but introduces 108 | complexity in using the class (how to know how much of the URL to 109 | specify)''' 110 | if self.is_signed: 111 | return _send_signed(method, url, service, self._descriptor.region, 112 | body=body) 113 | return _send_unsigned(method, url, body=body, 114 | http_auth=self._descriptor._auth.auth_creds()) 115 | -------------------------------------------------------------------------------- /CWMetricsToOpenSearch/es_sink/examples.py: -------------------------------------------------------------------------------- 1 | import flushing_buffer 2 | from descriptor import ESDescriptor, IndexDescriptor 3 | import es_auth 4 | from es_transport import ESTransport 5 | 6 | 7 | ################################################################################ 8 | # Example connecting to localhost with http auth 9 | auth = es_auth.ESHttpAuth('admin', 'admin') 10 | index_descriptor = IndexDescriptor(es_index='logs', es_v7=True, timestamped=True) 11 | LOCALHOST_ESDESCRIPTOR = ESDescriptor("https://localhost:9200/", index_descriptor, 12 | auth=auth) 13 | 14 | buffer = flushing_buffer.flushing_buffer_factory(LOCALHOST_ESDESCRIPTOR, 15 | flush_trigger=1) 16 | 17 | buffer.add_log_line('{"field1": "value1", "field2": "value2"}') 18 | 19 | raw_transport = ESTransport(LOCALHOST_ESDESCRIPTOR) 20 | result = raw_transport.send('get', "https://localhost:9200/logs*/_search") 21 | print(result) 22 | 23 | 24 | ################################################################################ 25 | # Example connecting to Amazon Elasticsearch Service with signed requests 26 | 27 | AMAZON_ES_ENDPOINT = "https://your endpoint here" 28 | amzn_auth = es_auth.ESSigV4Auth() 29 | amzn_index_descriptor = IndexDescriptor(es_index='logs', es_v7=True, 30 | timestamped=True) 31 | AMAZON_ES_DESCRIPTOR = ESDescriptor(AMAZON_ES_ENDPOINT, amzn_index_descriptor, 32 | auth=amzn_auth) 33 | 34 | buffer2 = flushing_buffer.flushing_buffer_factory(AMAZON_ES_DESCRIPTOR, 35 | flush_trigger=1) 36 | 37 | print('Sending 1 doc to Amazon ES') 38 | buffer2.add_log_line('{"field1": "value1", "field2": "value2"}') 39 | 40 | print('Searching') 41 | raw_transport2 = ESTransport(AMAZON_ES_DESCRIPTOR) 42 | result = raw_transport2.send( 43 | 'get', 44 | "https:///logs*/_search") 45 | print(result) 46 | 47 | -------------------------------------------------------------------------------- /CWMetricsToOpenSearch/es_sink/flushing_buffer.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright 2020, Amazon Web Services Inc. 3 | This code is licensed under MIT license (see LICENSE.txt for details) 4 | 5 | Python 3 6 | 7 | Provides a buffer object that holds log lines in Elasticsearch _bulk 8 | format. As each line is added, the buffer stores the control line 9 | as well as the log line. 10 | 11 | Employs an line_buffer to hold log lines as they are added. Optionally 12 | sends monitor information to an ES cluster. Set the flush_trigger to 13 | control how many lines are buffered before each flush. 
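A minimal usage sketch (assuming an already-configured ESDescriptor named
`descriptor`; see examples.py for complete setups):

    from es_sink.flushing_buffer import flushing_buffer_factory

    buf = flushing_buffer_factory(descriptor, flush_trigger=100)
    buf.add_log_line('{"field1": "value1"}')  # buffered until 100 docs queue up
    buf.flush()                               # force out whatever remains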
14 | ''' 15 | 16 | import time 17 | 18 | from es_sink.descriptor import ESDescriptor, SQSDescriptor 19 | from es_sink.line_buffer import ESLineBuffer, SQSLineBuffer 20 | from es_sink.es_transport import ESTransport 21 | from es_sink.sqs_transport import SQSTransport 22 | from es_sink.transport_exceptions import BadSink 23 | 24 | class FlushingESBuffer(): 25 | '''Wraps an ESLineBuffer object to provide _bulk flushing when the 26 | flush_trigger is hit.''' 27 | 28 | def __init__(self, descriptor, flush_trigger=1): 29 | ''' target_descriptor must be an ESDescriptor''' 30 | self.transport = ESTransport(descriptor) 31 | self.target_descriptor = descriptor 32 | self.flush_trigger = flush_trigger 33 | self.buffer = ESLineBuffer(descriptor) 34 | 35 | def add_log_line(self, log_line): 36 | '''Add a single log line to the internal buffer. If the flush trigger 37 | is hit, send the bulk request.''' 38 | self.buffer.add_log_line(log_line) 39 | if self.buffer.es_doc_count() >= self.flush_trigger: 40 | return self.flush() # swallows the result. Do something with it? 41 | return (0, None) 42 | 43 | def flush(self): 44 | '''Flushes the line_buffer, sending all to the _bulk API''' 45 | before_doc_count = self.buffer.es_doc_count() 46 | if self.buffer.es_doc_count() > 0: 47 | try: 48 | url = self.target_descriptor.bulk_url() 49 | print("Flushing {} documents {} to {}".format( 50 | self.buffer.es_doc_count(), 51 | time.time(), 52 | url)) 53 | result = self.transport.send('post', url, body=str(self.buffer)) 54 | result = result._asdict() 55 | result['docs'] = self.buffer.es_doc_count() 56 | self.buffer.clear() 57 | return (before_doc_count, result) 58 | except Exception as exc: 59 | message = "Exception sending request '{}'" 60 | print(message.format(str(exc))) 61 | raise exc 62 | return (before_doc_count, None) 63 | 64 | 65 | class FlushingSQSBuffer(): 66 | '''Use to send ES _bulk data to SQS in batches.''' 67 | 68 | def __init__(self, descriptor, flush_trigger=1): 69 | self.target_descriptor = descriptor 70 | self.flush_trigger = flush_trigger 71 | self.transport = SQSTransport(descriptor) 72 | self.buffer = SQSLineBuffer() 73 | 74 | def add_log_line(self, line): 75 | '''Add a single log line to the internal buffer. If the flush trigger 76 | is hit, send the bulk request.''' 77 | self.buffer.add_log_line(line) 78 | if self.buffer.es_doc_count() >= self.flush_trigger: 79 | self.flush() # swallows the result. Do something with it? 80 | 81 | def flush(self): 82 | '''Flushes the line_buffer, sending all to the _bulk API''' 83 | print("Flushing {} documents {}".format(self.buffer.es_doc_count(), 84 | time.time())) 85 | if self.buffer.es_doc_count() > 0: 86 | result = self.transport.send(str(self.buffer)) 87 | result = result._asdict() 88 | result['docs'] = self.buffer.es_doc_count() 89 | self.buffer.clear() 90 | print(result) 91 | return result 92 | return None 93 | 94 | 95 | def flushing_buffer_factory(descriptor, flush_trigger=1): 96 | '''Call with a descriptor to receive a buffer object.''' 97 | if isinstance(descriptor, ESDescriptor): 98 | return FlushingESBuffer(descriptor, flush_trigger) 99 | 100 | if isinstance(descriptor, SQSDescriptor): 101 | return FlushingSQSBuffer(descriptor, flush_trigger) 102 | 103 | raise BadSink() 104 | -------------------------------------------------------------------------------- /CWMetricsToOpenSearch/es_sink/line_buffer.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright 2019, Amazon Web Services Inc. 
3 | This code is licensed under MIT license (see LICENSE.txt for details) 4 | 5 | Python 3 6 | 7 | Provides a buffer object that holds log lines in Elasticsearch _bulk 8 | format. As each line is added, the buffer stores the control line 9 | as well as the log line. 10 | ''' 11 | 12 | import abc 13 | import json 14 | 15 | 16 | class LineBuffer(abc.ABC): 17 | ''' An abstract base class for buffering log lines''' 18 | 19 | # abc.ABC (rather than the Python 2 __metaclass__ idiom) makes the @abc.abstractmethod declarations below enforceable in Python 3 20 | 21 | def __init__(self): 22 | self._line_buffer = list() 23 | 24 | @abc.abstractmethod 25 | def add_line_dict(self, dic): 26 | '''Children should add the log line to their internal buffer''' 27 | 28 | @abc.abstractmethod 29 | def add_line_str(self, line): 30 | '''Children should add the log line to their internal buffer''' 31 | 32 | @abc.abstractmethod 33 | def es_docs(self): 34 | '''Children should override to return a multi-line string with only the 35 | ES documents, not the control lines.''' 36 | 37 | @staticmethod 38 | def _dict_to_string(dic): 39 | ''' Encode a dict as a string. Prints and swallows encoding errors ''' 40 | try: 41 | line = json.JSONEncoder().encode(dic) 42 | return line 43 | except UnicodeDecodeError as exc: 44 | msg = "unicode problem {}, skipping line: {}" 45 | print(msg.format(str(exc), dic)) 46 | return '' 47 | 48 | def add_log_line(self, log_line): 49 | '''Send all log lines to this function.''' 50 | if isinstance(log_line, dict): 51 | self.add_line_dict(log_line) 52 | elif isinstance(log_line, str): 53 | self.add_line_str(log_line) 54 | else: 55 | raise ValueError('{} is neither str nor dict'.format(log_line)) 56 | 57 | def clear(self): 58 | '''Empty the buffer.''' 59 | self._line_buffer = list() 60 | 61 | def es_docs_bytes(self): 62 | '''Return the byte count for the log lines in the buffer''' 63 | return len(self.es_docs().encode("utf8")) 64 | 65 | def buffer_bytes(self): 66 | '''Return the total size of the objects in the buffer. This includes 67 | the size of the control lines.''' 68 | return len(str(self).encode("utf8")) 69 | 70 | def __str__(self): 71 | return "\n".join(self._line_buffer) + "\n" 72 | 73 | def __repr__(self): 74 | return str(self) 75 | 76 | 77 | class SQSLineBuffer(LineBuffer): 78 | '''Implementation of LineBuffer to buffer data for SQS output. SQS doesn't 79 | use ES control lines, of course. The workers reading the queue need to 80 | add those lines.''' 81 | def __init__(self): 82 | super().__init__() # the base class fully initializes the buffer; no extra state here 83 | 84 | def add_line_str(self, line): 85 | self._line_buffer.append(line) 86 | 87 | def add_line_dict(self, dic): 88 | line = LineBuffer._dict_to_string(dic) 89 | self._line_buffer.append(line) 90 | 91 | def es_docs(self): 92 | '''Return a flattened string with the log lines in the buffer.''' 93 | return "\n".join(self._line_buffer) + "\n" 94 | 95 | def es_doc_count(self): 96 | '''Return the count of log lines in the buffer.''' 97 | return len(self._line_buffer) 98 | 99 | 100 | class ESLineBuffer(LineBuffer): 101 | '''Send lines to this class as either dicts or strs and it will buffer 102 | a control line along with the log line. Use str() to retrieve the 103 | post body to be used with a _bulk request.''' 104 | 105 | def __init__(self, es_descriptor): 106 | '''Initialize with the ES index name root as well as the ES type.
These 107 | are embedded in the control line.''' 108 | super().__init__() 109 | self.es_descriptor = es_descriptor 110 | 111 | def add_line_str(self, line): 112 | '''Buffer a log line and an indexing command for that line''' 113 | control_line = self.es_descriptor.bulk_control_line() 114 | self._line_buffer.append(control_line) 115 | self._line_buffer.append(line) 116 | 117 | def add_line_dict(self, dic): 118 | '''Buffer a log line and an indexing command for that line''' 119 | line = LineBuffer._dict_to_string(dic) 120 | self.add_line_str(line) 121 | 122 | def es_docs(self): 123 | '''Return just the log lines in the buffer.''' 124 | return "\n".join(self._line_buffer[1::2]) + "\n" 125 | 126 | def es_doc_count(self): 127 | '''Return the count of log lines in the buffer.''' 128 | return len(self._line_buffer) // 2 # each doc contributes a control line and a source line 129 | -------------------------------------------------------------------------------- /CWMetricsToOpenSearch/es_sink/sqs_transport.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright 2020, Amazon Web Services Inc. 3 | This code is licensed under MIT license (see LICENSE.txt for details) 4 | 5 | Python 3 6 | 7 | PyLint complains about the Queue member of the boto3 SQS resource. It also 8 | complains that the SQSTransport class has too few methods. This disables both.''' 9 | # pylint: disable=no-member,R0903 10 | 11 | 12 | import boto3 13 | from es_sink.transport_result import TransportResult 14 | from es_sink.transport_utils import wall_time 15 | 16 | class SQSTransport(): 17 | ''' Transport class wrapping a boto3 SQS queue resource. Provides a 18 | send() facade so buffered _bulk payloads can be shipped to an SQS 19 | queue instead of directly to an Elasticsearch endpoint.''' 20 | 21 | def __init__(self, target_descriptor): 22 | ''' target_descriptor must be an SQSDescriptor (queue URL plus region).''' 23 | self.target_descriptor = target_descriptor 24 | 25 | def send(self, body): 26 | '''Send a message to SQS. Returns a TransportResult''' 27 | sqs = boto3.resource('sqs', region_name=self.target_descriptor.region) 28 | queue = sqs.Queue(self.target_descriptor.sqs_url) 29 | (result, took_time) = wall_time(queue.send_message, MessageBody=body) 30 | metadata = result['ResponseMetadata'] 31 | status = int(metadata['HTTPStatusCode']) 32 | size = int(metadata['HTTPHeaders']['content-length']) 33 | print(result) 34 | return TransportResult(status=status, 35 | result_text='', 36 | took_s=took_time, 37 | size=size) 38 | -------------------------------------------------------------------------------- /CWMetricsToOpenSearch/es_sink/transport_exceptions.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright 2020, Amazon Web Services Inc.
3 | This code is licensed under MIT license (see LICENSE.txt for details) 4 | 5 | Python 3 6 | ''' 7 | class TransportException(Exception): 8 | '''Raised by the transport layer for most issues.''' 9 | 10 | 11 | class BadHTTPMethod(Exception): 12 | '''Raised for methods missing from the requests library.''' 13 | 14 | 15 | class BadSink(Exception): 16 | '''Raised when the target descriptor for transport is not ESDescriptor or 17 | SQSDescriptor.''' 18 | 19 | 20 | class BadAuth(Exception): 21 | '''Raised if the transport client gets both SigV4 signing and HTTP Auth''' -------------------------------------------------------------------------------- /CWMetricsToOpenSearch/es_sink/transport_result.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright 2020, Amazon Web Services Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | 16 | Python 3 17 | ''' 18 | 19 | from collections import namedtuple 20 | 21 | TransportResult = namedtuple('TransportResult', ['status', 'result_text', 22 | 'took_s', 'size']) 23 | -------------------------------------------------------------------------------- /CWMetricsToOpenSearch/es_sink/transport_utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright 2020, Amazon Web Services Inc. 3 | This code is licensed under MIT license (see LICENSE.txt for details) 4 | 5 | Python 3 6 | 7 | Provides a buffer object that holds log lines in Elasticsearch _bulk 8 | format. As each line is added, the buffer stores the control line 9 | as well as the log line. 10 | ''' 11 | 12 | import re 13 | import time 14 | 15 | 16 | from datetime import datetime 17 | from dateutil import tz 18 | from pytz import timezone 19 | 20 | 21 | def now_pst(): 22 | '''Return the current time in PST timezone''' 23 | now_utc = datetime.now(timezone('UTC')) 24 | return now_utc.astimezone(timezone('US/Pacific')) 25 | 26 | 27 | def utc_to_local_datetime(timestamp): 28 | ''' Takes a UTC timestamp (as seconds since the epoch) and converts to a 29 | local datetime object ''' 30 | # Could validate data type 31 | tdt = datetime.fromtimestamp(timestamp, tz=tz.tzlocal()) 32 | tdt = tdt.replace(tzinfo=tz.gettz('UTC')) 33 | return tdt.astimezone(tz.tzlocal()) 34 | 35 | 36 | def has_path(dic, path_elts): 37 | '''Given dict dic, and path path_elts, successively dereference the keys 38 | from path_elts, returning True. 
Returns False if the path is not in the 39 | dictionary''' 40 | if not isinstance(dic, dict) and path_elts: 41 | return False 42 | if not path_elts: 43 | return True 44 | if path_elts[0] in dic: 45 | return has_path(dic[path_elts[0]], 46 | path_elts[1:]) 47 | return False 48 | 49 | 50 | def valid_key(key_in): 51 | '''Mutates key_in, making it a valid field name for Elasticsearch (and 52 | hence, a suitable key for a dict.)''' 53 | pattern = re.compile('[^a-zA-Z0-9@_]') 54 | return pattern.sub('_', key_in) 55 | 56 | 57 | def flatten(current, key, result): 58 | '''Takes a path to an element in a nested dict (e.g., JSON) and recursively 59 | walks the whole tree, returning a 1-layer dict, with elements where the 60 | keys are the path elements joined with '_' and the values are the leaf 61 | values from the dict. 62 | flatten({'a': {'b':'c', 'd': 'e'}}, '', {}) => 63 | {'a_b': 'c', 'a_d': 'e'}''' 64 | if isinstance(current, dict): 65 | for thiskey in current: 66 | valid_k = valid_key(str(thiskey)) 67 | new_key = "{0}_{1}".format(key, valid_k) if len(key) > 0 else valid_k 68 | flatten(current[thiskey], new_key, result) 69 | else: 70 | result[key] = current 71 | return result 72 | 73 | 74 | def valid_request_body(body): 75 | ''' Helper function to ensure request bodies terminate with a new line 76 | and to replace None with the empty string.''' 77 | if body and not body.endswith("\n"): 78 | body += "\n" 79 | elif not body: 80 | body = "" 81 | return body 82 | 83 | 84 | def wall_time(func, *args, **kwargs): 85 | ''' Helper function to wrap the request and return wall time along with 86 | the result of the call. Not using clock() since the processing 87 | happens remotely.''' 88 | start = time.time() 89 | result = func(*args, **kwargs) 90 | end = time.time() 91 | return (result, end - start) 92 | -------------------------------------------------------------------------------- /CWMetricsToOpenSearch/handler.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | SPDX-License-Identifier: MIT-0 4 | ''' 5 | 6 | from collections import namedtuple 7 | from datetime import datetime, timedelta 8 | from dateutil import tz, parser 9 | import itertools 10 | import json 11 | import os 12 | import time 13 | import uuid 14 | 15 | 16 | import boto3 17 | from es_sink.descriptor import ESDescriptor, IndexDescriptor 18 | import es_sink.es_auth 19 | from es_sink.es_transport import ESTransport 20 | import es_sink.flushing_buffer 21 | 22 | # Lambda Interval Settings (seconds) 23 | LAMBDA_INTERVAL=60 24 | 25 | # This structure details the metrics available per domain. Domain names are unique 26 | # by region, but not globally, so the identifier includes the domain name and region 27 | # pair. The metric_descriptions are a collection of SingleMetricDescriptions, which 28 | # provide the dimensions to pass to CloudWatch to retrieve the values for the 29 | # metric. 
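# For illustration only (hypothetical values), one populated entry looks like:
#   DomainMetricsAvailable(
#       region='us-west-2',
#       domain_name='my-domain',
#       metric_descriptions=[SingleMetricDescription(
#           metric_name='CPUUtilization',
#           dims=[{'Name': 'DomainName', 'Value': 'my-domain'}])])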
30 | DomainMetricsAvailable = namedtuple('DomainMetricsAvailable', 31 | ('region', 'domain_name', 'metric_descriptions')) 32 | SingleMetricDescription = namedtuple('SingleMetricDescription', 33 | ('metric_name', 'dims')) 34 | 35 | 36 | SingleMetricValue = namedtuple('SingleMetricValue', 37 | ('region', 'domain_name', 'metric_name', 'stat', 38 | 'value', 'timestamp')) 39 | 40 | CollectionMetricsAvailable = namedtuple('CollectionMetricsAvailable', 41 | ('region', 'collection_id', 'metric_descriptions')) 42 | 43 | SingleMetricValueCollection = namedtuple('SingleMetricValueCollection', 44 | ('region', 'collection_name', 'collection_id', 'index_name', 'metric_name', 'stat', 45 | 'value', 'timestamp')) 46 | 47 | 48 | ################################################################################ 49 | # Environment 50 | DDB_TABLE = os.environ['TABLE'] 51 | 52 | DOMAIN_ENDPOINT = os.environ['DOMAIN_ENDPOINT'] 53 | DOMAIN_ADMIN_UNAME = os.environ['DOMAIN_ADMIN_UNAME'] 54 | DOMAIN_ADMIN_PW = os.environ['DOMAIN_ADMIN_PW'] 55 | REGIONS = json.loads(os.environ['REGIONS']) 56 | SERVERLESS_REGIONS = json.loads(os.environ['SERVERLESS_REGIONS']) 57 | ################################################################################ 58 | # Timestamp tracking 59 | 60 | def get_last_timestamp_ddb(domain_name, region): 61 | ddb = boto3.client('dynamodb') 62 | try: 63 | ret = ddb.get_item(TableName=DDB_TABLE, 64 | Key={'domain': {'S': domain_name}, 65 | 'region': {'S': region}}) 66 | if not ret or not ret.get('Item', None): 67 | return None 68 | iso_ts = ret['Item'].get('Timestamp', None) 69 | if not iso_ts: 70 | return None 71 | iso_ts = iso_ts['S'] 72 | return parser.parse(iso_ts) 73 | except Exception as e: 74 | print('Exception retrieving timestamp for "{}:{}"'.format(domain_name, region)) 75 | print(e) 76 | return None 77 | 78 | 79 | def update_metric_timestamp_ddb(domain_name, region, ts): 80 | ddb = boto3.client('dynamodb') 81 | try: 82 | existing = get_last_timestamp_ddb(domain_name, region) 83 | if not existing or (existing and existing < ts): 84 | ddb.update_item( 85 | TableName=DDB_TABLE, 86 | Key={ 'domain': {'S': domain_name}, 87 | 'region': {'S': region}}, 88 | AttributeUpdates={'Timestamp': { 'Value': {'S': ts.isoformat()}}} 89 | ) 90 | except Exception as e: 91 | print('Exception putting timestamp for "{}:{}"'.format(domain_name, region)) 92 | print(e) 93 | 94 | 95 | LAST_TIMESTAMPS = dict() 96 | def update_metric_timestamp(domain_name, region, ts): 97 | tup = (domain_name, region) 98 | existing = LAST_TIMESTAMPS.get(tup, None) 99 | if existing and existing < ts: 100 | LAST_TIMESTAMPS[tup] = ts 101 | elif not existing: 102 | LAST_TIMESTAMPS[tup] = ts 103 | # otherwise, there's a newer timestamp so don't change anything 104 | 105 | 106 | def get_last_timestamp(domain_name, region): 107 | return LAST_TIMESTAMPS.get((domain_name, region)) 108 | ################################################################################ 109 | 110 | 111 | ################################################################################ 112 | # Domain tracking 113 | 114 | def list_all_domains(): 115 | ''' Loops through the list of REGIONS, listing all domains for this 116 | account in each region. Returns a dict mapping each region to a list of domain names.
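        Example return shape (illustrative):
            {'us-west-2': ['domain-a'], 'eu-west-1': ['domain-b']}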
117 | ''' 118 | print("Started processing for list_all_domains") 119 | doms = {} 120 | for region in REGIONS: 121 | es = boto3.client('es', region) 122 | try: 123 | resp = es.list_domain_names() 124 | resp = resp['DomainNames'] 125 | doms[region] = [val['DomainName'] for val in resp] 126 | except Exception as e: 127 | print('Failed to get domain names in region: {}'.format(region)) 128 | print(e) 129 | return doms 130 | 131 | # Collection tracking 132 | 133 | def list_all_collections(): 134 | ''' Loops through the list of SERVERLESS_REGIONS, listing all collections for 135 | this account in each region. Returns a dict mapping region to a list of collection ids. 136 | ''' 137 | print("Started processing for list_all_collections") 138 | cols = {} 139 | for region in SERVERLESS_REGIONS: 140 | aoss = boto3.client('opensearchserverless', region) 141 | try: 142 | resp = aoss.list_collections() 143 | resp = resp['collectionSummaries'] 144 | cols[region] = [val['id'] for val in resp] 145 | except Exception as e: 146 | print('Failed to get collection ids in region: {}'.format(region)) 147 | print(e) 148 | return cols 149 | 150 | ################################################################################ 151 | # CloudWatch interface 152 | # 153 | def list_domain_cloudwatch_metrics(domain_name=None, region=None): 154 | ''' For a particular domain/region, list all available metrics. Different 155 | ES versions have different metrics for them. This ensures retrieving 156 | all metrics. 157 | Returns a list of SingleMetricDescriptions 158 | ''' 159 | cw = boto3.client('cloudwatch', region) 160 | paginator = cw.get_paginator('list_metrics') 161 | iter = paginator.paginate( 162 | Dimensions=[ 163 | { 164 | 'Name': 'DomainName', 165 | 'Value': domain_name 166 | } 167 | ] 168 | ) 169 | resp = [] 170 | for page in iter: 171 | metrics = page['Metrics'] 172 | for metric in metrics: 173 | resp.append(SingleMetricDescription(metric_name=metric['MetricName'], 174 | dims=metric['Dimensions'])) 175 | return resp 176 | 177 | def list_domain_cloudwatch_metrics_collections(collection_id=None, region=None): 178 | ''' For a particular collection/region, list all available metrics. This 179 | queries CloudWatch for every metric that carries the CollectionId 180 | dimension. 181 | Returns a list of SingleMetricDescriptions 182 | ''' 183 | cw = boto3.client('cloudwatch', region) 184 | paginator = cw.get_paginator('list_metrics') 185 | iter = paginator.paginate( 186 | Dimensions=[ 187 | { 188 | 'Name': 'CollectionId', 189 | 'Value': collection_id 190 | } 191 | ] 192 | ) 193 | resp = [] 194 | for page in iter: 195 | metrics = page['Metrics'] 196 | for metric in metrics: 197 | resp.append(SingleMetricDescription(metric_name=metric['MetricName'], 198 | dims=metric['Dimensions'])) 199 | return resp 200 | 201 | 202 | def get_all_domain_metric_descriptions(doms): 203 | ''' Takes a dict mapping region to a list of domains and retrieves the available 204 | metrics for each of the domains. 205 | ''' 206 | resp = [] 207 | for region, domains in doms.items(): 208 | for domain in domains: 209 | dmets = list_domain_cloudwatch_metrics(domain_name=domain, 210 | region=region) 211 | resp.append(DomainMetricsAvailable(region, domain, dmets)) 212 | return resp 213 | 214 | def get_all_domain_metric_descriptions_collections(colls): 215 | ''' Takes a dict mapping region to a list of collections and retrieves the available 216 | metrics for each of the collections.
217 | ''' 218 | resp = [] 219 | for region, collections in colls.items(): 220 | for collection in collections: 221 | dmets = list_domain_cloudwatch_metrics_collections(collection_id=collection, 222 | region=region) 223 | resp.append(CollectionMetricsAvailable(region, collection, dmets)) 224 | return resp 225 | 226 | 227 | def build_metric_data_queries(domain_name, region, metric_descriptions): 228 | ret = [] 229 | for md in metric_descriptions: 230 | metric_name = md.metric_name 231 | for stat in ['Minimum', 'Maximum', 'Average']: # What flexibility does this need? 232 | label = '{} {} {} {}'.format(domain_name, region, metric_name, stat) 233 | _id = 'a' + str(uuid.uuid1()).lower().replace('-', '_') 234 | ret.append( 235 | { 236 | 'Id': _id, 237 | 'Label': label, 238 | 'MetricStat': { 239 | 'Metric': { 240 | 'Namespace': 'AWS/ES', 241 | 'MetricName': metric_name, 242 | 'Dimensions': md.dims 243 | }, 244 | 'Period': LAMBDA_INTERVAL, # ? any need to do more granular than 1 minute? 245 | 'Stat': stat, 246 | } 247 | } 248 | ) 249 | return ret 250 | 251 | def build_metric_data_queries_collections(collection_id, region, metric_descriptions): 252 | ret = [] 253 | for md in metric_descriptions: 254 | metric_name = md.metric_name 255 | collection_name = "N/A" 256 | index_name = "N/A" 257 | for dimensions in md.dims: 258 | if dimensions['Name'] == "CollectionName": 259 | collection_name = dimensions['Value'] 260 | elif dimensions['Name'] == "IndexName": 261 | index_name = dimensions['Value'] 262 | 263 | for stat in ['Minimum', 'Maximum', 'Average']: # What flexibility does this need? 264 | label = '{} {} {} {} {} {}'.format(collection_id, collection_name, index_name, region, metric_name, stat) 265 | _id = 'a' + str(uuid.uuid1()).lower().replace('-', '_') 266 | ret.append( 267 | { 268 | 'Id': _id, 269 | 'Label': label, 270 | 'MetricStat': { 271 | 'Metric': { 272 | 'Namespace': 'AWS/AOSS', 273 | 'MetricName': metric_name, 274 | 'Dimensions': md.dims 275 | }, 276 | 'Period': LAMBDA_INTERVAL, # ? any need to do more granular than 1 minute? 277 | 'Stat': stat, 278 | } 279 | } 280 | ) 281 | return ret 282 | 283 | 284 | def grouper(iterable, n): 285 | it = iter(iterable) 286 | while True: 287 | chunk = list(itertools.islice(it, n)) 288 | if not chunk: 289 | return 290 | yield chunk 291 | 292 | 293 | def get_single_domain_metric_values(domain_name, region, metric_descriptions): 294 | # TODO: Make this multi-domain? 295 | ret = list() 296 | cw = boto3.client('cloudwatch', region) 297 | queries = build_metric_data_queries(domain_name, region, metric_descriptions) 298 | 299 | # The CW query runs from now to the last time this retrieved data. It could miss 300 | # data points on edge case boundaries. 
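    # Illustrative timeline (not from the original source): successive runs chain
    # their query windows through the timestamp stored in DynamoDB, e.g.
    #   run 1: StartTime = T0 - 15min, EndTime = T0   -> stores T0
    #   run 2: StartTime = T0,         EndTime = T1   -> stores T1
    # so a datapoint stamped exactly on a window edge may be returned to both runs.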
301 | time_now = datetime.utcfromtimestamp(time.time()) 302 | last_timestamp = get_last_timestamp_ddb(domain_name, region) 303 | if not last_timestamp: 304 | last_timestamp = time_now - timedelta(minutes=15) 305 | 306 | for group in grouper(queries, 100): 307 | try: 308 | paginator = cw.get_paginator('get_metric_data') 309 | iter = paginator.paginate(MetricDataQueries=group, 310 | StartTime=last_timestamp, 311 | EndTime=time_now) 312 | for page in iter: 313 | for result in page['MetricDataResults']: 314 | # TODO: Error handling 315 | (result_domain, result_region, metric_name, stat) = result['Label'].split(' ') 316 | for val in zip(result['Timestamps'], result['Values']): 317 | ts = val[0].replace(microsecond=0, tzinfo=tz.tzutc()) 318 | ret.append(SingleMetricValue( 319 | domain_name=result_domain, 320 | region=result_region, 321 | metric_name=metric_name, 322 | stat=stat, 323 | timestamp=ts.isoformat(), 324 | value=val[1] 325 | )) 326 | update_metric_timestamp_ddb(domain_name, region, time_now) 327 | except Exception as e: 328 | # Handle me better 329 | print('Exception', domain_name, region) 330 | print(e) 331 | print() 332 | return ret 333 | 334 | def get_single_domain_metric_values_collections(collection_id, region, metric_descriptions): 335 | # TODO: Make this multi-domain? 336 | ret = list() 337 | cw = boto3.client('cloudwatch', region) 338 | queries = build_metric_data_queries_collections(collection_id, region, metric_descriptions) 339 | # The CW query runs from now to the last time this retrieved data. It could miss 340 | # data points on edge case boundaries. 341 | time_now = datetime.utcfromtimestamp(time.time()) 342 | last_timestamp = get_last_timestamp_ddb(collection_id, region) 343 | if not last_timestamp: 344 | last_timestamp = time_now - timedelta(minutes=15) 345 | 346 | for group in grouper(queries, 100): 347 | try: 348 | paginator = cw.get_paginator('get_metric_data') 349 | iter = paginator.paginate(MetricDataQueries=group, 350 | StartTime=last_timestamp, 351 | EndTime=time_now) 352 | for page in iter: 353 | for result in page['MetricDataResults']: 354 | # TODO: Error handling 355 | (collection_id, collection_name, index_name, region, metric_name, stat) = result['Label'].split(' ') 356 | for val in zip(result['Timestamps'], result['Values']): 357 | ts = val[0].replace(microsecond=0, tzinfo=tz.tzutc()) 358 | ret.append(SingleMetricValueCollection( 359 | collection_id=collection_id, 360 | collection_name=collection_name, 361 | index_name=index_name, 362 | region=region, 363 | metric_name=metric_name, 364 | stat=stat, 365 | timestamp=ts.isoformat(), 366 | value=val[1] 367 | )) 368 | update_metric_timestamp_ddb(collection_id, region, time_now) 369 | except Exception as e: 370 | # Handle me better 371 | print('Exception', collection_id, region) 372 | print(e) 373 | print() 374 | return ret 375 | 376 | 377 | def get_all_domain_metric_values(domains): 378 | ''' Domains is a list of DomainMetricDescriptions - tuples with domain_name, 379 | region, and a list of SingleMetricDescriptions. 380 | Returns a list of SingleMetricValues. 
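        Example element (illustrative values):
            SingleMetricValue(region='us-west-2', domain_name='my-domain',
                              metric_name='CPUUtilization', stat='Average',
                              value=12.5, timestamp='2023-01-01T00:00:00+00:00')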
381 | ''' 382 | # TODO: Send a single request rather than 1 per domain/region dimension 383 | res = list() 384 | for domain in domains: 385 | domain_name = domain.domain_name 386 | region = domain.region 387 | res.extend(get_single_domain_metric_values(domain_name, region, domain.metric_descriptions)) 388 | return res 389 | 390 | 391 | def get_all_domain_metric_values_collections(collections): 392 | ''' Collections is a list of CollectionMetricDescriptions - tuples with collection_id, 393 | region, and a list of SingleMetricDescriptions. 394 | Returns a list of SingleMetricValuesCollections. 395 | ''' 396 | # TODO: Send a single request rather than 1 per domain/region dimension 397 | res = list() 398 | for collection in collections: 399 | collection_id = collection.collection_id 400 | region = collection.region 401 | res.extend(get_single_domain_metric_values_collections(collection_id, region, collection.metric_descriptions)) 402 | return res 403 | 404 | 405 | ################################################################################ 406 | # Amazon OpenSearch interface 407 | ES_AUTH = es_sink.es_auth.ESHttpAuth(DOMAIN_ADMIN_UNAME, DOMAIN_ADMIN_PW) 408 | 409 | INDEX_DESCRIPTOR = IndexDescriptor(es_index='domains', es_v7=True, timestamped=True) 410 | ES_DESCRIPTOR = ESDescriptor( 411 | endpoint=DOMAIN_ENDPOINT, 412 | index_descriptor=INDEX_DESCRIPTOR, 413 | auth=ES_AUTH 414 | ) 415 | ES_BUFFER = es_sink.flushing_buffer.flushing_buffer_factory(ES_DESCRIPTOR, 416 | flush_trigger=1000) 417 | 418 | INDEX_DESCRIPTOR_COLLECTIONS = IndexDescriptor(es_index='collections', es_v7=True, timestamped=True) 419 | ES_DESCRIPTOR_COLLECTIONS = ESDescriptor( 420 | endpoint=DOMAIN_ENDPOINT, 421 | index_descriptor=INDEX_DESCRIPTOR_COLLECTIONS, 422 | auth=ES_AUTH 423 | ) 424 | ES_BUFFER_COLLECTIONS=es_sink.flushing_buffer.flushing_buffer_factory(ES_DESCRIPTOR_COLLECTIONS, 425 | flush_trigger=1000) 426 | 427 | def send_all_domain_metric_values(values): 428 | total = 0 429 | total_flushed = 0 430 | for value in values: 431 | 432 | d = value._asdict() 433 | line_value = d.pop('value') 434 | metric_name = d['metric_name'] 435 | d[metric_name] = line_value 436 | 437 | # Rename field timestamp to @timestamp 438 | timestamp_value = d.pop('timestamp') 439 | d['@timestamp'] = timestamp_value 440 | 441 | log_line = json.dumps(d) 442 | 443 | f, ignore = ES_BUFFER.add_log_line(log_line) 444 | 445 | total_flushed += f 446 | total += 1 447 | 448 | print('Added {} log lines to the domain buffer'.format(total)) 449 | print('Flushed {} log lines for domains'.format(total_flushed)) 450 | 451 | 452 | def send_all_domain_metric_values_collections(vals_collections): 453 | total = 0 454 | total_flushed = 0 455 | for value in vals_collections: 456 | 457 | d = value._asdict() 458 | line_value = d.pop('value') 459 | metric_name = d['metric_name'] 460 | d[metric_name] = line_value 461 | 462 | # Rename field timestamp to @timestamp 463 | timestamp_value = d.pop('timestamp') 464 | d['@timestamp'] = timestamp_value 465 | 466 | log_line = json.dumps(d) 467 | 468 | f, ignore = ES_BUFFER_COLLECTIONS.add_log_line(log_line) 469 | 470 | total_flushed += f 471 | total += 1 472 | 473 | print('Added {} log lines to the collections buffer'.format(total)) 474 | print('Flushed {} log lines for collections'.format(total_flushed)) 475 | ################################################################################ 476 | # Lambda handler 477 | def handler(event, context): 478 | doms = list_all_domains() 479 | all_mets = 
get_all_domain_metric_descriptions(doms)
480 |     vals = get_all_domain_metric_values(all_mets)
481 | 
482 |     colls = list_all_collections()
483 |     all_mets_collections = get_all_domain_metric_descriptions_collections(colls)
484 |     vals_collections = get_all_domain_metric_values_collections(all_mets_collections)
485 | 
486 |     send_all_domain_metric_values(vals)
487 |     send_all_domain_metric_values_collections(vals_collections)
488 | 
489 |     ES_BUFFER.flush()
490 |     ES_BUFFER_COLLECTIONS.flush()
491 | 
492 | 
493 | ################################################################################
494 | # Command line/test interface
495 | def print_doms(doms):
496 |     print("Monitoring domains:")
497 |     for region, domains in doms.items():
498 |         print(region)
499 |         for domain in domains:
500 |             print('\t' + domain)
501 | 
502 | 
503 | def print_all_mets(all_mets):
504 |     for met in all_mets:
505 |         print('\t{}/{}: {} metrics'.format(met.domain_name, met.region,
506 |                                            len(met.metric_descriptions)))
507 | 
508 | 
509 | def print_all_vals(vals):
510 |     doms = dict()
511 |     for val in vals:
512 |         if (val.domain_name, val.region) not in doms:
513 |             doms[(val.domain_name, val.region)] = 0
514 |         doms[(val.domain_name, val.region)] += 1
515 |     print('Retrieved {} values'.format(len(vals)))
516 |     total = 0
517 |     for dom, count in doms.items():
518 |         print('{}: {}'.format(dom, count))
519 |         total += count
520 |     print('Retrieved a total of {} values'.format(total))
521 | 
522 | 
523 | if __name__ == '__main__':
524 |     # This code will normally run as a lambda function, so I don't want to add a
525 |     # command-line arg. Instead, set an environment variable as if it were
526 |     # running as lambda.
527 |     print()
528 |     print('Did you remember to set the "TABLE" environment variable with the')
529 |     print('name of the DDB table tracking timestamps?')
530 |     print('Did you remember to set the "DOMAIN_ENDPOINT", "DOMAIN_ADMIN_UNAME", and')
531 |     print('"DOMAIN_ADMIN_PW" environment variables?')
532 |     print()
533 | 
534 |     doms = list_all_domains()
535 |     print_doms(doms)
536 | 
537 |     print('Getting all metric descriptions')
538 |     all_mets = get_all_domain_metric_descriptions(doms)
539 |     print_all_mets(all_mets)
540 | 
541 |     while True:
542 |         print('Retrieving metric values')
543 |         vals = get_all_domain_metric_values(all_mets)
544 |         print_all_vals(vals)
545 |         print('Adding new metric values')
546 |         send_all_domain_metric_values(vals)
547 |         ES_BUFFER.flush()
548 | 
549 | 
--------------------------------------------------------------------------------
/CWMetricsToOpenSearch/requirements.txt:
--------------------------------------------------------------------------------
1 | pytz
2 | requests
3 | certifi
4 | chardet
5 | idna
6 | requests_aws4auth
7 | urllib3
8 | six
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | 
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of
4 | this software and associated documentation files (the "Software"), to deal in
5 | the Software without restriction, including without limitation the rights to
6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
7 | the Software, and to permit persons to whom the Software is furnished to do so.
8 | 
9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
10 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
11 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
12 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
13 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
14 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
15 | 
16 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Amazon OpenSearch Service Monitor
2 | 
3 | This repository contains a step-by-step demonstration of how to set up a monitoring stack for Amazon OpenSearch Service domains and Amazon OpenSearch Serverless collections across all specified regions. This example uses the AWS CDK and Python.
4 | 
5 | 
6 | ## Table of Contents
7 | 1. [Context](#context)
8 | 2. [Prerequisites](#prerequisites)
9 | 3. [Deploy](#deploy)
10 | 4. [OpenSearch Subscription Filters](#cw-subscription-filters)
11 | 5. [Pre-built Monitoring Dashboards](#dashboards)
12 | 6. [Pre-built Alerts](#alerts)
13 | 7. [Clean up](#cleanup)
14 | 8. [Total Cost of Ownership](#tco)
15 | 
16 | ## Context
17 | Amazon OpenSearch Service is a fully managed service that makes it easy for you to deploy, secure, and run OpenSearch cost-effectively at scale. Managing and monitoring multiple Amazon OpenSearch Service domains and OpenSearch Serverless collections is often difficult because their metrics and logs are not available in one centralized place for troubleshooting.
18 | This example configures monitoring for Amazon OpenSearch Service domains and OpenSearch Serverless collections that fetches the CloudWatch metrics and CloudWatch logs from all domains and collections at a regular interval. It also comes with pre-built OpenSearch dashboards and alerts.
19 | 
20 | ## Architecture
21 | ![architecture](/images/amazon_opensearch_service_monitor_framework.png)
22 | 
23 | -----
24 | 
25 | ## Prerequisites
26 | 
27 | The following tools are required to deploy this monitoring tool for Amazon OpenSearch Service:
28 | 
29 | - AWS CDK - https://docs.aws.amazon.com/cdk/latest/guide/getting_started.html
30 | - AWS CLI - https://aws.amazon.com/cli/
31 | - Git - https://git-scm.com/downloads
32 | - nodejs - https://nodejs.org/en
33 | - python (3.6 or later) - https://www.python.org/downloads/
34 | 
35 | ### Create and deploy Amazon OpenSearch Service Monitor tool
36 | 
37 | Complete the following steps to set up the Amazon OpenSearch Service Monitor tool in your environment using the CDK.
38 | 
39 | At a bash terminal session:
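You can optionally verify first that the prerequisites are available on your PATH (the exact version output will differ on your machine):

```bash
$ cdk --version
$ aws --version
$ git --version
$ node --version
$ python3 --version
```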
40 | 
41 | ```bash
42 | # clone the repo
43 | $ git clone https://github.com/aws-samples/amazon-opensearch-service-monitor.git
44 | # move to directory
45 | $ cd amazon-opensearch-service-monitor
46 | ```
47 | 
48 | ![Clone Repo](/images/opensearch_monitor_clone.png)
49 | 
50 | ```bash
51 | # bootstrap the remaining setup (the region defaults to us-east-1)
52 | # Enter the e-mail address for alerts, as it will be used as the alert destination
53 | # Alternatively, you can change the e-mail address manually in opensearch/opensearch_monitor_stack.py
54 | $ bash bootstrap.sh
55 | # activate the virtual environment
56 | $ source .env/bin/activate
57 | ```
58 | 
59 | ![Bootstrap](/images/opensearch_monitor_bootstrap.png)
60 | 
61 | ### Bootstrap the CDK
62 | 
63 | Create the CDK configuration by bootstrapping the CDK (a one-time activity for each region).
64 | 
65 | ```bash
66 | # bootstrap the cdk
67 | (.env)$ cdk bootstrap aws://yourAccountID/yourRegion
68 | ```
69 | 
70 | ![Terminal - Bootstrap the CDK](/images/opensearch_monitor_bootstrap_cdk.png)
71 | 
72 | -----
73 | 
74 | ## Deploy
75 | Use the AWS CDK to deploy the opensearch stack for Amazon OpenSearch Service. The stack creates and deploys the following components:
76 | 1. A VPC with 3 AZs
77 | 2. An Amazon OpenSearch Service domain (version 2.3) with two m6g.large.search data nodes and a 100 GB EBS storage volume each; the two nodes are spread across two different AZs
78 | 3. A DynamoDB table for timestamp tracking
79 | 4. A Lambda function to fetch CloudWatch metrics across all regions and all domains. By default it fetches the data every 5 minutes, which can be changed if needed.
80 | 5. An EC2 instance which acts as an SSH tunnel to access Dashboards, as the whole setup is secured and inside a VPC
81 | 6. Default OpenSearch dashboards to visualize metrics across all domains and collections
82 | 7. Default e-mail alerts set up on the newly launched Amazon OpenSearch Service domain
83 | 8. An index template and Index State Management (ISM) policy to delete indices older than 366 days (the retention can be changed if needed)
84 | 9. An option to enable UltraWarm (UW), which is disabled by default; change the settings [in this file](opensearch/opensearch_monitor_stack.py) to enable UW
85 | 10. A Lambda function to ship CloudWatch logs from all regions
86 | 
87 | 
88 | #### Note: The complete stack is set up with the pre-defined configuration in [opensearch_monitor_stack.py](opensearch/opensearch_monitor_stack.py); please review settings such as the e-mail address, instance type, username, and password before proceeding to deploy. You can also enable UW and a dedicated master (if needed).
89 | 
90 | Run the command below:
91 | ```bash
92 | (.env)$ cdk deploy
93 | ```
94 | 
95 | The CDK will prompt you to approve the security changes; enter "y" for yes.
96 | 
97 | ![Terminal - Deploy OpenSearch Monitor Tool](/images/opensearch_monitor_deploy.png)
98 | 
99 | Once the app is deployed you will get the Dashboards URL, user name, and password to access OpenSearch Dashboards. Once logged in, refer to the sections below to navigate the dashboards and alerts.
100 | 
101 | #### Note: After the stack is deployed you will receive an e-mail to confirm the SNS subscription; please confirm it to start receiving alerts.
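If you need the Dashboards URL or credentials again later, you can re-read the stack outputs from CloudFormation (this assumes the default stack name, `opensearch-monitor-stack`, defined in app.py):

```bash
(.env)$ aws cloudformation describe-stacks \
    --stack-name opensearch-monitor-stack \
    --query "Stacks[0].Outputs"
```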
102 | 
103 | -----
104 | 
105 | ## Post-Deployment: Set up OpenSearch subscription filters for CloudWatch logs
106 | Once the stack is deployed successfully, you need to create subscription filters and assign them to the Lambda function. Run [setupCWSubscriptionFilter.py](opensearch/setupCWSubscriptionFilter.py) to create the subscription filters (it assumes CloudWatch log groups with the prefix /aws/aes/domains); if your prefix differs, make sure to change that file before running the step below.
107 | 
108 | ```bash
109 | (.env)$ python3 opensearch/setupCWSubscriptionFilter.py deploy
110 | ```
111 | ![Terminal - Setup CW Subscription filter Post Deploy](/images/opensearch_monitor_post_deploy.png)
112 | -----
113 | 
114 | ## Pre-built Monitoring Dashboards
115 | The monitoring domain comes with pre-built dashboards for OpenSearch Service domain and OpenSearch Serverless collection metrics. These dashboards can be accessed as follows:
116 | 1. Log in to Dashboards: access OpenSearch Dashboards with the IP obtained after the deployment and log in as shown below
117 | ![Dashboards login screen](/images/opensearch_dashboards_login.png)
118 | 
119 | 2. Once logged in, select the private tenant from the pop-up and then select Dashboard as shown below
120 | ![OpenSearch Dashboards](/images/opensearch_dashboards_select.png)
121 | 
122 | 3. Clicking on Dashboard displays the list of dashboards that come by default
123 | ![OpenSearch Dashboards List](/images/opensearch_dashboards_list.png)
124 | 
125 | - **Domain Metrics**: gives a 360-degree view of all Amazon OpenSearch Service domains across the regions.
126 | ![Domain Metrics At A Glance](/images/opensearch_domain_metrics_at_a_glance.png)
127 | 
128 | - **Domain Overview**: gives more detailed metrics for a particular domain, which can help you deep-dive into issues with a specific domain.
129 | ![Domain Overview](/images/opensearch_domain_overview.png)
130 | 
131 | - **Serverless Collection Metrics**: gives a 360-degree view of all Amazon OpenSearch Serverless collections across the regions.
132 | ![Serverless Collection Metrics](/images/opensearch_collection_metrics.png)
133 | 
134 | - **Serverless Collection Overview**: gives more detailed metrics for a particular collection, which can help you deep-dive into issues with a specific collection.
135 | ![Serverless Collection Overview](/images/opensearch_collection_overview.png)
136 | 
137 | -----
138 | 
139 | ## Pre-built Alerts
140 | 
141 | The monitoring domain comes with the pre-built alerts below, which notify you by e-mail about events such as cluster health, disk, memory, and JVM issues.
142 | These alerts are built on OpenSearch Service domain metrics.
143 | 
144 | | Alert Type                    | Frequency     |
145 | | ----------------------------- | ------------- |
146 | | Cluster Health - Red          | 5 Min         |
147 | | Cluster Index Writes Blocked  | 5 Min         |
148 | | Automated Snapshot Failure    | 5 Min         |
149 | | JVM Memory Pressure > 80%     | 5 Min         |
150 | | CPU Utilization > 80%         | 15 Min        |
151 | | No Kibana Healthy Nodes       | 15 Min        |
152 | | No Dashboards Healthy Nodes   | 15 Min        |
153 | | Invalid Host Header Requests  | 15 Min        |
154 | | Cluster Health - Yellow       | 30 Min        |
155 | 
156 | -----
157 | ## Cleanup
158 | 
159 | To clean up the stacks, destroy the opensearch stack; all other stacks will be torn down due to dependencies.
160 | 
161 | ```bash
162 | (.env)$ cdk destroy
163 | ```
164 | 
165 | ![Destroy](/images/opensearch_monitor_destroy.png)
166 | 
167 | To remove the subscription filters for CloudWatch logs, run the script below.
It traverses the Amazon OpenSearch Service CloudWatch log groups and deletes any filter that was created during the deploy.
168 | 
169 | ```bash
170 | (.env)$ python3 opensearch/setupCWSubscriptionFilter.py destroy
171 | ```
172 | ![Terminal - Post Destroy](/images/opensearch_monitor_post_destroy.png)
173 | -----
174 | ## Total Cost of Ownership
175 | 
176 | Running this solution will incur charges of less than $10 per day for one domain, with an additional $2 per day for each additional domain.
177 | 
178 | -----
179 | ## Reporting Bugs
180 | 
181 | If you encounter a bug, please create a new issue with as much detail as possible and steps for reproducing the bug. See the [Contributing Guidelines](./CONTRIBUTING.md) for more details.
182 | 
183 | -----
184 | ## Security
185 | 
186 | See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information.
187 | 
188 | -----
189 | ## License
190 | 
191 | This library is licensed under the MIT-0 License. See the LICENSE file.
192 | 
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | '''
3 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
4 | SPDX-License-Identifier: MIT-0
5 | '''
6 | 
7 | from aws_cdk import App
8 | 
9 | from opensearch.opensearch_monitor_stack import OpenSearchMonitor
10 | 
11 | 
12 | app = App()
13 | OpenSearchMonitor(app, "opensearch-monitor-stack")
14 | 
15 | app.synth()
16 | 
--------------------------------------------------------------------------------
/bootstrap.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # create the virtual environment
4 | python3 -m venv .env
5 | # Install into the virtual environment
6 | source .env/bin/activate
7 | # upgrade pip, then download requirements
8 | .env/bin/python -m pip install --upgrade pip
9 | .env/bin/python -m pip install -r requirements.txt
10 | # Load dependencies for the lambda functions,
11 | .env/bin/python -m pip install --target CWMetricsToOpenSearch/ -r CWMetricsToOpenSearch/requirements.txt
12 | # and add boto3, which is attached as a layer for the metrics function; the latest boto3 is needed for serverless collections
13 | .env/bin/python -m pip install boto3 -t boto3-layer/python
14 | 
15 | # Set region to deploy the stack
16 | region_default="us-east-1"
17 | echo -e
18 | read -p "Please enter your region to deploy the stack [$region_default]: " region
19 | region="${region:-$region_default}"
20 | aws configure set default.region "$region"
21 | 
22 | # Add e-mail for the notification
23 | email_default="user@example.com"
24 | echo -e
25 | read -p "Please enter an e-mail for alert [$email_default]: " email
26 | email="${email:-$email_default}"
27 | sed -i -e 's/user@example.com/'"$email"'/g' opensearch/opensearch_monitor_stack.py
28 | 
--------------------------------------------------------------------------------
/cdk.json:
--------------------------------------------------------------------------------
1 | {
2 |     "app": "python3 app.py",
3 |     "context": {
4 |         "aws-cdk:enableDiffNoFail": "true",
5 |         "@aws-cdk/core:stackRelativeExports": "true",
6 |         "@aws-cdk/aws-ecr-assets:dockerIgnoreSupport": true
7 |     }
8 | }
9 | 
--------------------------------------------------------------------------------
/images/amazon_opensearch_service_monitor_framework.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-opensearch-service-monitor/608db26db8c2f2e70bb8c0db74364c2a2eb9b4ea/images/amazon_opensearch_service_monitor_framework.png -------------------------------------------------------------------------------- /images/opensearch_collection_metrics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-opensearch-service-monitor/608db26db8c2f2e70bb8c0db74364c2a2eb9b4ea/images/opensearch_collection_metrics.png -------------------------------------------------------------------------------- /images/opensearch_collection_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-opensearch-service-monitor/608db26db8c2f2e70bb8c0db74364c2a2eb9b4ea/images/opensearch_collection_overview.png -------------------------------------------------------------------------------- /images/opensearch_dashboards_list.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-opensearch-service-monitor/608db26db8c2f2e70bb8c0db74364c2a2eb9b4ea/images/opensearch_dashboards_list.png -------------------------------------------------------------------------------- /images/opensearch_dashboards_login.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-opensearch-service-monitor/608db26db8c2f2e70bb8c0db74364c2a2eb9b4ea/images/opensearch_dashboards_login.png -------------------------------------------------------------------------------- /images/opensearch_dashboards_select.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-opensearch-service-monitor/608db26db8c2f2e70bb8c0db74364c2a2eb9b4ea/images/opensearch_dashboards_select.png -------------------------------------------------------------------------------- /images/opensearch_domain_metrics_at_a_glance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-opensearch-service-monitor/608db26db8c2f2e70bb8c0db74364c2a2eb9b4ea/images/opensearch_domain_metrics_at_a_glance.png -------------------------------------------------------------------------------- /images/opensearch_domain_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-opensearch-service-monitor/608db26db8c2f2e70bb8c0db74364c2a2eb9b4ea/images/opensearch_domain_overview.png -------------------------------------------------------------------------------- /images/opensearch_monitor_bootstrap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-opensearch-service-monitor/608db26db8c2f2e70bb8c0db74364c2a2eb9b4ea/images/opensearch_monitor_bootstrap.png -------------------------------------------------------------------------------- /images/opensearch_monitor_bootstrap_cdk.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-opensearch-service-monitor/608db26db8c2f2e70bb8c0db74364c2a2eb9b4ea/images/opensearch_monitor_bootstrap_cdk.png 
--------------------------------------------------------------------------------
/images/opensearch_monitor_clone.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-opensearch-service-monitor/608db26db8c2f2e70bb8c0db74364c2a2eb9b4ea/images/opensearch_monitor_clone.png
--------------------------------------------------------------------------------
/images/opensearch_monitor_deploy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-opensearch-service-monitor/608db26db8c2f2e70bb8c0db74364c2a2eb9b4ea/images/opensearch_monitor_deploy.png
--------------------------------------------------------------------------------
/images/opensearch_monitor_destroy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-opensearch-service-monitor/608db26db8c2f2e70bb8c0db74364c2a2eb9b4ea/images/opensearch_monitor_destroy.png
--------------------------------------------------------------------------------
/images/opensearch_monitor_post_deploy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-opensearch-service-monitor/608db26db8c2f2e70bb8c0db74364c2a2eb9b4ea/images/opensearch_monitor_post_deploy.png
--------------------------------------------------------------------------------
/images/opensearch_monitor_post_destroy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-opensearch-service-monitor/608db26db8c2f2e70bb8c0db74364c2a2eb9b4ea/images/opensearch_monitor_post_destroy.png
--------------------------------------------------------------------------------
/opensearch/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-opensearch-service-monitor/608db26db8c2f2e70bb8c0db74364c2a2eb9b4ea/opensearch/__init__.py
--------------------------------------------------------------------------------
/opensearch/create_alerts.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | echo -e "Starting Dashboards alerts and dashboard generation"
3 | 
4 | # The domain takes some time to come up and returns Access Denied errors until then. Sleep for 60 seconds so the domain is ready before running the POST commands.
5 | sleep 60;
6 | curl -s -XGET -u DOMAIN_ADMIN_UNAME:DOMAIN_ADMIN_PW 'https://DOMAIN_ENDPOINT'
7 | sleep 10;
8 | 
9 | # Kibana had a bug where the origin URL is not stored when a string field is converted to the URL type and exported to another domain. The workaround is to replace it manually.
10 | # https://github.com/elastic/kibana/issues/63924
11 | InstanceIP=`curl ifconfig.me`
12 | sed -i 's/CHANGE_ORIGIN_URL/'$InstanceIP'/g' /home/ec2-user/assets/export_opensearch_dashboards_V1_0.ndjson
13 | 
14 | # Create backend role to load CW logs using lambda
15 | curl -s -XPATCH -u DOMAIN_ADMIN_UNAME:DOMAIN_ADMIN_PW 'https://DOMAIN_ENDPOINT/_opendistro/_security/api/rolesmapping/all_access' -H 'Content-Type: application/json' -d '[ {"op":"add","path":"/backend_roles","value":["LAMBDA_CW_LOGS_ROLE_ARN"]} ] '
16 | 
17 | # Generate auth for Default Dashboards
18 | curl -XPOST 'https://DOMAIN_ENDPOINT/_dashboards/auth/login' -H "osd-xsrf: true" -H "content-type:application/json" -d '{"username":"DOMAIN_ADMIN_UNAME", "password" : "DOMAIN_ADMIN_PW"} ' -c auth.txt
19 | 
20 | # Load Default Dashboard
21 | curl -XPOST 'https://DOMAIN_ENDPOINT/_dashboards/api/saved_objects/_import' -H "osd-xsrf:true" -b auth.txt --form file=@export_opensearch_dashboards_V1_0.ndjson
22 | 
23 | 
24 | ################# Index Templates and ISM ###################
25 | # Create ISM policy to delete data after 366 days
26 | curl -s -XPUT -u DOMAIN_ADMIN_UNAME:DOMAIN_ADMIN_PW "https://DOMAIN_ENDPOINT/_opendistro/_ism/policies/domains" -H 'Content-Type: application/json' -d'{"policy":{"ism_template":{"index_patterns" : ["domains-*", "cwl-*"]},"policy_id":"domains","description":"hot-delete workflow","last_updated_time":1612206385815,"schema_version":1,"error_notification":null,"default_state":"hot","states":[{"name":"hot","actions":[],"transitions":[{"state_name":"delete","conditions":{"min_index_age":"366d"}}]},{"name":"delete","actions":[{"delete":{}}],"transitions":[]}]}}'
27 | 
28 | # Create Template
29 | curl -s -XPUT -u DOMAIN_ADMIN_UNAME:DOMAIN_ADMIN_PW "https://DOMAIN_ENDPOINT/_template/domains" -H 'Content-Type: application/json' -d'{"index_patterns":["domains-*", "cwl-*"],"settings":{"number_of_shards":1,"number_of_replicas":1}}'
30 | 
31 | ################# Pipeline CREATION ###################
32 | curl -s -XPUT -u DOMAIN_ADMIN_UNAME:DOMAIN_ADMIN_PW "https://DOMAIN_ENDPOINT/_ingest/pipeline/search-slow-logs" -H 'Content-Type: application/json' -d'{"description":"Pipeline for parsing opensearch search slow logs in plaintext
format.","processors":[{"grok":{"field":"@message","pattern_definitions":{"GREEDYMULTILINE":"(.|\n)*","INDEXNAME":"[a-zA-Z0-9_.-]*"},"patterns":["\\[%{TIMESTAMP_ISO8601:opensearch.slowlog.timestamp}\\]\\[%{LOGLEVEL:opensearch.slowlog.level}\\s*\\]\\[%{DATA:opensearch.slowlog.logger}\\]\\s*\\[%{DATA:opensearch.slowlog.node}\\]\\s*\\[%{DATA:opensearch.slowlog.index.name}\\]\\s*\\[%{DATA:opensearch.slowlog.shard.id:int}]\\stook\\[%{DATA:opensearch.slowlog.took}\\],\\stook_millis\\[%{DATA:opensearch.slowlog.took_millis:float}\\],\\stotal_hits\\[%{DATA:opensearch.slowlog.total_hits:int}\\+\\shits\\]\\,\\stypes\\[%{DATA:opensearch.slowlog.types}\\],\\sstats\\[%{DATA:opensearch.slowlog.stats}\\],\\ssearch_type\\[%{DATA:opensearch.slowlog.search_type}\\],\\stotal_shards\\[%{DATA:opensearch.slowlog.total_shards:int}\\],\\ssource\\[%{GREEDYDATA:opensearch.slowlog.source_query}\\],\\sid\\[%{DATA:opensearch.slowlog.x-opaque-id}\\]","\\[%{TIMESTAMP_ISO8601:opensearch.slowlog.timestamp}\\]\\[%{LOGLEVEL:opensearch.slowlog.level}\\s*\\]\\[%{DATA:opensearch.slowlog.logger}\\]\\s*\\[%{DATA:opensearch.slowlog.node}\\]\\s*\\[%{DATA:opensearch.slowlog.index.name}\\]\\s*\\[%{DATA:opensearch.slowlog.shard.id:int}]\\stook\\[%{DATA:opensearch.slowlog.took}\\],\\stook_millis\\[%{DATA:opensearch.slowlog.took_millis:float}\\],\\stotal_hits\\[%{DATA:opensearch.slowlog.total_hits:int}\\shits\\]\\,\\stypes\\[%{DATA:opensearch.slowlog.types}\\],\\sstats\\[%{DATA:opensearch.slowlog.stats}\\],\\ssearch_type\\[%{DATA:opensearch.slowlog.search_type}\\],\\stotal_shards\\[%{DATA:opensearch.slowlog.total_shards:int}\\],\\ssource\\[%{GREEDYDATA:opensearch.slowlog.source_query}\\],\\sid\\[%{DATA:opensearch.slowlog.x-opaque-id}\\]"]}},{"date":{"field":"search.slowlog.timestamp","target_field":"@timestamp","formats":["yyyy-MM-ddTHH:mm:ss,SSS"],"if":"ctx.event.timezone == null","on_failure":[{"append":{"field":"error.message","value":"{{ _ingest.on_failure_message }}"}}]}},{"date":{"if":"ctx.event.timezone != null","field":"search.slowlog.timestamp","target_field":"@timestamp","formats":["yyyy-MM-ddTHH:mm:ss,SSS"],"timezone":"{{ event.timezone }}","on_failure":[{"append":{"value":"{{ _ingest.on_failure_message }}","field":"error.message"}}]}}],"on_failure":[{"set":{"field":"error.message","value":"{{ _ingest.on_failure_message }}"}}]}' 33 | 34 | curl -s -XPUT -u DOMAIN_ADMIN_UNAME:DOMAIN_ADMIN_PW "https://DOMAIN_ENDPOINT/_ingest/pipeline/index-slow-logs" -H 'Content-Type: application/json' -d'{"description":"Pipeline for parsing opensearch index slow logs in plaintext 
format.","processors":[{"grok":{"field":"@message","pattern_definitions":{"GREEDYMULTILINE":"(.|\n)*","INDEXNAME":"[a-zA-Z0-9_.-]*"},"patterns":["\\[%{TIMESTAMP_ISO8601:opensearch.slowlog.timestamp}\\]\\[%{WORD:opensearch.slowlog.level}(%{SPACE})\\]\\[%{DATA:opensearch.slowlog.logger}\\]%{SPACE}\\[%{DATA:opensearch.slowlog.node}\\](%{SPACE})?(\\[%{INDEXNAME:opensearch.slowlog.index.name}\\]\\[%{NUMBER:opensearch.slowlog.shard.id}\\])?(%{SPACE})?(\\[%{INDEXNAME:opensearch.slowlog.index.name}\\/%{DATA:opensearch.slowlog.index.id}\\])?(%{SPACE})?%{SPACE}(took\\[%{DATA:opensearch.slowlog.took}\\],)?%{SPACE}(took_millis\\[%{NUMBER:opensearch.slowlog.took_millis:long}\\],)?%{SPACE}(type\\[%{DATA:opensearch.slowlog.type}\\],)?%{SPACE}(id\\[%{DATA:opensearch.slowlog.id}\\],)?%{SPACE}(routing\\[%{DATA:opensearch.slowlog.routing}\\],)?%{SPACE}(total_hits\\[%{NUMBER:opensearch.slowlog.total_hits:int}\\],)?%{SPACE}(types\\[%{DATA:opensearch.slowlog.types}\\],)?%{SPACE}(stats\\[%{DATA:opensearch.slowlog.stats}\\],)?%{SPACE}(search_type\\[%{DATA:opensearch.slowlog.search_type}\\],)?%{SPACE}(total_shards\\[%{NUMBER:opensearch.slowlog.total_shards:int}\\],)?%{SPACE}(source\\[%{GREEDYMULTILINE:opensearch.slowlog.source_query}\\])?,?%{SPACE}(extra_source\\[%{DATA:opensearch.slowlog.extra_source}\\])?,?"]}},{"json":{"field":"opensearch.slowlog.source_query","target_field":"opensearch.slowlog.source_query_json","on_failure":[{"append":{"field":"error.message","value":"{{ _ingest.on_failure_message }}"}}]}},{"date":{"field":"opensearch.slowlog.timestamp","target_field":"@timestamp","formats":["yyyy-MM-ddTHH:mm:ss,SSS"],"if":"ctx.event.timezone == null","on_failure":[{"append":{"field":"error.message","value":"{{ _ingest.on_failure_message }}"}}]}},{"date":{"if":"ctx.event.timezone != null","field":"opensearch.slowlog.timestamp","target_field":"@timestamp","formats":["yyyy-MM-ddTHH:mm:ss,SSS"],"timezone":"{{ event.timezone }}","on_failure":[{"append":{"value":"{{ _ingest.on_failure_message }}","field":"error.message"}}]}}],"on_failure":[{"set":{"field":"error.message","value":"{{ _ingest.on_failure_message }}"}}]}' 35 | 36 | curl -s -XPUT -u DOMAIN_ADMIN_UNAME:DOMAIN_ADMIN_PW "https://DOMAIN_ENDPOINT/_ingest/pipeline/application-logs" -H 'Content-Type: application/json' -d'{"description":"Pipeline for parsing the opensearch server log file in plaintext format.","processors":[{"grok":{"field":"@message","pattern_definitions":{"GREEDYMULTILINE":"(.|\n)*","INDEXNAME":"[a-zA-Z0-9_.-]*","GC_ALL":"\\[gc\\]\\[%{NUMBER:opensearch.server.gc.overhead_seq}\\] overhead, spent \\[%{NUMBER:opensearch.server.gc.collection_duration.time:float}%{DATA:opensearch.server.gc.collection_duration.unit}\\] collecting in the last 
\\[%{NUMBER:opensearch.server.gc.observation_duration.time:float}%{DATA:opensearch.server.gc.observation_duration.unit}\\]","GC_YOUNG":"\\[gc\\]\\[young\\]\\[%{NUMBER:opensearch.server.gc.young.one}\\]\\[%{NUMBER:opensearch.server.gc.young.two}\\]%{SPACE}%{GREEDYMULTILINE:message}","LOG_HEADER":"\\[%{TIMESTAMP_ISO8601:opensearch.server.timestamp}\\]\\[%{LOGLEVEL:log.level}%{SPACE}\\]\\[%{DATA:opensearch.component}%{SPACE}\\](%{SPACE})?(\\[%{DATA:opensearch.node.name}\\])?(%{SPACE})?"},"patterns":["%{LOG_HEADER}%{GC_ALL}","%{LOG_HEADER}%{GC_YOUNG}","%{LOG_HEADER}%{SPACE}((\\[%{INDEXNAME:opensearch.index.name}\\]|\\[%{INDEXNAME:opensearch.index.name}\\/%{DATA:opensearch.index.id}\\]))?%{SPACE}%{GREEDYMULTILINE:message}"]}},{"date":{"target_field":"@timestamp","formats":["yyyy-MM-dd'T'HH:mm:ss,SSS"],"if":"ctx.event.timezone == null","field":"opensearch.server.timestamp","on_failure":[{"append":{"field":"error.message","value":"{{ _ingest.on_failure_message }}"}}]}},{"date":{"formats":["yyyy-MM-dd'T'HH:mm:ss,SSS"],"timezone":"{{ event.timezone }}","if":"ctx.event.timezone != null","field":"opensearch.server.timestamp","target_field":"@timestamp","on_failure":[{"append":{"value":"{{ _ingest.on_failure_message }}","field":"error.message"}}]}}],"on_failure":[{"set":{"field":"error.message","value":"{{ _ingest.on_failure_message }}"}}]}' 37 | 38 | ################# ALERTS CREATION ################### 39 | 40 | # Create Destination for E-mail alert 41 | destination_id=`curl -s -XPOST -u DOMAIN_ADMIN_UNAME:DOMAIN_ADMIN_PW 'https://DOMAIN_ENDPOINT/_opendistro/_alerting/destinations' -H 'Content-Type: application/json' -d'{"name":"cdk_monitoring_email","type":"sns","sns":{"role_arn":"SNS_ROLE_ARN","topic_arn":"SNS_TOPIC_ARN"}}' | jq -r '._id'` 42 | 43 | # Create Monitor for Cluster Status Yellow and send an alert if its yellow for last 30 mins 44 | curl -s -XPOST -u DOMAIN_ADMIN_UNAME:DOMAIN_ADMIN_PW 'https://DOMAIN_ENDPOINT/_opendistro/_alerting/monitors' -H 'Content-Type: application/json' -d'{"type":"monitor","name":"cluster_health_yellow","enabled":true,"schedule":{"period":{"interval":30,"unit":"MINUTES"}},"inputs":[{"search":{"indices":["domains-*"],"query":{"size":0,"aggs":{"domain_yellow":{"terms":{"field":"domain_name.keyword","size":25}}},"query":{"bool":{"filter":[{"range":{"@timestamp":{"from":"{{period_end}}||-30m","to":"{{period_end}}","include_lower":true,"include_upper":true,"format":"epoch_millis","boost":1}}},{"term":{"ClusterStatus.yellow":{"value":1,"boost":1}}}]}}}}}],"triggers":[{"name":"cluster_health_yellow","severity":"3","condition":{"script":{"source":"ctx.results[0].hits.total.value > 0","lang":"painless"}},"actions":[{"name":"cluster_health_yellow_alert","destination_id":"'$destination_id'","message_template":{"source":"One or more of your cluster health has been turned into YELLOW between {{ctx.periodStart}} and {{ctx.periodEnd}}, Please find below details about the domain for further actions and troubleshooting. 
\n\n- Severity: {{ctx.trigger.severity}}\n- Domain names: \n {{#ctx.results.0.aggregations.domain_yellow.buckets}} \n {{key}} https://'$InstanceIP'/_dashboards/app/dashboards#/view/19087650-454f-11eb-87ad-632020bc8bdf?_a=(query:(language:kuery,query:%27domain_name%20:%20%22{{key}}%22%27)) \n{{/ctx.results.0.aggregations.domain_yellow.buckets}}","lang":"mustache"},"throttle_enabled":false,"subject_template":{"source":"cluster_health_yellow_alert"}}]}]}' 45 | 46 | 47 | 48 | 49 | # Create Monitor for Cluster Status Red and send an alert if its RED every 5 mins 50 | curl -s -XPOST -u DOMAIN_ADMIN_UNAME:DOMAIN_ADMIN_PW 'https://DOMAIN_ENDPOINT/_opendistro/_alerting/monitors' -H 'Content-Type: application/json' -d'{"type":"monitor","name":"cluster_health_red","enabled":true,"schedule":{"period":{"interval":5,"unit":"MINUTES"}},"inputs":[{"search":{"indices":["domains-*"],"query":{"size":0,"aggs":{"domain_red":{"terms":{"field":"domain_name.keyword","size":25}}},"query":{"bool":{"filter":[{"range":{"@timestamp":{"from":"{{period_end}}||-5m","to":"{{period_end}}","include_lower":true,"include_upper":true,"format":"epoch_millis","boost":1}}},{"term":{"ClusterStatus.red":{"value":1,"boost":1}}}]}}}}}],"triggers":[{"name":"cluster_health_red","severity":"1","condition":{"script":{"source":"ctx.results[0].hits.total.value > 0","lang":"painless"}},"actions":[{"name":"cluster_health_red_alert","destination_id":"'$destination_id'","message_template":{"source":"One or more of your cluster health has been turned into RED between {{ctx.periodStart}} and {{ctx.periodEnd}}, Please find below details about the domain for further actions and troubleshooting. \n\n- Severity: {{ctx.trigger.severity}}\n- Domain names: \n {{#ctx.results.0.aggregations.domain_red.buckets}} \n {{key}} https://'$InstanceIP'/_dashboards/app/dashboards#/view/19087650-454f-11eb-87ad-632020bc8bdf?_a=(query:(language:kuery,query:%27domain_name%20:%20%22{{key}}%22%27)) \n{{/ctx.results.0.aggregations.domain_red.buckets}}","lang":"mustache"},"throttle_enabled":false,"subject_template":{"source":"cluster_health_red_alert"}}]}]}' 51 | 52 | 53 | # Create Monitor for Cluster when ClusterIndexWritesBlocked is 1 for last 5 mins 54 | curl -s -XPOST -u DOMAIN_ADMIN_UNAME:DOMAIN_ADMIN_PW 'https://DOMAIN_ENDPOINT/_opendistro/_alerting/monitors' -H 'Content-Type: application/json' -d'{"type":"monitor","name":"ClusterIndexWritesBlocked","enabled":true,"schedule":{"period":{"interval":5,"unit":"MINUTES"}},"inputs":[{"search":{"indices":["domains-*"],"query":{"size":0,"aggs":{"ClusterIndexWritesBlocked":{"terms":{"field":"domain_name.keyword","size":25}}},"query":{"bool":{"filter":[{"range":{"@timestamp":{"from":"{{period_end}}||-5m","to":"{{period_end}}","include_lower":true,"include_upper":true,"format":"epoch_millis","boost":1}}},{"term":{"ClusterIndexWritesBlocked":{"value":1,"boost":1}}}]}}}}}],"triggers":[{"name":"ClusterIndexWritesBlocked","severity":"1","condition":{"script":{"source":"ctx.results[0].hits.total.value > 0","lang":"painless"}},"actions":[{"name":"ClusterIndexWritesBlocked_Alert","destination_id":"'$destination_id'","message_template":{"source":"One or more of your ClusterIndexWritesBlocked has been turned into 1 between {{ctx.periodStart}} and {{ctx.periodEnd}}, Please find below details about the domain for further actions and troubleshooting. 
\n\n- Severity: {{ctx.trigger.severity}}\n- Domain names: \n {{#ctx.results.0.aggregations.ClusterIndexWritesBlocked.buckets}} \n {{key}} https://'$InstanceIP'/_dashboards/app/dashboards#/view/19087650-454f-11eb-87ad-632020bc8bdf?_a=(query:(language:kuery,query:%27domain_name%20:%20%22{{key}}%22%27)) \n{{/ctx.results.0.aggregations.ClusterIndexWritesBlocked.buckets}}","lang":"mustache"},"throttle_enabled":false,"subject_template":{"source":"ClusterIndexWritesBlocked_Alert"}}]}]}' 55 | 56 | 57 | # Create Monitor for Cluster when AutomatedSnapshotFailure is 1 for last 5 mins 58 | curl -s -XPOST -u DOMAIN_ADMIN_UNAME:DOMAIN_ADMIN_PW 'https://DOMAIN_ENDPOINT/_opendistro/_alerting/monitors' -H 'Content-Type: application/json' -d'{"type":"monitor","name":"AutomatedSnapshotFailure","enabled":true,"schedule":{"period":{"interval":5,"unit":"MINUTES"}},"inputs":[{"search":{"indices":["domains-*"],"query":{"size":0,"aggs":{"AutomatedSnapshotFailure":{"terms":{"field":"domain_name.keyword","size":25}}},"query":{"bool":{"filter":[{"range":{"@timestamp":{"from":"{{period_end}}||-5m","to":"{{period_end}}","include_lower":true,"include_upper":true,"format":"epoch_millis","boost":1}}},{"term":{"AutomatedSnapshotFailure":{"value":1,"boost":1}}}]}}}}}],"triggers":[{"name":"AutomatedSnapshotFailure","severity":"1","condition":{"script":{"source":"ctx.results[0].hits.total.value > 0","lang":"painless"}},"actions":[{"name":"AutomatedSnapshotFailure_Alert","destination_id":"'$destination_id'","message_template":{"source":"One or more of your AutomatedSnapshotFailure has been turned into 1 between {{ctx.periodStart}} and {{ctx.periodEnd}}, Please find below details about the domain for further actions and troubleshooting. \n\n- Severity: {{ctx.trigger.severity}}\n- Domain names: \n {{#ctx.results.0.aggregations.AutomatedSnapshotFailure.buckets}} \n {{key}} https://'$InstanceIP'/_dashboards/app/dashboards#/view/19087650-454f-11eb-87ad-632020bc8bdf?_a=(query:(language:kuery,query:%27domain_name%20:%20%22{{key}}%22%27)) \n{{/ctx.results.0.aggregations.AutomatedSnapshotFailure.buckets}}","lang":"mustache"},"throttle_enabled":false,"subject_template":{"source":"AutomatedSnapshotFailure_Alert"}}]}]}' 59 | 60 | 61 | # Create Monitor for Cluster when KibanaHealthyNodes is 0 for last 15 mins 62 | curl -s -XPOST -u DOMAIN_ADMIN_UNAME:DOMAIN_ADMIN_PW 'https://DOMAIN_ENDPOINT/_opendistro/_alerting/monitors' -H 'Content-Type: application/json' -d'{"type":"monitor","name":"KibanaHealthyNodes","enabled":true,"schedule":{"period":{"interval":15,"unit":"MINUTES"}},"inputs":[{"search":{"indices":["domains-*"],"query":{"size":0,"aggs":{"KibanaHealthyNodes":{"terms":{"field":"domain_name.keyword","size":25}}},"query":{"bool":{"filter":[{"range":{"@timestamp":{"from":"{{period_end}}||-15m","to":"{{period_end}}","include_lower":true,"include_upper":true,"format":"epoch_millis","boost":1}}},{"term":{"KibanaHealthyNodes":{"value":0,"boost":1}}}]}}}}}],"triggers":[{"name":"KibanaHealthyNodes","severity":"1","condition":{"script":{"source":"ctx.results[0].hits.total.value > 0","lang":"painless"}},"actions":[{"name":"KibanaHealthyNodes_Alert","destination_id":"'$destination_id'","message_template":{"source":"One or more of your KibanaHealthyNodes has been turned into 0 between {{ctx.periodStart}} and {{ctx.periodEnd}}, Please find below details about the domain for further actions and troubleshooting. 
\n\n- Severity: {{ctx.trigger.severity}}\n- Domain names: \n {{#ctx.results.0.aggregations.KibanaHealthyNodes.buckets}} \n {{key}} https://'$InstanceIP'/_dashboards/app/dashboards#/view/19087650-454f-11eb-87ad-632020bc8bdf?_a=(query:(language:kuery,query:%27domain_name%20:%20%22{{key}}%22%27)) \n{{/ctx.results.0.aggregations.KibanaHealthyNodes.buckets}}","lang":"mustache"},"throttle_enabled":false,"subject_template":{"source":"KibanaHealthyNodes_Alert"}}]}]}' 63 | 64 | 65 | # Create Monitor for Cluster when OpenSearchDashboardsHealthyNode is 0 for last 15 mins 66 | curl -s -XPOST -u DOMAIN_ADMIN_UNAME:DOMAIN_ADMIN_PW 'https://DOMAIN_ENDPOINT/_opendistro/_alerting/monitors' -H 'Content-Type: application/json' -d'{"type":"monitor","name":"OpenSearchDashboardsHealthyNode","enabled":true,"schedule":{"period":{"interval":15,"unit":"MINUTES"}},"inputs":[{"search":{"indices":["domains-*"],"query":{"size":0,"aggs":{"OpenSearchDashboardsHealthyNode":{"terms":{"field":"domain_name.keyword","size":25}}},"query":{"bool":{"filter":[{"range":{"@timestamp":{"from":"{{period_end}}||-15m","to":"{{period_end}}","include_lower":true,"include_upper":true,"format":"epoch_millis","boost":1}}},{"term":{"OpenSearchDashboardsHealthyNode":{"value":0,"boost":1}}}]}}}}}],"triggers":[{"name":"OpenSearchDashboardsHealthyNode","severity":"1","condition":{"script":{"source":"ctx.results[0].hits.total.value > 0","lang":"painless"}},"actions":[{"name":"OpenSearchDashboardsHealthyNode","destination_id":"'$destination_id'","message_template":{"source":"One or more of your OpenSearchDashboardsHealthyNode has been turned into 0 between {{ctx.periodStart}} and {{ctx.periodEnd}}, Please find below details about the domain for further actions and troubleshooting. \n\n- Severity: {{ctx.trigger.severity}}\n- Domain names: \n {{#ctx.results.0.aggregations.OpenSearchDashboardsHealthyNode.buckets}} \n {{key}} https://'$InstanceIP'/_dashboards/app/dashboards#/view/19087650-454f-11eb-87ad-632020bc8bdf?_a=(query:(language:kuery,query:%27domain_name%20:%20%22{{key}}%22%27)) \n{{/ctx.results.0.aggregations.OpenSearchDashboardsHealthyNode.buckets}}","lang":"mustache"},"throttle_enabled":false,"subject_template":{"source":"OpenSearchDashboardsHealthyNode_Alert"}}]}]}' 67 | 68 | 69 | 70 | # Create Monitor for Cluster when InvalidHostHeaderRequests is 1 for last 15 mins 71 | curl -s -XPOST -u DOMAIN_ADMIN_UNAME:DOMAIN_ADMIN_PW 'https://DOMAIN_ENDPOINT/_opendistro/_alerting/monitors' -H 'Content-Type: application/json' -d'{"type":"monitor","name":"InvalidHostHeaderRequests","enabled":true,"schedule":{"period":{"interval":15,"unit":"MINUTES"}},"inputs":[{"search":{"indices":["domains-*"],"query":{"size":0,"aggs":{"InvalidHostHeaderRequests":{"terms":{"field":"domain_name.keyword","size":25}}},"query":{"bool":{"filter":[{"range":{"@timestamp":{"from":"{{period_end}}||-15m","to":"{{period_end}}","include_lower":true,"include_upper":true,"format":"epoch_millis","boost":1}}},{"term":{"InvalidHostHeaderRequests":{"value":1,"boost":1}}}]}}}}}],"triggers":[{"name":"InvalidHostHeaderRequests","severity":"1","condition":{"script":{"source":"ctx.results[0].hits.total.value > 0","lang":"painless"}},"actions":[{"name":"InvalidHostHeaderRequests_Alert","destination_id":"'$destination_id'","message_template":{"source":"One or more of your InvalidHostHeaderRequests has been turned into 1 between {{ctx.periodStart}} and {{ctx.periodEnd}}, Please find below details about the domain for further actions and troubleshooting. 
\n\n- Severity: {{ctx.trigger.severity}}\n- Domain names: \n {{#ctx.results.0.aggregations.InvalidHostHeaderRequests.buckets}} \n {{key}} https://'$InstanceIP'/_dashboards/app/dashboards#/view/19087650-454f-11eb-87ad-632020bc8bdf?_a=(query:(language:kuery,query:%27domain_name%20:%20%22{{key}}%22%27)) \n{{/ctx.results.0.aggregations.InvalidHostHeaderRequests.buckets}}","lang":"mustache"},"throttle_enabled":false,"subject_template":{"source":"InvalidHostHeaderRequests_Alert"}}]}]}' 72 | 73 | 74 | # Create Monitor for Cluster when CPUUtilization is > 80% in last 15 mins 75 | curl -s -XPOST -u DOMAIN_ADMIN_UNAME:DOMAIN_ADMIN_PW 'https://DOMAIN_ENDPOINT/_opendistro/_alerting/monitors' -H 'Content-Type: application/json' -d'{"type":"monitor","name":"CPUUtilization","enabled":true,"schedule":{"period":{"interval":15,"unit":"MINUTES"}},"inputs":[{"search":{"indices":["domains-*"],"query":{"size":0,"aggs":{"CPUUtilization":{"terms":{"field":"domain_name.keyword","size":25}}},"query":{"bool":{"filter":[{"range":{"@timestamp":{"from":"{{period_end}}||-15m","to":"{{period_end}}","include_lower":true,"include_upper":true,"format":"epoch_millis","boost":1}}},{"range":{"CPUUtilization":{"gte":80}}}]}}}}}],"triggers":[{"name":"CPUUtilization","severity":"1","condition":{"script":{"source":"ctx.results[0].hits.total.value > 0","lang":"painless"}},"actions":[{"name":"CPUUtilization_Alert","destination_id":"'$destination_id'","message_template":{"source":"CPU of one or more of your Domain has reached 80% between {{ctx.periodStart}} and {{ctx.periodEnd}}, Please find below details about the domain for further actions and troubleshooting. \n\n- Severity: {{ctx.trigger.severity}}\n- Domain names: \n {{#ctx.results.0.aggregations.CPUUtilization.buckets}} \n {{key}} https://'$InstanceIP'/_dashboards/app/dashboards#/view/19087650-454f-11eb-87ad-632020bc8bdf?_a=(query:(language:kuery,query:%27domain_name%20:%20%22{{key}}%22%27)) \n{{/ctx.results.0.aggregations.CPUUtilization.buckets}}","lang":"mustache"},"throttle_enabled":false,"subject_template":{"source":"CPUUtilization_Alert"}}]}]}' 76 | 77 | 78 | # Create Monitor for Cluster when JVMMemoryPressure is > 80% in last 5 mins 79 | curl -s -XPOST -u DOMAIN_ADMIN_UNAME:DOMAIN_ADMIN_PW 'https://DOMAIN_ENDPOINT/_opendistro/_alerting/monitors' -H 'Content-Type: application/json' -d'{"type":"monitor","name":"JVMMemoryPressure","enabled":true,"schedule":{"period":{"interval":5,"unit":"MINUTES"}},"inputs":[{"search":{"indices":["domains-*"],"query":{"size":0,"aggs":{"JVMMemoryPressure":{"terms":{"field":"domain_name.keyword","size":25}}},"query":{"bool":{"filter":[{"range":{"@timestamp":{"from":"{{period_end}}||-5m","to":"{{period_end}}","include_lower":true,"include_upper":true,"format":"epoch_millis","boost":1}}},{"range":{"JVMMemoryPressure":{"gte":80}}}]}}}}}],"triggers":[{"name":"JVMMemoryPressure","severity":"1","condition":{"script":{"source":"ctx.results[0].hits.total.value > 0","lang":"painless"}},"actions":[{"name":"JVMMemoryPressure_Alert","destination_id":"'$destination_id'","message_template":{"source":"JVM of one or more of your Domain has reached 80% between {{ctx.periodStart}} and {{ctx.periodEnd}}, Please find below details about the domain for further actions and troubleshooting. 
\n\n- Severity: {{ctx.trigger.severity}}\n- Domain names: \n {{#ctx.results.0.aggregations.JVMMemoryPressure.buckets}} \n {{key}} https://'$InstanceIP'/_dashboards/app/dashboards#/view/19087650-454f-11eb-87ad-632020bc8bdf?_a=(query:(language:kuery,query:%27domain_name%20:%20%22{{key}}%22%27)) \n{{/ctx.results.0.aggregations.JVMMemoryPressure.buckets}}","lang":"mustache"},"throttle_enabled":false,"subject_template":{"source":"JVMMemoryPressure_Alert"}}]}]}'
80 | 
81 | 
82 | echo -e "Completed Dashboards alerts and dashboard generation"
83 | 
84 | 
--------------------------------------------------------------------------------
/opensearch/nginx_opensearch.conf:
--------------------------------------------------------------------------------
1 | server {
2 |     listen 443 ssl;
3 |     server_name $host;
4 |     rewrite ^/$ https://$host/_dashboards redirect;
5 | 
6 |     # openssl req -x509 -nodes -days 365 -newkey rsa:2048 -keyout /etc/nginx/cert.key -out /etc/nginx/cert.crt -subj /C=US/ST=./L=./O=./CN=.\n
7 |     ssl_certificate /etc/nginx/cert.crt;
8 |     ssl_certificate_key /etc/nginx/cert.key;
9 | 
10 |     ssl_session_cache builtin:1000 shared:SSL:10m;
11 |     ssl_protocols TLSv1 TLSv1.1 TLSv1.2;
12 |     ssl_ciphers HIGH:!aNULL:!eNULL:!EXPORT:!CAMELLIA:!DES:!MD5:!PSK:!RC4;
13 |     ssl_prefer_server_ciphers on;
14 | 
15 | 
16 |     location ^~ /_dashboards {
17 |         # Forward requests to OpenSearch Dashboards
18 |         proxy_pass https://DOMAIN_ENDPOINT/_dashboards;
19 | 
20 |         # Update cookie domain and path
21 |         proxy_cookie_domain DOMAIN_ENDPOINT $host;
22 | 
23 |         proxy_set_header Accept-Encoding "";
24 |         sub_filter_types *;
25 |         sub_filter DOMAIN_ENDPOINT $host;
26 |         sub_filter_once off;
27 | 
28 |         # Response buffer settings
29 |         proxy_buffer_size 128k;
30 |         proxy_buffers 4 256k;
31 |         proxy_busy_buffers_size 256k;
32 |     }
33 | 
34 | }
--------------------------------------------------------------------------------
/opensearch/opensearch_monitor_stack.py:
--------------------------------------------------------------------------------
1 | '''
2 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3 | SPDX-License-Identifier: MIT-0
4 | '''
5 | 
6 | from aws_cdk import (
7 |     aws_dynamodb as ddb,
8 |     aws_opensearchservice as opensearch,
9 |     aws_events as events,
10 |     aws_events_targets as targets,
11 |     aws_iam as iam,
12 |     aws_lambda as lambda_,
13 |     aws_ec2 as ec2,
14 |     aws_sns as sns,
15 |     aws_sns_subscriptions as subscriptions,
16 |     Aws, CfnOutput, Stack, RemovalPolicy, SecretValue, Duration
17 | )
18 | from aws_cdk.aws_s3_assets import Asset
19 | from constructs import Construct
20 | import boto3
21 | import fileinput
22 | import json
23 | import os
24 | import random
25 | import string
26 | import sys
27 | 
28 | # Jump host specific settings, change key name if you need an existing key to be used
29 | EC2_KEY_NAME = 'amazon_opensearch_monitoring'
30 | EC2_INSTANCE_TYPE = 't3.nano'
31 | 
32 | # Fill this in with a valid email to receive SNS notifications.
33 | SNS_NOTIFICATION_EMAIL = 'user@example.com'
34 | 
35 | # Lambda Interval Settings (seconds)
36 | LAMBDA_INTERVAL = 300
37 | 
38 | # OpenSearch and Dashboards specific constants
39 | DOMAIN_NAME = 'amazon-opensearch-monitor'
40 | DOMAIN_ADMIN_UNAME = 'opensearch'
41 | DOMAIN_ADMIN_PW = ''.join(
42 |     random.choice(string.ascii_uppercase + string.ascii_lowercase + string.digits) for i in range(13)) + random.choice(
43 |     string.ascii_lowercase) + random.choice(string.ascii_uppercase) + random.choice(string.digits) + "!"
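# Note: the characters appended above guarantee that the generated password always
# contains at least one lowercase letter, one uppercase letter, one digit, and one
# special character, so it satisfies the fine-grained access control master user
# password policy (minimum 8 characters with at least one of each character class).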
44 | DOMAIN_DATA_NODE_INSTANCE_TYPE = 'm6g.large.search' 45 | DOMAIN_DATA_NODE_INSTANCE_COUNT = 2 46 | DOMAIN_INSTANCE_VOLUME_SIZE = 100 47 | DOMAIN_AZ_COUNT = 2 48 | 49 | # Excluded regions ap-east-1, af-south-1, eu-south-1, and the me-south-1 as they are not enabled by default, 50 | # change this if those are enabled in your account 51 | # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html#concepts-available-regions 52 | REGIONS_TO_MONITOR = '["us-east-1", "us-east-2", "us-west-1", "us-west-2", "ap-south-1", "ap-northeast-1", "ap-northeast-2", "ap-southeast-1", "ap-southeast-2", "ca-central-1", "eu-central-1", "eu-west-1", "eu-west-2", "eu-west-3", "eu-north-1", "sa-east-1"]' 53 | # Set REGIONS_TO_MONITOR in setupCWSubscriptionFilter.py 54 | for line in fileinput.input("opensearch/setupCWSubscriptionFilter.py", inplace=True): 55 | if line.strip().startswith('REGIONS_TO_MONITOR='): 56 | line = 'REGIONS_TO_MONITOR=\'' + REGIONS_TO_MONITOR + '\'\n' 57 | sys.stdout.write(line) 58 | 59 | SERVERLESS_REGIONS_TO_MONITOR = '["us-east-1", "us-east-2", "us-west-2", "ap-northeast-1", "ap-southeast-1", "ap-southeast-2", "eu-central-1", "eu-west-1"]' 60 | 61 | # By default monitoring stack will be setup without dedicated master node, to have dedicated master node in stack 62 | # do change the number of nodes and type (if needed) # Maximum Master Instance count supported by service is 5, 63 | # so either have 3 or 5 dedicated node for master 64 | DOMAIN_MASTER_NODE_INSTANCE_TYPE = 'c6g.large.search' 65 | DOMAIN_MASTER_NODE_INSTANCE_COUNT = 0 66 | 67 | ## To enable UW, please make master node count as 3 or 5, and UW node count as minimum 2 68 | ## Also change data node to be non T2/T3 as UW does not support T2/T3 as data nodes 69 | DOMAIN_UW_NODE_INSTANCE_TYPE = 'ultrawarm1.medium.search' 70 | DOMAIN_UW_NODE_INSTANCE_COUNT = 0 71 | 72 | # DDB settings 73 | TABLE_NAME = 'timestamps' 74 | 75 | 76 | class OpenSearchMonitor(Stack): 77 | def __init__(self, scope: Construct, id: str, **kwargs) -> None: 78 | super().__init__(scope, id, **kwargs) 79 | 80 | ################################################################################ 81 | # VPC 82 | vpc = ec2.Vpc(self, "Monitoring VPC", max_azs=3) 83 | 84 | ################################################################################ 85 | # Amazon OpenSearch Service domain 86 | es_sec_grp = ec2.SecurityGroup(self, 'OpenSearchSecGrpMonitoring', 87 | vpc=vpc, 88 | allow_all_outbound=True, 89 | security_group_name='OpenSearchSecGrpMonitoring') 90 | es_sec_grp.add_ingress_rule(ec2.Peer.any_ipv4(), ec2.Port.tcp(80)) 91 | es_sec_grp.add_ingress_rule(ec2.Peer.any_ipv4(), ec2.Port.tcp(443)) 92 | 93 | vpc_subnets = ec2.SubnetSelection( 94 | subnet_type=ec2.SubnetType.PUBLIC 95 | ) 96 | domain = opensearch.Domain(self, 'opensearch-service-monitor', 97 | version=opensearch.EngineVersion.OPENSEARCH_2_3, # Upgrade when CDK upgrades 98 | domain_name=DOMAIN_NAME, 99 | removal_policy=RemovalPolicy.DESTROY, 100 | capacity=opensearch.CapacityConfig( 101 | data_node_instance_type=DOMAIN_DATA_NODE_INSTANCE_TYPE, 102 | data_nodes=DOMAIN_DATA_NODE_INSTANCE_COUNT, 103 | master_node_instance_type=DOMAIN_MASTER_NODE_INSTANCE_TYPE, 104 | master_nodes=DOMAIN_MASTER_NODE_INSTANCE_COUNT, 105 | warm_instance_type=DOMAIN_UW_NODE_INSTANCE_TYPE, 106 | warm_nodes=DOMAIN_UW_NODE_INSTANCE_COUNT 107 | ), 108 | ebs=opensearch.EbsOptions( 109 | enabled=True, 110 | volume_size=DOMAIN_INSTANCE_VOLUME_SIZE, 111 | volume_type=ec2.EbsDeviceVolumeType.GP2 112 | 
        vpc_subnets = ec2.SubnetSelection(
            subnet_type=ec2.SubnetType.PUBLIC
        )
        domain = opensearch.Domain(self, 'opensearch-service-monitor',
                                   version=opensearch.EngineVersion.OPENSEARCH_2_3,  # Upgrade when CDK upgrades
                                   domain_name=DOMAIN_NAME,
                                   removal_policy=RemovalPolicy.DESTROY,
                                   capacity=opensearch.CapacityConfig(
                                       data_node_instance_type=DOMAIN_DATA_NODE_INSTANCE_TYPE,
                                       data_nodes=DOMAIN_DATA_NODE_INSTANCE_COUNT,
                                       master_node_instance_type=DOMAIN_MASTER_NODE_INSTANCE_TYPE,
                                       master_nodes=DOMAIN_MASTER_NODE_INSTANCE_COUNT,
                                       warm_instance_type=DOMAIN_UW_NODE_INSTANCE_TYPE,
                                       warm_nodes=DOMAIN_UW_NODE_INSTANCE_COUNT
                                   ),
                                   ebs=opensearch.EbsOptions(
                                       enabled=True,
                                       volume_size=DOMAIN_INSTANCE_VOLUME_SIZE,
                                       volume_type=ec2.EbsDeviceVolumeType.GP2
                                   ),
                                   vpc=vpc,
                                   vpc_subnets=[vpc_subnets],
                                   security_groups=[es_sec_grp],
                                   zone_awareness=opensearch.ZoneAwarenessConfig(
                                       enabled=True,
                                       availability_zone_count=DOMAIN_AZ_COUNT
                                   ),
                                   enforce_https=True,
                                   node_to_node_encryption=True,
                                   encryption_at_rest={
                                       "enabled": True
                                   },
                                   use_unsigned_basic_auth=True,
                                   fine_grained_access_control={
                                       "master_user_name": DOMAIN_ADMIN_UNAME,
                                       "master_user_password": SecretValue.unsafe_plain_text(DOMAIN_ADMIN_PW)
                                   }
                                   )

        CfnOutput(self, "MasterUser",
                  value=DOMAIN_ADMIN_UNAME,
                  description="Master User Name for Amazon OpenSearch Service")

        CfnOutput(self, "MasterPW",
                  value=DOMAIN_ADMIN_PW,
                  description="Master User Password for Amazon OpenSearch Service")

        ################################################################################
        # DynamoDB table for timestamp tracking
        table = ddb.Table(self, 'opensearch-monitor-lambda-timestamp',
                          table_name=TABLE_NAME,
                          partition_key=ddb.Attribute(
                              name="domain",
                              type=ddb.AttributeType.STRING
                          ),
                          sort_key=ddb.Attribute(
                              name='region',
                              type=ddb.AttributeType.STRING
                          ),
                          removal_policy=RemovalPolicy.DESTROY
                          )

        ################################################################################
        # Lambda layer for boto3
        boto3_lambda_layer = lambda_.LayerVersion(
            self, 'Boto3LambdaLayer',
            code=lambda_.AssetCode('boto3-layer/'),
            compatible_runtimes=[lambda_.Runtime.PYTHON_3_8]
        )

        # Lambda monitoring function
        lambda_func = lambda_.Function(
            self, 'CWMetricsToOpenSearch',
            function_name="CWMetricsToOpenSearch_monitoring",
            runtime=lambda_.Runtime.PYTHON_3_8,
            code=lambda_.Code.from_asset('CWMetricsToOpenSearch'),
            handler='handler.handler',
            memory_size=1024,
            layers=[boto3_lambda_layer],
            timeout=Duration.minutes(10),
            vpc=vpc
        )

        table.grant_read_data(lambda_func)
        table.grant_write_data(lambda_func)
        lambda_func.add_environment('TABLE', table.table_name)
        lambda_func.add_environment('DOMAIN_ENDPOINT', 'https://' + domain.domain_endpoint)
        lambda_func.add_environment('DOMAIN_ADMIN_UNAME', DOMAIN_ADMIN_UNAME)
        lambda_func.add_environment('DOMAIN_ADMIN_PW', DOMAIN_ADMIN_PW)
        lambda_func.add_environment('REGIONS', REGIONS_TO_MONITOR)
        lambda_func.add_environment('SERVERLESS_REGIONS', SERVERLESS_REGIONS_TO_MONITOR)

        # The function needs access to the domain created above; consider restricting
        # this wildcard policy to the domain's ARN
        lambda_func.add_to_role_policy(iam.PolicyStatement(actions=['es:*'],
                                                           resources=['*']))

        # The function needs to read CloudWatch metrics; consider restricting this as well
        lambda_func.add_to_role_policy(iam.PolicyStatement(actions=['cloudwatch:*'],
                                                           resources=['*']))
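        # Optional least-privilege sketch: the wildcard grants above are convenient for
        # a demo, but a tighter policy would scope OpenSearch access to this domain and
        # keep CloudWatch access read-only. The exact action list below is an assumption
        # about what the handler calls, so verify it before adopting:
        # lambda_func.add_to_role_policy(iam.PolicyStatement(
        #     actions=['es:ESHttpGet', 'es:ESHttpPost', 'es:ESHttpPut'],
        #     resources=[domain.domain_arn, domain.domain_arn + '/*']))
        # lambda_func.add_to_role_policy(iam.PolicyStatement(
        #     actions=['cloudwatch:GetMetricData', 'cloudwatch:ListMetrics'],
        #     resources=['*']))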
        # The function also queries OpenSearch Serverless collections; consider
        # restricting this wildcard policy too
        lambda_func.add_to_role_policy(iam.PolicyStatement(actions=['aoss:*'],
                                                           resources=['*']))

        lambda_schedule = events.Schedule.rate(Duration.seconds(LAMBDA_INTERVAL))
        event_lambda_target = targets.LambdaFunction(handler=lambda_func)
        events.Rule(
            self,
            "Monitoring",
            enabled=True,
            schedule=lambda_schedule,
            targets=[event_lambda_target])

        ################################################################################
        # Lambda for CW Logs
        lambda_func_cw_logs = lambda_.Function(
            self, 'CWLogsToOpenSearch',
            function_name="CWLogsToOpenSearch_monitoring",
            runtime=lambda_.Runtime.NODEJS_18_X,
            code=lambda_.Code.from_asset('CWLogsToOpenSearch'),
            handler='index.handler',
            vpc=vpc
        )

        # Load the Amazon OpenSearch Service domain endpoint into an env variable
        lambda_func_cw_logs.add_environment('DOMAIN_ENDPOINT', domain.domain_endpoint)

        # The function needs access to the domain created above; consider restricting
        lambda_func_cw_logs.add_to_role_policy(iam.PolicyStatement(actions=['es:*'],
                                                                   resources=['*']))

        # The function needs to read CW Logs; consider restricting
        lambda_func_cw_logs.add_to_role_policy(iam.PolicyStatement(actions=['logs:*'],
                                                                   resources=['*']))

        # Allow CW Logs in each monitored region of the current account to invoke this
        # function; permissions are added per region because the region portion of a
        # source ARN cannot be a wildcard
        account_id = boto3.client("sts").get_caller_identity()["Account"]
        for region in json.loads(REGIONS_TO_MONITOR):
            lambda_func_cw_logs.add_permission(
                id="lambda-cw-logs-permission-" + region,
                principal=iam.ServicePrincipal("logs.amazonaws.com"),
                action="lambda:InvokeFunction",
                source_arn="arn:aws:logs:" + region + ":" + account_id + ":*:*:*"
            )

        ################################################################################
        # Jump host for SSH tunneling and direct access
        sn_public = ec2.SubnetSelection(subnet_type=ec2.SubnetType.PUBLIC)

        amzn_linux = ec2.MachineImage.latest_amazon_linux(
            generation=ec2.AmazonLinuxGeneration.AMAZON_LINUX_2,
            edition=ec2.AmazonLinuxEdition.STANDARD,
            virtualization=ec2.AmazonLinuxVirt.HVM,
            storage=ec2.AmazonLinuxStorage.GENERAL_PURPOSE
        )

        # Instance role and SSM managed policies
        role = iam.Role(self, "InstanceSSM", assumed_by=iam.ServicePrincipal("ec2.amazonaws.com"))
        role.add_managed_policy(iam.ManagedPolicy.from_aws_managed_policy_name("service-role/AmazonEC2RoleforSSM"))
        role.add_managed_policy(iam.ManagedPolicy.from_aws_managed_policy_name("AmazonSSMManagedInstanceCore"))

        instance = ec2.Instance(self, 'instance',
                                instance_type=ec2.InstanceType(EC2_INSTANCE_TYPE),
                                vpc=vpc,
                                machine_image=amzn_linux,
                                vpc_subnets=sn_public,
                                role=role,
                                )
        instance.connections.allow_from_any_ipv4(ec2.Port.tcp(22), 'SSH')
        instance.connections.allow_from_any_ipv4(ec2.Port.tcp(443), 'HTTPS')

        stmt = iam.PolicyStatement(actions=['es:*'],
                                   resources=[domain.domain_arn])
        instance.add_to_role_policy(stmt)
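        # Usage sketch: besides browsing to the jump host directly, Dashboards can be
        # tunneled through it over SSH; the key name, local port, and host placeholders
        # below come from your own environment:
        #   ssh -i ~/.ssh/<your-key>.pem -N -L 8443:<domain-endpoint>:443 ec2-user@<jump-host-public-ip>
        # then open https://localhost:8443/_dashboards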
        # Create SNS topic, subscription, IAM role, and policy for alerting
        sns_topic = sns.Topic(self, "cdk_monitoring_topic")

        sns_topic.add_subscription(subscriptions.EmailSubscription(SNS_NOTIFICATION_EMAIL))

        sns_policy_statement = iam.PolicyStatement(
            actions=["sns:publish"],
            resources=[sns_topic.topic_arn],
            effect=iam.Effect.ALLOW
        )
        sns_policy = iam.ManagedPolicy(self, "cdk_monitoring_policy")
        sns_policy.add_statements(sns_policy_statement)

        sns_role = iam.Role(self, "cdk_monitoring_sns_role",
                            assumed_by=iam.ServicePrincipal("es.amazonaws.com")
                            )
        sns_role.add_managed_policy(sns_policy)

        dirname = os.path.dirname(__file__)
        dashboards_asset = Asset(self, "DashboardsAsset",
                                 path=os.path.join(dirname, 'export_opensearch_dashboards_V1_0.ndjson'))
        dashboards_asset.grant_read(instance.role)
        dashboards_asset_path = instance.user_data.add_s3_download_command(
            bucket=dashboards_asset.bucket,
            bucket_key=dashboards_asset.s3_object_key,
        )

        nginx_asset = Asset(self, "NginxAsset", path=os.path.join(dirname, 'nginx_opensearch.conf'))
        nginx_asset.grant_read(instance.role)
        nginx_asset_path = instance.user_data.add_s3_download_command(
            bucket=nginx_asset.bucket,
            bucket_key=nginx_asset.s3_object_key,
        )

        alerting_asset = Asset(self, "AlertingAsset", path=os.path.join(dirname, 'create_alerts.sh'))
        alerting_asset.grant_read(instance.role)
        alerting_asset_path = instance.user_data.add_s3_download_command(
            bucket=alerting_asset.bucket,
            bucket_key=alerting_asset.s3_object_key,
        )

        instance.user_data.add_commands(
            "yum update -y",
            "yum install jq -y",
            "amazon-linux-extras install -y nginx1.12",
            "mkdir -p /home/ec2-user/assets",
            "cd /home/ec2-user/assets",
            "mv {} export_opensearch_dashboards_V1_0.ndjson".format(dashboards_asset_path),
            "mv {} nginx_opensearch.conf".format(nginx_asset_path),
            "mv {} create_alerts.sh".format(alerting_asset_path),

            # Self-signed certificate for the nginx proxy
            "openssl req -x509 -nodes -days 365 -newkey rsa:2048 -keyout /etc/nginx/cert.key -out /etc/nginx/cert.crt -subj /C=US/ST=./L=./O=./CN=.",
            "cp nginx_opensearch.conf /etc/nginx/conf.d/",
            "sed -i 's/DEFAULT_DOMAIN_NAME/" + DOMAIN_NAME + "/g' /home/ec2-user/assets/export_opensearch_dashboards_V1_0.ndjson",
            "sed -i 's/DOMAIN_ENDPOINT/" + domain.domain_endpoint + "/g' /etc/nginx/conf.d/nginx_opensearch.conf",
            "sed -i 's/DOMAIN_ENDPOINT/" + domain.domain_endpoint + "/g' /home/ec2-user/assets/create_alerts.sh",
            "sed -i 's=LAMBDA_CW_LOGS_ROLE_ARN=" + lambda_func_cw_logs.role.role_arn + "=g' /home/ec2-user/assets/create_alerts.sh",
            "sed -i 's=SNS_ROLE_ARN=" + sns_role.role_arn + "=g' /home/ec2-user/assets/create_alerts.sh",
            "sed -i 's/SNS_TOPIC_ARN/" + sns_topic.topic_arn + "/g' /home/ec2-user/assets/create_alerts.sh",
            "sed -i 's=DOMAIN_ADMIN_UNAME=" + DOMAIN_ADMIN_UNAME + "=g' /home/ec2-user/assets/create_alerts.sh",
            "sed -i 's=DOMAIN_ADMIN_PW=" + DOMAIN_ADMIN_PW + "=g' /home/ec2-user/assets/create_alerts.sh",

            "systemctl restart nginx.service",
            "chmod 500 create_alerts.sh",
            "sleep 5",
            "bash --verbose create_alerts.sh",
        )

        CfnOutput(self, "Dashboards URL (via Jump host)",
                  value="https://" + instance.instance_public_ip,
                  description="Dashboards URL via Jump host")

        CfnOutput(self, "SNS Subscription Alert Message",
                  value=SNS_NOTIFICATION_EMAIL,
                  description="Confirm the SNS subscription email received at this address")
--------------------------------------------------------------------------------
/opensearch/setupCWSubscriptionFilter.py:
--------------------------------------------------------------------------------
#!/usr/bin/python
'''
Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
SPDX-License-Identifier: MIT-0
'''
import boto3
import json
import sys

if (len(sys.argv) != 2):
    sys.exit("Incorrect argument, please run as: python3 setupCWSubscriptionFilter.py deploy|destroy")

# This value is rewritten to match opensearch_monitor_stack.py#REGIONS_TO_MONITOR every
# time cdk deploy/destroy runs
REGIONS_TO_MONITOR='["us-east-1", "us-east-2", "us-west-1", "us-west-2", "ap-south-1", "ap-northeast-1", "ap-northeast-2", "ap-southeast-1", "ap-southeast-2", "ca-central-1", "eu-central-1", "eu-west-1", "eu-west-2", "eu-west-3", "eu-north-1", "sa-east-1"]'

session = boto3.session.Session()
current_region = session.region_name
account_id = boto3.client("sts").get_caller_identity()["Account"]
# Prefixes of the CW log groups to traverse when creating/deleting subscription filters
logGroupNamePrefixAES='/aws/aes/domains'
logGroupNamePrefixOpensearch='/aws/OpenSearchService/domains'
subscriptionFilterNamePrefix='OpenSearch-CWLogs-filter-'

def deploy(cwLogGroupPrefix):
    # Create a subscription filter on every matching CW log group, in every monitored region
    for region in json.loads(REGIONS_TO_MONITOR):
        print("Starting to create CW Log filters for", region)
        cw_logs_client = boto3.client('logs', region_name=region)
        # describe_log_groups already returns a dict; read the logGroups list directly
        response = cw_logs_client.describe_log_groups(
            logGroupNamePrefix=cwLogGroupPrefix
        )
        for log_group in response["logGroups"]:
            print("Processing logGroup:", log_group["arn"])
            cw_logs_client.put_subscription_filter(
                logGroupName=log_group["logGroupName"],
                filterName=subscriptionFilterNamePrefix + log_group["logGroupName"] + "-" + region,
                filterPattern=' ',
                destinationArn='arn:aws:lambda:' + current_region + ':' + account_id + ':function:CWLogsToOpenSearch_monitoring'
            )
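
# Illustrative helper (not called above): describe_log_groups returns at most 50 log
# groups per call, so accounts with more matching groups need pagination; boto3's
# built-in paginator handles it.
def iter_log_groups(cw_logs_client, prefix):
    paginator = cw_logs_client.get_paginator('describe_log_groups')
    for page in paginator.paginate(logGroupNamePrefix=prefix):
        yield from page['logGroups']
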
def destroy(cwLogGroupPrefix):
    # Delete our subscription filters from every matching CW log group, in every monitored region
    for region in json.loads(REGIONS_TO_MONITOR):
        print("Starting to delete CW Log filters for", region)
        cw_logs_client = boto3.client('logs', region_name=region)
        response = cw_logs_client.describe_log_groups(
            logGroupNamePrefix=cwLogGroupPrefix
        )
        # Traverse each log group, list its subscription filters, and delete the ones
        # whose name starts with subscriptionFilterNamePrefix
        for log_group in response["logGroups"]:
            print("Processing logGroup:", log_group["arn"])
            filter_response = cw_logs_client.describe_subscription_filters(
                logGroupName=log_group["logGroupName"],
                filterNamePrefix=subscriptionFilterNamePrefix
            )
            for subscription_filter in filter_response["subscriptionFilters"]:
                print("Deleting subscriptionFilter:", subscription_filter["filterName"])
                cw_logs_client.delete_subscription_filter(
                    logGroupName=log_group["logGroupName"],
                    filterName=subscription_filter["filterName"]
                )


if (sys.argv[1].lower() == "deploy"):
    # The AES prefix can be removed in a future major version, once Amazon ES is deprecated
    deploy(logGroupNamePrefixAES)
    deploy(logGroupNamePrefixOpensearch)

elif (sys.argv[1].lower() == "destroy"):
    # The AES prefix can be removed in a future major version, once Amazon ES is deprecated
    destroy(logGroupNamePrefixAES)
    destroy(logGroupNamePrefixOpensearch)

else:
    sys.exit("Unrecognized argument '" + sys.argv[1].lower() + "', please run as: python3 setupCWSubscriptionFilter.py deploy|destroy")
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
aws-cdk-lib>=2.35.0
constructs>=10.0.0
boto3
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
'''
Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
SPDX-License-Identifier: MIT-0
'''

import setuptools

with open("README.md") as fp:
    long_description = fp.read()


setuptools.setup(
    name="opensearch",
    version="0.0.1",

    description="CDK app that deploys a monitoring framework for Amazon OpenSearch Service",
    long_description=long_description,
    long_description_content_type="text/markdown",

    author="author",

    package_dir={"": "opensearch"},
    packages=setuptools.find_packages(where="opensearch"),

    # Keep in line with requirements.txt (CDK v2)
    install_requires=[
        "aws-cdk-lib>=2.35.0",
        "constructs>=10.0.0",
    ],

    python_requires=">=3.6",

    classifiers=[
        "Development Status :: 4 - Beta",

        "Intended Audience :: Developers",

        "License :: OSI Approved :: MIT License",

        "Programming Language :: JavaScript",
        "Programming Language :: Python :: 3 :: Only",
        "Programming Language :: Python :: 3.6",
        "Programming Language :: Python :: 3.7",
        "Programming Language :: Python :: 3.8",

        "Topic :: Software Development :: Code Generators",
        "Topic :: Utilities",

        "Typing :: Typed",
    ],
)
--------------------------------------------------------------------------------