├── Statement20230428.pdf ├── pipeline_architecture.png ├── CODE_OF_CONDUCT.md ├── classification.csv ├── src ├── cbaSavingsRowProcessor.py ├── rowProcessor.py ├── startJob.py ├── inputHandler.py ├── cbaCCRowProcessor.py ├── apiRequestHandler.py └── getResults.py ├── LICENSE ├── README.md ├── CONTRIBUTING.md ├── frontend └── index.html └── cloudFormation.yml /Statement20230428.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/textract-bank-statement-processor/HEAD/Statement20230428.pdf -------------------------------------------------------------------------------- /pipeline_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/textract-bank-statement-processor/HEAD/pipeline_architecture.png -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 
-------------------------------------------------------------------------------- /classification.csv: -------------------------------------------------------------------------------- 1 | key,type,subtype 2 | paypal,Online,Paypal 3 | subway,Entertainment,Eating out 4 | noodle & sushi,Entertainment,Eating out 5 | vic roads,Transport,Car 6 | citylink,Transport,Toll 7 | pizza,Entertainment,Eating out 8 | wage,Employment,Acme Corporation 9 | bp,Transport,Petrol 10 | red rooster,Entertainment,Eating out 11 | 7-eleven,Groceries,7-Eleven 12 | belmont wines,Entertainment,Liquor 13 | beatie service,Transport,Car 14 | kfc,Entertainment,Eating out 15 | telstra,Utilities,Mobile 16 | wdl,Cash,ATM 17 | safeway,Groceries,Safeway 18 | picture box,Home,Artwork 19 | salvos,Clothing,Salvos 20 | cignall,Entertainment,Tobacco 21 | sw petrol,Transport,Petrol 22 | service centre,Transport,Car 23 | coles,Groceries,Coles 24 | kmart,Home,KMart 25 | bunnings,Home,Bunnings 26 | fee,Fees,Transaction fee 27 | transfer,Transfer,Internal Transfer 28 | -------------------------------------------------------------------------------- /src/cbaSavingsRowProcessor.py: -------------------------------------------------------------------------------- 1 | """Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | SPDX-License-Identifier: MIT-0 3 | """ 4 | from src.rowProcessor import TableRowProcessor 5 | 6 | class CBASavingsRowProcessor(TableRowProcessor): 7 | 8 | def process_row(self, row, statement_type, statement_name): 9 | print("Processing a CBA savings row") 10 | print(row) 11 | row['Date'] = super().get_date(statement_type, statement_name, row['Date']) 12 | if row['Credit']: 13 | row['Credit'] = super().convert_str_to_float(row['Credit']) 14 | row['Debit'] = 0 15 | if row['Debit']: 16 | row['Debit'] = super().convert_str_to_float(row['Debit']) 17 | row['Credit'] = 0 18 | del row['Balance'] 19 | row_no_blank_keys = {k: v for k, v in row.items() if k} 20 | print(row_no_blank_keys) 21 | return row_no_blank_keys 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this 4 | software and associated documentation files (the "Software"), to deal in the Software 5 | without restriction, including without limitation the rights to use, copy, modify, 6 | merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 7 | permit persons to whom the Software is furnished to do so. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 10 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 11 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 12 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 13 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 14 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-------------------------------------------------------------------------------- /src/rowProcessor.py: -------------------------------------------------------------------------------- 1 | """Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | SPDX-License-Identifier: MIT-0 3 | """ 4 | from abc import ABC, abstractmethod 5 | from re import sub 6 | 7 | class TableRowProcessor(ABC): 8 | 9 | @abstractmethod 10 | def process_row(self, row, statement_type, statement_name): 11 | """Processes a row in a table extracted 12 | by Textract, into an object in the standard 13 | transaction format 14 | """ 15 | pass 16 | 17 | def get_date(self, statement_type, filename, date): 18 | year = int(filename[9:13]) 19 | if (statement_type == 'cba_bank'): 20 | mon = date[3:6] 21 | mon_last_year = ['Oct', 'Nov', 'Dec'] 22 | if mon in mon_last_year: 23 | year = year - 1 24 | return date + ' ' + str(year) 25 | elif (statement_type == 'cba_cc'): 26 | year = int(filename[9:13]) 27 | return date + ' ' + str(year) 28 | else: 29 | return date 30 | 31 | def convert_str_to_float(self, value): 32 | try: 33 | return float(sub(r'[^\d.]', '', value)) 34 | except Exception as e: 35 | print(e) 36 | return 0.0 37 | -------------------------------------------------------------------------------- /src/startJob.py: -------------------------------------------------------------------------------- 1 | """Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | SPDX-License-Identifier: MIT-0 3 | """ 4 | import json 5 | import time 6 | import boto3 7 | import sys 8 | 9 | textract_client = boto3.client('textract') 10 | s3_client = boto3.client('s3') 11 | 12 | def startJob(bucket_name, object_name): 13 | response = textract_client.start_document_analysis( 14 | DocumentLocation={ 15 | 'S3Object': { 16 | 'Bucket': bucket_name, 17 | 'Name': object_name, 18 | } 19 | }, 20 | FeatureTypes=['TABLES'], 21 | ) 22 | 23 | return response["JobId"] 24 | 25 | def handle(event, context): 26 | if event['eventSource'] != 'aws:s3': 27 | print("ERROR: Unexpected event type") 28 | print(json.dumps(event)) 29 | raise ValueError("ERROR: Unexpected event type") 30 | 31 | bucket_name = event['s3']['bucket']['name'] 32 | key = event['s3']['object']['key'] 33 | 34 | response = s3_client.head_object(Bucket=bucket_name, Key=key) 35 | 36 | print('Response: {}'.format(response)) 37 | 38 | print(f"StartJob: s3://{bucket_name}/{key}") 39 | statement_type = response['Metadata']['statement_type'] 40 | 41 | job_id = startJob(bucket_name, key) 42 | print(f"JobId: {job_id}") 43 | 44 | return { 45 | "bucket_name": bucket_name, 46 | "object_name": key, 47 | "job_id": job_id, 48 | "job_start_timestamp": time.time(), 49 | "statement_type": statement_type, 50 | } 51 | 52 | if __name__ == "__main__": 53 | import sys 54 | with open(sys.argv[1], "rt") as f: 55 | event = json.load(f) 56 | ret = handler(event, {}) 57 | print(json.dumps(ret, indent=2)) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # textract-statement-processor 2 | 3 | A sample pipeline that takes as input bank statements, extracts transaction information from 4 | tables within the statements using Textract, stores, and classifies each transaction. 5 | 6 | 7 | ![Architecture](pipeline_architecture.png) 8 | 9 | 1. 
PDF bank statements that have been scanned, or downloaded from an online banking application, are uploaded to the Landing bucket in S3 10 | 2. The landing of the file in the S3 bucket triggers a Lambda function that starts the step function 11 | 3. The Lambda function starts the step function execution 12 | 4. The first step in the step function calls a Lambda to start a new Textract document analysis job 13 | 5. A new document analysis job is invoked with the uploaded PDF 14 | 6. The step function periodically calls a Lambda to get the job results 15 | 7. The Lambda checks with Textract, using the job identifier, whether the analysis job is complete 16 | 8. When the analysis job is complete the Lambda takes the output of the job, extracts the tabular data, and processes the transaction records into a JSON file which it then saves in the Processed bucket in S3. 17 | 9. An API Lambda queries the JSON files stored in the S3 bucket in response to a request from the API gateway. An additional classification step at this point classifies each transaction into a type and sub-type based on user configurable classification rules. 18 | 10. The API Gateway serves a RESTful API that a web frontend consumes to visualise transaction data 19 | 11. Finally, the visualisation output of multiple years worth of classified transaction data is visualised within a Sankey diagram as shown below, allowing users to see at a glance income vs expenditure. 20 | 12. ML models can be trained and run against historical transaction data 21 | -------------------------------------------------------------------------------- /src/inputHandler.py: -------------------------------------------------------------------------------- 1 | """Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | SPDX-License-Identifier: MIT-0 3 | """ 4 | import boto3 5 | import os 6 | import random 7 | import json 8 | import csv 9 | from re import sub 10 | 11 | client = boto3.client('stepfunctions') 12 | s3_client = boto3.client('s3') 13 | 14 | def convert_str_to_float(value): 15 | try: 16 | return float(sub(r'[^\d.]', '', value)) 17 | except Exception as e: 18 | print(e) 19 | return 0.0 20 | 21 | def handle(event, context): 22 | print(event) 23 | bucket_name = event['Records'][0]['s3']['bucket']['name'] 24 | key = event['Records'][0]['s3']['object']['key'] 25 | response = s3_client.head_object(Bucket=bucket_name, Key=key) 26 | print('Response: {}'.format(response)) 27 | statement_type = response['Metadata']['statement_type'] 28 | if statement_type == 'ing': 29 | handle_csv_statement(bucket_name, key) 30 | elif statement_type == 'cba_cc' or statement_type == 'cba_bank': 31 | stateMachineARN = os.environ['statemachine_arn'] 32 | response = client.start_execution( 33 | stateMachineArn=stateMachineARN, 34 | name='test-sf'+str(random.randint(10, 100000)), 35 | input=json.dumps(event['Records'][0]) 36 | ) 37 | 38 | def handle_csv_statement(bucket_name, key): 39 | print('Handle CSV statement') 40 | response = s3_client.get_object(Bucket=bucket_name, Key=key) 41 | contents = response['Body'].read().decode('utf-8').splitlines() 42 | 43 | result = [] 44 | csvReader = csv.DictReader(contents) 45 | for row in csvReader: 46 | result.append(row) 47 | 48 | for transaction in result: 49 | transaction['Debit'] = convert_str_to_float(transaction['Debit'].lstrip('-')) 50 | transaction['Credit'] = convert_str_to_float(transaction['Credit']) 51 | 52 | output_bucket = os.environ['OUTPUT_BUCKET'] 53 | output_prefix = os.environ['OUTPUT_PREFIX'] 54 | 55 | output_object_base = os.path.join(output_prefix, os.path.abspath(key)) 56 | 57 | output_object = f"{output_object_base}.json" 58 | s3_client.put_object( 59 | Bucket=output_bucket, 60 | Key=output_object, 61 | 
Body=(bytes(json.dumps(result).encode('UTF-8'))), 62 | ServerSideEncryption='AES256', 63 | ContentType='application/json', 64 | ) 65 | print(f"File saved to: s3://{output_bucket}/{output_object}") 66 | 67 | -------------------------------------------------------------------------------- /src/cbaCCRowProcessor.py: -------------------------------------------------------------------------------- 1 | """Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | SPDX-License-Identifier: MIT-0 3 | """ 4 | from src.rowProcessor import TableRowProcessor 5 | 6 | class CBACCRowProcessor(TableRowProcessor): 7 | 8 | def process_row(self, row, statement_type, statement_name): 9 | print(row) 10 | print("Processing a CBA CC row") 11 | row['Date'] = super().get_date(statement_type, statement_name, row['Date']) 12 | if 'Transaction Details' in row: 13 | row['Transaction'] = row['Transaction Details'] 14 | del row['Transaction Details'] 15 | if row['Amount (A$)'].endswith('-'): 16 | credit = row['Amount (A$)'][:-1] 17 | row['Credit'] = super().convert_str_to_float(credit) 18 | row['Debit'] = 0 19 | del row['Amount (A$)'] 20 | else: 21 | row['Debit'] = super().convert_str_to_float(row['Amount (A$)']) 22 | row['Credit'] = 0 23 | del row['Amount (A$)'] 24 | if 'Transaction details' in row: 25 | row['Transaction'] = row['Transaction details'] 26 | del row['Transaction details'] 27 | if row['Amount (A$)'].endswith('-'): 28 | credit = row['Amount (A$)'][:-1] 29 | row['Credit'] = super().convert_str_to_float(credit) 30 | row['Debit'] = 0 31 | del row['Amount (A$)'] 32 | else: 33 | row['Debit'] = super().convert_str_to_float(row['Amount (A$)']) 34 | row['Credit'] = 0 35 | del row['Amount (A$)'] 36 | if 'Transaction details Amount (A$)' in row: 37 | tokens = row['Transaction details Amount (A$)'].split() 38 | amount = tokens[-1] 39 | row['Debit'] = super().convert_str_to_float(amount) 40 | row['Credit'] = 0 41 | row['Transaction'] = row['Transaction details Amount (A$)'] 42 | del 
row['Transaction details Amount (A$)'] 43 | if 'Transaction Details Amount (A$)' in row: 44 | tokens = row['Transaction Details Amount (A$)'].split() 45 | amount = tokens[-1] 46 | row['Debit'] = super().convert_str_to_float(amount) 47 | row['Credit'] = 0 48 | row['Transaction'] = row['Transaction Details Amount (A$)'] 49 | del row['Transaction Details Amount (A$)'] 50 | 51 | row_no_blank_keys = {k: v for k, v in row.items() if k} 52 | print(row_no_blank_keys) 53 | return row_no_blank_keys 54 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. 
You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 
55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. -------------------------------------------------------------------------------- /src/apiRequestHandler.py: -------------------------------------------------------------------------------- 1 | """Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | SPDX-License-Identifier: MIT-0 3 | """ 4 | import json 5 | import csv 6 | import boto3 7 | import os 8 | import re 9 | 10 | s3 = boto3.client('s3') 11 | bucket_name = os.environ['DATA_BUCKET'] 12 | 13 | classifications = [] 14 | with open ("classification.csv") as classification_csv: 15 | csv_reader = csv.DictReader(classification_csv) 16 | for row in csv_reader: 17 | classifications.append(row) 18 | 19 | def handle(event, context): 20 | objects = s3.list_objects_v2(Bucket=bucket_name) 21 | 22 | json_contents = [] 23 | for obj in objects['Contents']: 24 | key = obj['Key'] 25 | print(key) 26 | if key.endswith('.json'): # Only process JSON files 27 | response = s3.get_object(Bucket=bucket_name, Key=key) 28 | contents = response['Body'].read().decode('utf-8') 29 | print(contents) 30 | json_contents.extend(json.loads(contents)) 31 | 32 | type_total_dict = {} 33 | for transaction in json_contents: 34 | if 'Transaction' in transaction: 35 | classification = classify_transaction(transaction['Transaction']) 36 | type = classification['type'] 37 | sub_type = classification['subtype'] 38 | transaction['Type'] = type 39 | transaction['Subtype'] = sub_type 40 | 41 | try: 42 | if 'Credit' in transaction and transaction['Credit'] != "": 43 | if type in type_total_dict: 44 | if sub_type in type_total_dict[type]: 45 | type_total_dict[type][sub_type] = float("{:.2f}".format(type_total_dict[type][sub_type])) + get_amount(transaction['Credit']) 46 | else: 47 | type_total_dict[type][sub_type] = get_amount(transaction['Credit']) 48 | else: 49 | 
type_total_dict[type] = {} 50 | type_total_dict[type][sub_type] = get_amount(transaction['Credit']) 51 | if 'Debit' in transaction and transaction['Debit'] != "": 52 | if type in type_total_dict: 53 | if sub_type in type_total_dict[type]: 54 | type_total_dict[type][sub_type] = float("{:.2f}".format(type_total_dict[type][sub_type])) + get_amount(transaction['Debit']) 55 | else: 56 | type_total_dict[type][sub_type] = get_amount(transaction['Debit']) 57 | else: 58 | type_total_dict[type] = {} 59 | type_total_dict[type][sub_type] = get_amount(transaction['Debit']) 60 | except ValueError as ve: 61 | print(ve) 62 | 63 | print(type_total_dict) 64 | print(json.dumps(type_total_dict, indent = 4)) 65 | 66 | result = {'summary': type_total_dict, 'transactions': json_contents} 67 | 68 | return { 69 | 'statusCode': 200, 70 | 'headers': { 71 | 'Access-Control-Allow-Origin': '*' 72 | }, 73 | 'body': json.dumps(result) 74 | } 75 | 76 | def get_amount(amount_str): 77 | non_decimal = re.compile(r'[^\d.]+') 78 | return float("{:.2f}".format(float(non_decimal.sub('',str(amount_str)).replace(",", "")))) 79 | 80 | 81 | def classify_transaction(description): 82 | for classification in classifications: 83 | if classification['key'] in description.lower(): 84 | return classification 85 | return {'key': '', 'type': 'Unknown', 'subtype': 'Unclassified'} -------------------------------------------------------------------------------- /frontend/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Textract Statement Insight 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 |
16 |
17 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 |
DateDescriptionTypeSubtypeDebitCredit
34 |
35 | 36 | 116 | 117 | 118 | -------------------------------------------------------------------------------- /src/getResults.py: -------------------------------------------------------------------------------- 1 | """Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | SPDX-License-Identifier: MIT-0 3 | """ 4 | import os 5 | import csv 6 | import io 7 | import json 8 | import time 9 | import boto3 10 | import csv 11 | from collections import defaultdict 12 | 13 | from src.cbaCCRowProcessor import CBACCRowProcessor 14 | from src.cbaSavingsRowProcessor import CBASavingsRowProcessor 15 | 16 | cba_cc_row_processor = CBACCRowProcessor() 17 | cba_savings_row_processor = CBASavingsRowProcessor() 18 | 19 | s3_client = boto3.client('s3') 20 | textract_client = boto3.client('textract') 21 | 22 | def generate_table_csv(table_result, blocks_map, table_index): 23 | rows = get_rows_columns_map(table_result, blocks_map) 24 | csv = '' 25 | for row_index, cols in rows.items(): 26 | for col_index, text in cols.items(): 27 | csv += '{}'.format(text).strip() + "|" 28 | csv += '\n' 29 | return csv 30 | 31 | def get_rows_columns_map(table_result, blocks_map): 32 | rows = {} 33 | for relationship in table_result['Relationships']: 34 | if relationship['Type'] == 'CHILD': 35 | for child_id in relationship['Ids']: 36 | cell = blocks_map[child_id] 37 | if cell['BlockType'] == 'CELL': 38 | row_index = cell['RowIndex'] 39 | col_index = cell['ColumnIndex'] 40 | if row_index not in rows: 41 | # create new row 42 | rows[row_index] = {} 43 | # get the text value 44 | rows[row_index][col_index] = get_text(cell, blocks_map) 45 | return rows 46 | 47 | def get_text(result, blocks_map): 48 | text = '' 49 | if 'Relationships' in result: 50 | for relationship in result['Relationships']: 51 | if relationship['Type'] == 'CHILD': 52 | for child_id in relationship['Ids']: 53 | word = blocks_map[child_id] 54 | if word['BlockType'] == 'WORD': 55 | text += word['Text'] + ' ' 56 | if 
word['BlockType'] == 'SELECTION_ELEMENT': 57 | if word['SelectionStatus'] =='SELECTED': 58 | text += 'X ' 59 | return text 60 | 61 | def getJobResults(job_id, next_token = None): 62 | kwargs = {} 63 | if next_token: 64 | kwargs['NextToken'] = next_token 65 | 66 | response = textract_client.get_document_analysis(JobId=job_id, **kwargs) 67 | 68 | return response 69 | 70 | 71 | 72 | def handle(event, context): 73 | blocks_map = {} 74 | table_blocks = [] 75 | statement_name = event['object_name'] 76 | job_id = event['job_id'] 77 | statement_type = event['statement_type'] 78 | 79 | results = getJobResults(job_id) 80 | event['job_status'] = results['JobStatus'] 81 | event['job_update_timestamp'] = time.time() 82 | 83 | if event['job_status'] != "SUCCEEDED": 84 | if event['job_status'] != "IN_PROGRESS": 85 | event['results'] = results 86 | return event 87 | 88 | # Job succeeded - retrieve the results 89 | input_bucket = event['bucket_name'] 90 | input_object = event['object_name'] 91 | 92 | output_bucket = os.getenv('OUTPUT_BUCKET', input_bucket) 93 | output_prefix = os.environ['OUTPUT_PREFIX'] 94 | output_object_base = output_prefix + input_object 95 | 96 | event['output_bucket'] = output_bucket 97 | blocks = [] 98 | 99 | while True: 100 | if 'Blocks' in results: 101 | blocks.extend(results['Blocks']) 102 | for block in results['Blocks']: 103 | blocks_map[block['Id']] = block 104 | if block['BlockType'] == "TABLE": 105 | table_blocks.append(block) 106 | 107 | if 'NextToken' not in results: 108 | break 109 | 110 | print(f"NextToken: {results['NextToken']}") 111 | results = getJobResults(job_id, next_token=results['NextToken']) 112 | 113 | rows = [] 114 | for index, table in enumerate(table_blocks): 115 | table_csv = generate_table_csv(table, blocks_map, index +1) 116 | 117 | print(table_csv) 118 | 119 | if (table_csv.startswith('Date')): 120 | data = io.StringIO(table_csv.strip()) 121 | 122 | for row in csv.DictReader(data, delimiter="|", quoting=csv.QUOTE_NONE): 123 | 
try: 124 | #Use statement type from S3 metadata to determine which row processor to use 125 | if statement_type == 'cba_cc': 126 | rows.append(cba_cc_row_processor.process_row(row, statement_type, statement_name)) 127 | elif statement_type == 'cba_bank': 128 | rows.append(cba_savings_row_processor.process_row(row, statement_type, statement_name)) 129 | except Exception as e: 130 | print(e) 131 | 132 | output_object = f"{output_object_base}.json" 133 | s3_client.put_object( 134 | Bucket=output_bucket, 135 | Key=output_object, 136 | Body=(bytes(json.dumps(rows).encode('UTF-8'))), 137 | ServerSideEncryption='AES256', 138 | ContentType='application/json', 139 | ) 140 | print(f"Blocks file saved to: s3://{output_bucket}/{output_object}") 141 | event['blocks'] = output_object 142 | 143 | return event -------------------------------------------------------------------------------- /cloudFormation.yml: -------------------------------------------------------------------------------- 1 | AWSTemplateFormatVersion: 2010-09-09 2 | Description: The AWS CloudFormation template for the statement processing pipeline 3 | 4 | Parameters: 5 | Stage: 6 | Type: String 7 | Description: The deployment stage 8 | BucketPostfix: 9 | Type: String 10 | Description: A postfix to ensure uniqueness of bucket naming 11 | DeploymentBucket: 12 | Type: String 13 | Description: Name of the bucket containing the lambda deployment package zip 14 | Resources: 15 | StartStateMachineLogGroup: 16 | Type: 'AWS::Logs::LogGroup' 17 | Properties: 18 | LogGroupName: !Sub '/aws/lambda/statement-insight-${Stage}-startStateMachine' 19 | RestAPILogGroup: 20 | Type: 'AWS::Logs::LogGroup' 21 | Properties: 22 | LogGroupName: !Sub '/aws/lambda/statement-insight-${Stage}-restAPI' 23 | StartJobLogGroup: 24 | Type: 'AWS::Logs::LogGroup' 25 | Properties: 26 | LogGroupName: !Sub '/aws/lambda/statement-insight-${Stage}-startJob' 27 | GetResultsLogGroup: 28 | Type: 'AWS::Logs::LogGroup' 29 | Properties: 30 | LogGroupName: !Sub 
'/aws/lambda/statement-insight-${Stage}-getResults' 31 | IamRoleLambdaExecution: 32 | Type: 'AWS::IAM::Role' 33 | Properties: 34 | AssumeRolePolicyDocument: 35 | Version: 2012-10-17 36 | Statement: 37 | - Effect: Allow 38 | Principal: 39 | Service: 40 | - lambda.amazonaws.com 41 | Action: 42 | - 'sts:AssumeRole' 43 | Policies: 44 | - PolicyName: !Join 45 | - '-' 46 | - - statement-insight 47 | - lambda 48 | PolicyDocument: 49 | Version: 2012-10-17 50 | Statement: 51 | - Effect: Allow 52 | Action: 53 | - 'logs:CreateLogStream' 54 | - 'logs:CreateLogGroup' 55 | - 'logs:TagResource' 56 | Resource: 57 | - !Sub >- 58 | arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/statement-insight-${Stage}* 59 | - Effect: Allow 60 | Action: 61 | - 'logs:PutLogEvents' 62 | Resource: 63 | - !Sub >- 64 | arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/statement-insight-${Stage}* 65 | - Effect: Allow 66 | Action: 67 | - 'textract:StartDocumentTextDetection' 68 | - 'textract:StartDocumentAnalysis' 69 | - 'textract:GetDocumentTextDetection' 70 | - 'textract:GetDocumentAnalysis' 71 | Resource: 72 | - '*' 73 | - Effect: Allow 74 | Action: 75 | - 'states:StartExecution' 76 | Resource: 77 | - !Sub 'arn:aws:states:${AWS::Region}:${AWS::AccountId}:stateMachine:ProcessPDFStepFunction-${Stage}' 78 | - Effect: Allow 79 | Action: 80 | - 's3:PutObject' 81 | - 's3:GetObject' 82 | - 's3:ListBucket' 83 | Resource: 84 | - !Sub 'arn:aws:s3:::statement-insight-${BucketPostfix}' 85 | - !Sub 'arn:aws:s3:::statement-insight-${BucketPostfix}/*' 86 | - !Sub 'arn:aws:s3:::statement-insight-${BucketPostfix}-output' 87 | - !Sub 'arn:aws:s3:::statement-insight-${BucketPostfix}-output/*' 88 | Path: / 89 | RoleName: !Join 90 | - '-' 91 | - - statement-insight 92 | - !Sub '${Stage}' 93 | - !Ref 'AWS::Region' 94 | - lambdaRole 95 | StartStateMachineLambdaFunction: 96 | Type: 'AWS::Lambda::Function' 97 | Properties: 98 | Code: 99 | S3Bucket: !Sub 
'${DeploymentBucket}' 100 | S3Key: >- 101 | statement-insight.zip 102 | Handler: src/inputHandler.handle 103 | Runtime: python3.10 104 | FunctionName: !Sub 'statement-insight-${Stage}-startStateMachine' 105 | MemorySize: 1024 106 | Timeout: 600 107 | Environment: 108 | Variables: 109 | statemachine_arn: !Ref ProcessPDFStepFunction 110 | OUTPUT_BUCKET: !Sub 'statement-insight-${BucketPostfix}-output' 111 | OUTPUT_PREFIX: output 112 | Role: !GetAtt 113 | - IamRoleLambdaExecution 114 | - Arn 115 | DependsOn: 116 | - StartStateMachineLogGroup 117 | RestAPILambdaFunction: 118 | Type: 'AWS::Lambda::Function' 119 | Properties: 120 | Code: 121 | S3Bucket: !Sub '${DeploymentBucket}' 122 | S3Key: >- 123 | statement-insight.zip 124 | Handler: src/apiRequestHandler.handle 125 | Runtime: python3.10 126 | FunctionName: !Sub 'statement-insight-${Stage}-restAPI' 127 | MemorySize: 1024 128 | Timeout: 6 129 | Environment: 130 | Variables: 131 | DATA_BUCKET: !Sub 'statement-insight-${BucketPostfix}-output' 132 | Role: !GetAtt 133 | - IamRoleLambdaExecution 134 | - Arn 135 | DependsOn: 136 | - RestAPILogGroup 137 | StartJobLambdaFunction: 138 | Type: 'AWS::Lambda::Function' 139 | Properties: 140 | Code: 141 | S3Bucket: !Sub '${DeploymentBucket}' 142 | S3Key: >- 143 | statement-insight.zip 144 | Handler: src/startJob.handle 145 | Runtime: python3.10 146 | FunctionName: !Sub statement-insight-${Stage}-startJob 147 | MemorySize: 1024 148 | Timeout: 600 149 | Role: !GetAtt 150 | - IamRoleLambdaExecution 151 | - Arn 152 | DependsOn: 153 | - StartJobLogGroup 154 | GetResultsLambdaFunction: 155 | Type: 'AWS::Lambda::Function' 156 | Properties: 157 | Code: 158 | S3Bucket: !Sub '${DeploymentBucket}' 159 | S3Key: >- 160 | statement-insight.zip 161 | Handler: src/getResults.handle 162 | Runtime: python3.10 163 | FunctionName: !Sub 'statement-insight-${Stage}-getResults' 164 | MemorySize: 1024 165 | Timeout: 600 166 | Environment: 167 | Variables: 168 | OUTPUT_BUCKET: !Sub 
'statement-insight-${BucketPostfix}-output' 169 | OUTPUT_PREFIX: output 170 | Role: !GetAtt 171 | - IamRoleLambdaExecution 172 | - Arn 173 | DependsOn: 174 | - GetResultsLogGroup 175 | ProcessPDFStepFunctionRole: 176 | Type: 'AWS::IAM::Role' 177 | Properties: 178 | AssumeRolePolicyDocument: 179 | Version: 2012-10-17 180 | Statement: 181 | - Effect: Allow 182 | Principal: 183 | Service: !Sub 'states.${AWS::Region}.amazonaws.com' 184 | Action: 'sts:AssumeRole' 185 | Policies: 186 | - PolicyName: !Sub '${Stage}-statement-insight-statemachine' 187 | PolicyDocument: 188 | Version: 2012-10-17 189 | Statement: 190 | - Effect: Allow 191 | Action: 192 | - 'lambda:InvokeFunction' 193 | Resource: 194 | - !Sub >- 195 | arn:aws:lambda:${AWS::Region}:${AWS::AccountId}:function:statement-insight-${Stage}-startJob 196 | - !Sub 197 | - '${functionArn}:*' 198 | - functionArn: !Sub >- 199 | arn:aws:lambda:${AWS::Region}:${AWS::AccountId}:function:statement-insight-${Stage}-startJob 200 | - !Sub >- 201 | arn:aws:lambda:${AWS::Region}:${AWS::AccountId}:function:statement-insight-${Stage}-getResults 202 | - !Sub 203 | - '${functionArn}:*' 204 | - functionArn: !Sub >- 205 | arn:aws:lambda:${AWS::Region}:${AWS::AccountId}:function:statement-insight-${Stage}-getResults 206 | ProcessPDFStepFunction: 207 | Type: 'AWS::StepFunctions::StateMachine' 208 | Properties: 209 | DefinitionString: !Sub 210 | - |- 211 | { 212 | "StartAt": "StartJob", 213 | "States": { 214 | "StartJob": { 215 | "Type": "Task", 216 | "Resource": "${startJobLambdaArn}", 217 | "Next": "Wait" 218 | }, 219 | "Wait": { 220 | "Type": "Wait", 221 | "Seconds": 5, 222 | "Next": "GetResults" 223 | }, 224 | "GetResults": { 225 | "Type": "Task", 226 | "Resource": "${getResultsLambdaArn}", 227 | "Next": "IsJobDone" 228 | }, 229 | "IsJobDone": { 230 | "Type": "Choice", 231 | "Choices": [ 232 | { 233 | "Variable": "$.job_status", 234 | "StringEquals": "IN_PROGRESS", 235 | "Next": "Wait" 236 | }, 237 | { 238 | "Variable": 
"$.job_status", 239 | "StringEquals": "SUCCEEDED", 240 | "Next": "Success" 241 | } 242 | ] 243 | }, 244 | "Success": { 245 | "Type": "Succeed" 246 | } 247 | } 248 | } 249 | - startJobLambdaArn: !Sub >- 250 | arn:aws:lambda:${AWS::Region}:${AWS::AccountId}:function:statement-insight-${Stage}-startJob 251 | getResultsLambdaArn: !Sub >- 252 | arn:aws:lambda:${AWS::Region}:${AWS::AccountId}:function:statement-insight-${Stage}-getResults 253 | RoleArn: !GetAtt 254 | - ProcessPDFStepFunctionRole 255 | - Arn 256 | StateMachineName: !Sub "ProcessPDFStepFunction-${Stage}" 257 | DependsOn: 258 | - ProcessPDFStepFunctionRole 259 | S3BucketStatementinsightOutput: 260 | Type: 'AWS::S3::Bucket' 261 | Properties: 262 | BucketName: !Sub "statement-insight-${BucketPostfix}-output" 263 | PublicAccessBlockConfiguration: 264 | BlockPublicAcls: true 265 | BlockPublicPolicy: true 266 | IgnorePublicAcls: true 267 | RestrictPublicBuckets: true 268 | S3BucketStatementinsight: 269 | Type: 'AWS::S3::Bucket' 270 | Properties: 271 | BucketName: !Sub "statement-insight-${BucketPostfix}" 272 | PublicAccessBlockConfiguration: 273 | BlockPublicAcls: true 274 | BlockPublicPolicy: true 275 | IgnorePublicAcls: true 276 | RestrictPublicBuckets: true 277 | NotificationConfiguration: 278 | LambdaConfigurations: 279 | - Event: 's3:ObjectCreated:*' 280 | Function: !GetAtt 281 | - StartStateMachineLambdaFunction 282 | - Arn 283 | DependsOn: 284 | - StartStateMachineLambdaPermissionStatementinsight 285 | StartStateMachineLambdaPermissionStatementinsight: 286 | Type: 'AWS::Lambda::Permission' 287 | Properties: 288 | FunctionName: !GetAtt 289 | - StartStateMachineLambdaFunction 290 | - Arn 291 | Action: 'lambda:InvokeFunction' 292 | Principal: s3.amazonaws.com 293 | SourceArn: !Join 294 | - '' 295 | - - 'arn:' 296 | - !Ref 'AWS::Partition' 297 | - !Sub ':s3:::statement-insight-${BucketPostfix}' 298 | SourceAccount: !Ref 'AWS::AccountId' 299 | ApiGatewayRestApi: 300 | Type: 'AWS::ApiGateway::RestApi' 301 | 
Properties: 302 | Name: !Sub "${Stage}-statement-insight" 303 | EndpointConfiguration: 304 | Types: 305 | - EDGE 306 | Policy: '' 307 | ApiGatewayMethodOptions: 308 | Type: 'AWS::ApiGateway::Method' 309 | Properties: 310 | AuthorizationType: NONE 311 | HttpMethod: OPTIONS 312 | MethodResponses: 313 | - StatusCode: '200' 314 | ResponseParameters: 315 | method.response.header.Access-Control-Allow-Origin: true 316 | method.response.header.Access-Control-Allow-Headers: true 317 | method.response.header.Access-Control-Allow-Methods: true 318 | ResponseModels: {} 319 | RequestParameters: {} 320 | Integration: 321 | Type: MOCK 322 | RequestTemplates: 323 | application/json: '{statusCode:200}' 324 | ContentHandling: CONVERT_TO_TEXT 325 | IntegrationResponses: 326 | - StatusCode: '200' 327 | ResponseParameters: 328 | method.response.header.Access-Control-Allow-Origin: '''*''' 329 | method.response.header.Access-Control-Allow-Headers: >- 330 | 'Content-Type,X-Amz-Date,Authorization,X-Api-Key,X-Amz-Security-Token,X-Amz-User-Agent,X-Amzn-Trace-Id' 331 | method.response.header.Access-Control-Allow-Methods: '''OPTIONS,DELETE,GET,HEAD,PATCH,POST,PUT''' 332 | ResponseTemplates: 333 | application/json: '' 334 | ResourceId: !GetAtt 335 | - ApiGatewayRestApi 336 | - RootResourceId 337 | RestApiId: !Ref ApiGatewayRestApi 338 | ApiGatewayMethodAny: 339 | Type: 'AWS::ApiGateway::Method' 340 | Properties: 341 | HttpMethod: ANY 342 | RequestParameters: {} 343 | ResourceId: !GetAtt 344 | - ApiGatewayRestApi 345 | - RootResourceId 346 | RestApiId: !Ref ApiGatewayRestApi 347 | ApiKeyRequired: true 348 | AuthorizationType: NONE 349 | Integration: 350 | IntegrationHttpMethod: POST 351 | Type: AWS_PROXY 352 | Uri: !Join 353 | - '' 354 | - - 'arn:' 355 | - !Ref 'AWS::Partition' 356 | - ':apigateway:' 357 | - !Ref 'AWS::Region' 358 | - ':lambda:path/2015-03-31/functions/' 359 | - !GetAtt 360 | - RestAPILambdaFunction 361 | - Arn 362 | - /invocations 363 | MethodResponses: [] 364 | DependsOn: 365 
| - RestAPILambdaPermissionApiGateway 366 | ApiGatewayDeployment: 367 | Type: 'AWS::ApiGateway::Deployment' 368 | Properties: 369 | RestApiId: !Ref ApiGatewayRestApi 370 | StageName: !Sub "${Stage}" 371 | DependsOn: 372 | - ApiGatewayMethodOptions 373 | - ApiGatewayMethodAny 374 | ApiGatewayApiKey: 375 | Type: 'AWS::ApiGateway::ApiKey' 376 | Properties: 377 | Enabled: true 378 | Name: !Sub "statement-insight-key-${Stage}" 379 | StageKeys: 380 | - RestApiId: !Ref ApiGatewayRestApi 381 | StageName: !Sub "${Stage}" 382 | DependsOn: ApiGatewayDeployment 383 | ApiGatewayUsagePlan: 384 | Type: 'AWS::ApiGateway::UsagePlan' 385 | DependsOn: ApiGatewayDeployment 386 | Properties: 387 | ApiStages: 388 | - ApiId: !Ref ApiGatewayRestApi 389 | Stage: !Sub "${Stage}" 390 | Description: !Sub "Usage plan for statement-insight ${Stage} stage" 391 | UsagePlanName: !Sub "statement-insight-${Stage}" 392 | ApiGatewayUsagePlanKey: 393 | Type: 'AWS::ApiGateway::UsagePlanKey' 394 | Properties: 395 | KeyId: !Ref ApiGatewayApiKey 396 | KeyType: API_KEY 397 | UsagePlanId: !Ref ApiGatewayUsagePlan 398 | RestAPILambdaPermissionApiGateway: 399 | Type: 'AWS::Lambda::Permission' 400 | Properties: 401 | FunctionName: !GetAtt 402 | - RestAPILambdaFunction 403 | - Arn 404 | Action: 'lambda:InvokeFunction' 405 | Principal: apigateway.amazonaws.com 406 | SourceArn: !Join 407 | - '' 408 | - - 'arn:' 409 | - !Ref 'AWS::Partition' 410 | - ':execute-api:' 411 | - !Ref 'AWS::Region' 412 | - ':' 413 | - !Ref 'AWS::AccountId' 414 | - ':' 415 | - !Ref ApiGatewayRestApi 416 | - /*/* 417 | Outputs: 418 | LandingBucket: 419 | Description: The landing bucket to upload data to to be processed 420 | Value: !Sub "statement-insight-${BucketPostfix}" 421 | CLIApiKey: 422 | Description: CLI command to get the api key value. 
423 | Value: !Sub "aws apigateway get-api-key --api-key ${ApiGatewayApiKey.APIKeyId} --include-value --query \"value\" --output text" 424 | ServiceEndpoint: 425 | Description: URL of the service endpoint 426 | Value: !Join 427 | - '' 428 | - - 'https://' 429 | - !Ref ApiGatewayRestApi 430 | - .execute-api. 431 | - !Ref 'AWS::Region' 432 | - . 433 | - !Ref 'AWS::URLSuffix' 434 | - !Sub "/${Stage}" 435 | 436 | 437 | --------------------------------------------------------------------------------