├── .gitignore
├── .npmignore
├── .nvmrc
├── 00-deploy.sh
├── 10-test.sh
├── 20-query.sh
├── 30-download.sh
├── 40-publish.sh
├── 99-destroy.sh
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── assets
├── api-gw-event.json
├── finalize-event.json
├── media-object.json
├── mediaconvert-job-sample.json
├── player.template.html
├── s3-event.json
├── subtitles.srt
├── video-trigger.json
└── vmap.xml
├── bin
└── polly-preview-simple.ts
├── cdk.json
├── functions
├── finalize-lambda
│ └── finalize.py
├── polly-lambda
│ ├── package.json
│ └── polly.js
├── postprod-lambda
│ ├── fadeout.py
│ └── images.py
├── scrape-lambda
│ ├── .nvmrc
│ ├── lib
│ │ ├── SMPTE.js
│ │ ├── SRT.js
│ │ ├── VMAP.js
│ │ └── utils.js
│ ├── package-lock.json
│ ├── package.json
│ ├── scrape.js
│ └── voices.json
└── video-lambda
│ ├── full_hls.json
│ ├── preview_mp4.json
│ └── video.py
├── jest.config.js
├── lib
└── polly-preview-simple-stack.ts
├── package-lock.json
├── package.json
├── template
└── .gitignore
├── test
└── polly-preview-simple.test.ts
└── tsconfig.json
/.gitignore:
--------------------------------------------------------------------------------
1 | !jest.config.js
2 | *.d.ts
3 | node_modules
4 |
5 | # CDK asset staging directory
6 | .cdk.staging
7 | cdk.out
8 | cdk-outputs.json
9 |
10 | # Stack assets
11 | stack.out/*
12 | functions/postprod-lambda/bin/
13 |
--------------------------------------------------------------------------------
/.npmignore:
--------------------------------------------------------------------------------
1 | *.ts
2 | !*.d.ts
3 |
4 | # CDK asset staging directory
5 | .cdk.staging
6 | cdk.out
7 |
--------------------------------------------------------------------------------
/.nvmrc:
--------------------------------------------------------------------------------
1 | 14.17.6
--------------------------------------------------------------------------------
/00-deploy.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Deploys the solution to the current AWS account:
4 | #   1. installs jq and downloads FFmpeg/FFprobe into functions/postprod-lambda/bin,
5 | #   2. installs the Node.js dependencies using the nvm-managed Node.js 14.17.6,
6 | #   3. bootstraps, synthesizes and deploys the CDK app.
7 | # The stack outputs are written to stack.out/cdk-outputs.json.
8 |
9 | export NVM_DIR="$HOME/.nvm"
10 | source "$NVM_DIR/nvm.sh"
11 |
12 | # Downloads FFmpeg and FFprobe, prints the FFmpeg license and asks for consent.
13 | #   $1 - destination directory (default: functions/postprod-lambda/bin)
14 | # Exits the script with status 1 if a download fails or the user does not confirm.
15 | download_ffmpeg(){
16 |     echo "Downloading and installing FFmpeg and FFprobe."
17 |     DEFAULT_PATH='functions/postprod-lambda/bin'
18 |     DEST_PATH=${1:-$DEFAULT_PATH}
19 |     FFMPEG_URL='https://github.com/giusedroid/ffmpeg-lgpl-amazonlinux2-no-network/raw/main/bin/ffmpeg?raw=true'
20 |     FFPROBE_URL='https://github.com/giusedroid/ffmpeg-lgpl-amazonlinux2-no-network/raw/main/bin/ffprobe?raw=true'
21 |     mkdir -p "$DEST_PATH"
22 |     # --fail makes curl return an error on HTTP failures instead of saving the
23 |     # error page as if it were the binary.
24 |     curl -fsSL "$FFMPEG_URL" -o "$DEST_PATH/ffmpeg" || { echo "Failed to download FFmpeg." >&2; exit 1; }
25 |     curl -fsSL "$FFPROBE_URL" -o "$DEST_PATH/ffprobe" || { echo "Failed to download FFprobe." >&2; exit 1; }
26 |     chmod +x "$DEST_PATH/ffmpeg"
27 |     chmod +x "$DEST_PATH/ffprobe"
28 |
29 |     echo "FFmpeg has been downloaded. Please review its license before proceeding."
30 |     "$DEST_PATH/ffmpeg" -L
31 |
32 |     echo "###################### IMPORTANT LEGAL NOTICE ######################"
33 |     echo "this solution uses FFmpeg to analyze and manipulate the low-level visual and audio features of the uploaded media files."
34 |     echo "FFmpeg (https://ffmpeg.org/) is a free and open-source software suite for handling video, audio, and other multimedia files and streams."
35 |     echo "FFmpeg is distributed under the LGPL license (https://www.gnu.org/licenses/lgpl-2.1.en.html)."
36 |     echo "For more information about FFmpeg, please see the following link: https://www.ffmpeg.org/."
37 |     echo "Please carefully review the license prompted above before continuing."
38 |     echo "Your use of the solution will cause you to use FFmpeg. If you do not want use of FFmpeg, do not use the solution."
39 |     read -r -p "Do you wish to continue? (Y/N): " confirm && [[ $confirm == [yY] || $confirm == [yY][eE][sS] ]] || exit 1
40 | }
41 |
42 | mkdir -p stack.out
43 |
44 | echo "Installing JQ"
45 | sudo yum install -y jq
46 |
47 | download_ffmpeg
48 |
49 | echo "OK, deploying the solution to your AWS account."
50 |
51 | nvm install 14.17.6
52 |
53 | # Install the scrape Lambda dependencies; abort rather than run npm in the wrong directory.
54 | cd functions/scrape-lambda || exit 1
55 | nvm use
56 | npm i
57 | cd ../.. || exit 1
58 |
59 | nvm use
60 | npm i
61 | npx cdk bootstrap
62 | npx cdk synth
63 | npx cdk deploy --outputs-file ./stack.out/cdk-outputs.json
--------------------------------------------------------------------------------
/10-test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Download templates if not exist
4 | wget -nc https://github.com/giusedroid/aws-visual-narratives-templates/raw/main/assets/Template_video_right.mov -O template/Template_video_right.mov
5 | wget -nc https://github.com/giusedroid/aws-visual-narratives-templates/raw/main/assets/template.mov -O template/template.mov
6 |
7 | # Upload templates to bucket
8 | S3_BUCKET=$(jq .'PollyPreviewSimpleStack.AssetStoreBucketName' stack.out/cdk-outputs.json | xargs)
9 | aws s3 sync template s3://$S3_BUCKET/custom/template
10 |
11 | # Invoke API
12 | API_ENDPOINT=$(jq .'PollyPreviewSimpleStack.APIEndpoint' stack.out/cdk-outputs.json | xargs)
13 |
14 | curl -X POST -H "Content-Type: application/json" \
15 | -d '{"Url": "https://giusedroid.wordpress.com/2021/04/29/a-brief-history-of-ferrari"}' \
16 | $API_ENDPOINT > stack.out/article.json
17 |
--------------------------------------------------------------------------------
/20-query.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | METADATA_ASSET_STORE=$(jq '.PollyPreviewSimpleStack.MetadataStoreName' stack.out/cdk-outputs.json | xargs)
4 | ASSET_ID=$(jq '.AssetId' stack.out/article.json)
5 |
6 | aws dynamodb get-item \
7 | --table-name $METADATA_ASSET_STORE \
8 | --key "{\"AssetId\":{\"S\":$ASSET_ID}}" \
9 | --output json > stack.out/processed-article.json
--------------------------------------------------------------------------------
/30-download.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | PREVIEW_FILE=$(jq '.Item.PreviewVideoFile.S' stack.out/processed-article.json | xargs)
4 |
5 | aws s3 cp $PREVIEW_FILE stack.out/preview.mp4
--------------------------------------------------------------------------------
/40-publish.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | AWS_REGION=$(aws configure get region)
3 | ASSET_STORE=$(jq '.PollyPreviewSimpleStack.AssetStoreBucketName' stack.out/cdk-outputs.json | xargs)
4 | FULL_STREAM_M3U8=$(jq '.Item.FullVideoStream.S' stack.out/processed-article.json | xargs)
5 | FULL_STREAM_PATH=${FULL_STREAM_M3U8%/*}
6 | ASSET_ID=${FULL_STREAM_PATH##*/}
7 | PUBLIC_PATH=s3://$ASSET_STORE/public/full/hls/$ASSET_ID
8 |
9 | STREAM_URL=https://$ASSET_STORE.s3.$AWS_REGION.amazonaws.com/public/full/hls/$ASSET_ID/template.m3u8
10 | INDEX_URL=https://$ASSET_STORE.s3.$AWS_REGION.amazonaws.com/public/full/hls/$ASSET_ID/index.html
11 |
12 | echo "WARNING: By running this script, you will publish your HLS (video asset) to s3://$ASSET_STORE/public/full/hls"
13 | read -p "Do you wish to continue? (Y/N): " confirm && [[ $confirm == [yY] || $confirm == [yY][eE][sS] ]] || exit 1
14 |
15 | aws s3 cp $FULL_STREAM_PATH/ $PUBLIC_PATH --acl public-read --recursive
16 |
17 | rm stack.out/index.html
18 | cp assets/player.template.html stack.out/index.html
19 | sed -i "s@{STREAM_URL}@$STREAM_URL@g" stack.out/index.html
20 |
21 | aws s3 cp stack.out/index.html $PUBLIC_PATH/ --acl public-read
22 |
23 | aws s3 cp $FULL_STREAM_M3U8 stack.out/template.m3u8
24 |
25 | echo "Website endpoint: $INDEX_URL"
26 |
27 |
--------------------------------------------------------------------------------
/99-destroy.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Destroys the PollyPreviewSimpleStack CDK stack.
3 | npx cdk destroy PollyPreviewSimpleStack
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | ## Code of Conduct
2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
4 | opensource-codeofconduct@amazon.com with any additional questions or comments.
5 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing Guidelines
2 |
3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional
4 | documentation, we greatly value feedback and contributions from our community.
5 |
6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary
7 | information to effectively respond to your bug report or contribution.
8 |
9 |
10 | ## Reporting Bugs/Feature Requests
11 |
12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features.
13 |
14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already
15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful:
16 |
17 | * A reproducible test case or series of steps
18 | * The version of our code being used
19 | * Any modifications you've made relevant to the bug
20 | * Anything unusual about your environment or deployment
21 |
22 |
23 | ## Contributing via Pull Requests
24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that:
25 |
26 | 1. You are working against the latest source on the *main* branch.
27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already.
28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted.
29 |
30 | To send us a pull request, please:
31 |
32 | 1. Fork the repository.
33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change.
34 | 3. Ensure local tests pass.
35 | 4. Commit to your fork using clear commit messages.
36 | 5. Send us a pull request, answering any default questions in the pull request interface.
37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation.
38 |
39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and
40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/).
41 |
42 |
43 | ## Finding contributions to work on
44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start.
45 |
46 |
47 | ## Code of Conduct
48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
50 | opensource-codeofconduct@amazon.com with any additional questions or comments.
51 |
52 |
53 | ## Security issue notifications
54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue.
55 |
56 |
57 | ## Licensing
58 |
59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution.
60 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of
4 | this software and associated documentation files (the "Software"), to deal in
5 | the Software without restriction, including without limitation the rights to
6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
7 | the Software, and to permit persons to whom the Software is furnished to do so.
8 |
9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
10 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
11 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
12 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
13 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
14 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
15 |
16 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Social Media Stories and Visual Narrations with Amazon Polly and AWS Elemental MediaConvert
2 |
3 | This project deploys a stack that ingests WordPress articles and produces two digital assets:
4 | - a full visual narration of variable length, in which the article is read by Amazon Polly and illustrated with its first four images
5 | - a social media story (30 seconds in duration) that you can share on your favourite social media platform
6 |
7 | You can find more info about the stack on the [AWS Media Blog](insert-blog-article-url-here)
8 |
9 | ## Deploying
10 |
11 | 1. Log in to your AWS account.
12 | 2. Deploy a Cloud9 environment following [these instructions](https://docs.aws.amazon.com/cloud9/latest/user-guide/tutorial-create-environment.html).
13 | 3. Clone this repository on your Cloud9 environment by running `git clone https://github.com/aws-samples/aws-elemental-mediaconvert-articles2video.git`.
14 | 4. In a shell, run `./00-deploy.sh` and follow the prompts displayed by the script.
15 |
16 | This will produce the file `stack.out/cdk-outputs.json`.
17 |
18 | **IMPORTANT LEGAL NOTICE**
19 |
20 | This solution uses FFmpeg to analyze and manipulate the low-level visual and audio features of the uploaded media files.
21 | [FFmpeg](https://ffmpeg.org/) is a free and open-source software suite for handling video, audio, and other multimedia files and streams.
22 | FFmpeg is distributed under the [LGPL license v2.1](https://www.gnu.org/licenses/lgpl-2.1.en.html).
23 | For more information about FFmpeg, please see the [FFmpeg website](https://www.ffmpeg.org/).
24 | Your use of the solution will cause you to use FFmpeg. If you do not want use of FFmpeg, do not use the solution.
25 |
26 | **IMPORTANT SECURITY NOTICE**
27 | This solution makes use of FFmpeg compiled with `--disable-network` in order to prevent access to external resources. As this solution does not provide a mechanism to update FFmpeg, please make sure that you update the FFmpeg binaries yourself when needed.
28 |
29 |
30 | ## Testing
31 |
32 | Once the deployment is completed, run `./10-test.sh` to start the workflow.
33 | The script will produce the file `stack.out/article.json` with the response from the API.
34 |
35 | ## Getting Results
36 |
37 | You can monitor the workflow by running `./20-query.sh`.
38 | This will produce the file `stack.out/processed-article.json`.
39 | As the workflow may take a couple of minutes to produce all of the assets, please
40 | run this script every 30 seconds until `PreviewVideoFile` and `FullVideoStream`
41 | are populated.
42 |
43 | ## Downloading the Preview
44 |
45 | Run this step once `PreviewVideoFile` is populated by `20-query.sh` in `stack.out/processed-article.json`.
46 | You can download the preview video file by running `./30-download.sh`.
47 | This will produce the file `stack.out/preview.mp4`.
48 |
49 | ## Publishing the Full Narration
50 | Run this step once `FullVideoStream` has been populated by `20-query.sh` in `stack.out/processed-article.json`.
51 |
52 | **WARNING:** by running the following step, you will be making a public copy of
53 | your full narration video asset.
54 |
55 | To publish your full narration, run `./40-publish.sh` and follow the prompts displayed
56 | by the script.
57 | This step will copy your HLS playlist and segments and will make them public
58 | on the S3 Asset Store deployed by this code sample. Additionally, the script will
59 | generate an HTML page and upload it to the S3 Asset Store.
60 | At the end of the process, the script will print the URL of the HTML page where you
61 | can play the full narration.
--------------------------------------------------------------------------------
/assets/api-gw-event.json:
--------------------------------------------------------------------------------
1 | {
2 | "body": "eyJ0ZXN0IjoiYm9keSJ9",
3 | "resource": "/{proxy+}",
4 | "path": "/path/to/resource",
5 | "httpMethod": "POST",
6 | "isBase64Encoded": true,
7 | "queryStringParameters": {
8 | "foo": "bar"
9 | },
10 | "multiValueQueryStringParameters": {
11 | "foo": [
12 | "bar"
13 | ]
14 | },
15 | "pathParameters": {
16 | "proxy": "/path/to/resource"
17 | },
18 | "stageVariables": {
19 | "baz": "qux"
20 | },
21 | "headers": {
22 | "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
23 | "Accept-Encoding": "gzip, deflate, sdch",
24 | "Accept-Language": "en-US,en;q=0.8",
25 | "Cache-Control": "max-age=0",
26 | "CloudFront-Forwarded-Proto": "https",
27 | "CloudFront-Is-Desktop-Viewer": "true",
28 | "CloudFront-Is-Mobile-Viewer": "false",
29 | "CloudFront-Is-SmartTV-Viewer": "false",
30 | "CloudFront-Is-Tablet-Viewer": "false",
31 | "CloudFront-Viewer-Country": "US",
32 | "Host": "1234567890.execute-api.eu-west-1.amazonaws.com",
33 | "Upgrade-Insecure-Requests": "1",
34 | "User-Agent": "Custom User Agent String",
35 | "Via": "1.1 08f323deadbeefa7af34d5feb414ce27.cloudfront.net (CloudFront)",
36 | "X-Amz-Cf-Id": "cDehVQoZnx43VYQb9j2-nvCh-9z396Uhbp027Y2JvkCPNLmGJHqlaA==",
37 | "X-Forwarded-For": "127.0.0.1, 127.0.0.2",
38 | "X-Forwarded-Port": "443",
39 | "X-Forwarded-Proto": "https"
40 | },
41 | "multiValueHeaders": {
42 | "Accept": [
43 | "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"
44 | ],
45 | "Accept-Encoding": [
46 | "gzip, deflate, sdch"
47 | ],
48 | "Accept-Language": [
49 | "en-US,en;q=0.8"
50 | ],
51 | "Cache-Control": [
52 | "max-age=0"
53 | ],
54 | "CloudFront-Forwarded-Proto": [
55 | "https"
56 | ],
57 | "CloudFront-Is-Desktop-Viewer": [
58 | "true"
59 | ],
60 | "CloudFront-Is-Mobile-Viewer": [
61 | "false"
62 | ],
63 | "CloudFront-Is-SmartTV-Viewer": [
64 | "false"
65 | ],
66 | "CloudFront-Is-Tablet-Viewer": [
67 | "false"
68 | ],
69 | "CloudFront-Viewer-Country": [
70 | "US"
71 | ],
72 | "Host": [
73 | "0123456789.execute-api.eu-west-1.amazonaws.com"
74 | ],
75 | "Upgrade-Insecure-Requests": [
76 | "1"
77 | ],
78 | "User-Agent": [
79 | "Custom User Agent String"
80 | ],
81 | "Via": [
82 | "1.1 08f323deadbeefa7af34d5feb414ce27.cloudfront.net (CloudFront)"
83 | ],
84 | "X-Amz-Cf-Id": [
85 | "cDehVQoZnx43VYQb9j2-nvCh-9z396Uhbp027Y2JvkCPNLmGJHqlaA=="
86 | ],
87 | "X-Forwarded-For": [
88 | "127.0.0.1, 127.0.0.2"
89 | ],
90 | "X-Forwarded-Port": [
91 | "443"
92 | ],
93 | "X-Forwarded-Proto": [
94 | "https"
95 | ]
96 | },
97 | "requestContext": {
98 | "accountId": "123456789012",
99 | "resourceId": "123456",
100 | "stage": "prod",
101 | "requestId": "c6af9ac6-7b61-11e6-9a41-93e8deadbeef",
102 | "requestTime": "09/Apr/2015:12:34:56 +0000",
103 | "requestTimeEpoch": 1428582896000,
104 | "identity": {
105 | "cognitoIdentityPoolId": null,
106 | "accountId": null,
107 | "cognitoIdentityId": null,
108 | "caller": null,
109 | "accessKey": null,
110 | "sourceIp": "127.0.0.1",
111 | "cognitoAuthenticationType": null,
112 | "cognitoAuthenticationProvider": null,
113 | "userArn": null,
114 | "userAgent": "Custom User Agent String",
115 | "user": null
116 | },
117 | "path": "/prod/path/to/resource",
118 | "resourcePath": "/{proxy+}",
119 | "httpMethod": "POST",
120 | "apiId": "1234567890",
121 | "protocol": "HTTP/1.1"
122 | }
123 | }
--------------------------------------------------------------------------------
/assets/finalize-event.json:
--------------------------------------------------------------------------------
1 | {
2 | "Records": [
3 | {
4 | "eventVersion": "2.1",
5 | "eventSource": "aws:s3",
6 | "awsRegion": "eu-west-1",
7 | "eventTime": "2021-09-14T12:06:32.578Z",
8 | "eventName": "ObjectCreated:Put",
9 | "userIdentity": {
10 | "principalId": "AWS:REDACTED"
11 | },
12 | "requestParameters": {
13 | "sourceIPAddress": "REDACTED"
14 | },
15 | "responseElements": {
16 | "x-amz-request-id": "REDACTED",
17 | "x-amz-id-2": "REDACTED"
18 | },
19 | "s3": {
20 | "s3SchemaVersion": "1.0",
21 | "configurationId": "REDACTED",
22 | "bucket": {
23 | "name": "REDACTED",
24 | "ownerIdentity": {
25 | "principalId": "REDACTED"
26 | },
27 | "arn": "arn:aws:s3:::REDACTED"
28 | },
29 | "object": {
30 | "key": "output/full/hls/62de657b-7884-4dc0-8286-b9b63c521351/template62de657b-7884-4dc0-8286-b9b63c521351.m3u8",
31 | "size": 1089,
32 | "eTag": "REDACTED",
33 | "sequencer": "REDACTED"
34 | }
35 | }
36 | }
37 | ]
38 | }
--------------------------------------------------------------------------------
/assets/media-object.json:
--------------------------------------------------------------------------------
1 | {
2 | "s3_full_path": "s3://pollypreviewsimplestack-pollyassetstore920ee247-1pvn3ec82d905/audio/full/test-001/REDACTED.e56032ba-6893-4be8-8694-a703c5ab1c39.mp3",
3 | "s3_path": "pollypreviewsimplestack-pollyassetstore920ee247-1pvn3ec82d905/audio/full/test-001/REDACTED.e56032ba-6893-4be8-8694-a703c5ab1c39.mp3",
4 | "s3_bucket": "pollypreviewsimplestack-pollyassetstore920ee247-1pvn3ec82d905",
5 | "s3_key": "audio/full/test-001/REDACTED.e56032ba-6893-4be8-8694-a703c5ab1c39.mp3",
6 | "media_type": "audio",
7 | "media_format": "full",
8 | "media_extension": "mp3",
9 | "media_polly_file": "REDACTED.e56032ba-6893-4be8-8694-a703c5ab1c39.mp3",
10 | "media_polly_no_extension": "REDACTED.e56032ba-6893-4be8-8694-a703c5ab1c39",
11 | "media_document_id": "full/test-001",
12 | "local_path": "/tmp/audio/full/full/test-001",
13 | "local_full_path": "/tmp/audio/full/test-001/REDACTED.e56032ba-6893-4be8-8694-a703c5ab1c39.mp3",
14 | "local_preview_path": "/tmp/audio/preview/full/test-001",
15 | "local_preview_full_path": "/tmp/audio/preview/full/test-001/REDACTED.e56032ba-6893-4be8-8694-a703c5ab1c39..wav",
16 | "preview_s3_key": "audio/preview/full/test-001/REDACTED.e56032ba-6893-4be8-8694-a703c5ab1c39..wav",
17 | "preview_s3_full_path": "pollypreviewsimplestack-pollyassetstore920ee247-1pvn3ec82d905/audio/preview/full/test-001/REDACTED.e56032ba-6893-4be8-8694-a703c5ab1c39..wav"
18 | }
--------------------------------------------------------------------------------
/assets/mediaconvert-job-sample.json:
--------------------------------------------------------------------------------
1 | {
2 | "Queue": "arn:aws:mediaconvert:eu-west-1:REDACTED:queues/Default",
3 | "UserMetadata": {
4 | "assetID": "a19216b2-9f75-4119-9a39-7a6a950be0d8.json",
5 | "application": "VOD",
6 | "input": "s3://gbatt-blogs/narratives/template.mov",
7 | "settings": "Default"
8 | },
9 | "Role": "arn:aws:iam::REDACTED:role/MediaconvertPassDownRole",
10 | "Settings": {
11 | "OutputGroups": [
12 | {
13 | "Name": "File Group",
14 | "Outputs": [
15 | {
16 | "ContainerSettings": {
17 | "Container": "MP4",
18 | "Mp4Settings": {}
19 | },
20 | "VideoDescription": {
21 | "CodecSettings": {
22 | "Codec": "H_264",
23 | "H264Settings": {
24 | "Bitrate": 2000000
25 | }
26 | }
27 | },
28 | "AudioDescriptions": [
29 | {
30 | "CodecSettings": {
31 | "Codec": "AAC",
32 | "AacSettings": {
33 | "Bitrate": 96000,
34 | "CodingMode": "CODING_MODE_2_0",
35 | "SampleRate": 48000
36 | }
37 | }
38 | }
39 | ],
40 | "CaptionDescriptions": [
41 | {
42 | "CaptionSelectorName": "Captions Selector 1",
43 | "DestinationSettings": {
44 | "DestinationType": "BURN_IN",
45 | "BurninDestinationSettings": {
46 | "Alignment": "CENTERED",
47 | "OutlineSize": 0,
48 | "FontOpacity": 255,
49 | "OutlineColor": "BLACK"
50 | }
51 | }
52 | }
53 | ]
54 | }
55 | ],
56 | "OutputGroupSettings": {
57 | "Type": "FILE_GROUP_SETTINGS",
58 | "FileGroupSettings": {
59 | "Destination": "s3://$POLLY_ASSET_STORE/output/preview/a19216b2-9f75-4119-9a39-7a6a950be0d8"
60 | }
61 | }
62 | }
63 | ],
64 | "AdAvailOffset": 0,
65 | "Inputs": [
66 | {
67 | "InputClippings": [
68 | {
69 | "EndTimecode": "00:00:31:00",
70 | "StartTimecode": "00:00:00:00"
71 | }
72 | ],
73 | "AudioSelectors": {
74 | "Audio Selector 1": {
75 | "DefaultSelection": "DEFAULT",
76 | "ExternalAudioFileInput": "s3://$POLLY_ASSET_STORE/audio/preview/a19216b2-9f75-4119-9a39-7a6a950be0d8.json/$PREVIEW.wav"
77 | }
78 | },
79 | "VideoSelector": {},
80 | "TimecodeSource": "ZEROBASED",
81 | "CaptionSelectors": {
82 | "Captions Selector 1": {
83 | "SourceSettings": {
84 | "SourceType": "SRT",
85 | "FileSourceSettings": {
86 | "SourceFile": "s3://$POLLY_ASSET_STORE/srt/preview/a19216b2-9f75-4119-9a39-7a6a950be0d8.json.srt"
87 | }
88 | }
89 | }
90 | },
91 | "ImageInserter": {
92 | "InsertableImages": [
93 | {
94 | "Width": 1200,
95 | "Height": 900,
96 | "ImageX": 400,
97 | "ImageY": 10,
98 | "Duration": 5000,
99 | "Layer": 0,
100 | "ImageInserterInput": "s3://$POLLY_ASSET_STORE/image/output/a19216b2-9f75-4119-9a39-7a6a950be0d8.json/ferrari_01.jpg.tga",
101 | "StartTime": "00:00:05:00",
102 | "Opacity": 100
103 | },
104 | {
105 | "Width": 1200,
106 | "Height": 900,
107 | "ImageX": 400,
108 | "ImageY": 10,
109 | "Duration": 5000,
110 | "Layer": 1,
111 | "ImageInserterInput": "s3://$POLLY_ASSET_STORE/image/output/a19216b2-9f75-4119-9a39-7a6a950be0d8.json/ferrari_04.jpg.tga",
112 | "StartTime": "00:00:10:00",
113 | "Opacity": 100
114 | },
115 | {
116 | "Width": 1200,
117 | "Height": 900,
118 | "ImageX": 400,
119 | "ImageY": 10,
120 | "Duration": 5000,
121 | "Layer": 2,
122 | "ImageInserterInput": "s3://$POLLY_ASSET_STORE/image/output/a19216b2-9f75-4119-9a39-7a6a950be0d8.json/ferrari_02.jpg.tga",
123 | "StartTime": "00:00:15:00",
124 | "Opacity": 100
125 | },
126 | {
127 | "Width": 1200,
128 | "Height": 900,
129 | "ImageX": 400,
130 | "ImageY": 10,
131 | "Duration": 5000,
132 | "Layer": 3,
133 | "ImageInserterInput": "s3://$POLLY_ASSET_STORE/image/output/a19216b2-9f75-4119-9a39-7a6a950be0d8.json/ferrari_05.jpg.tga",
134 | "StartTime": "00:00:20:10",
135 | "Opacity": 100
136 | }
137 | ]
138 | },
139 | "FileInput": "s3://gbatt-blogs/narratives/template.mov"
140 | }
141 | ]
142 | },
143 | "AccelerationSettings": {
144 | "Mode": "DISABLED"
145 | },
146 | "StatusUpdateInterval": "SECONDS_60",
147 | "Priority": 0
148 | }
--------------------------------------------------------------------------------
/assets/player.template.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | AWS Visual Narrations Demo
6 |
7 |
18 |
19 |
20 |
21 |
22 | AWS Visual Narrations Demo
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
34 |
35 |
36 |
--------------------------------------------------------------------------------
/assets/s3-event.json:
--------------------------------------------------------------------------------
1 | {
2 | "Records": [
3 | {
4 | "eventVersion": "2.0",
5 | "eventSource": "aws:s3",
6 | "awsRegion": "eu-west-1",
7 | "eventTime": "1970-01-01T00:00:00.000Z",
8 | "eventName": "ObjectCreated:Put",
9 | "userIdentity": {
10 | "principalId": "EXAMPLE"
11 | },
12 | "requestParameters": {
13 | "sourceIPAddress": "127.0.0.1"
14 | },
15 | "responseElements": {
16 | "x-amz-request-id": "EXAMPLE123456789",
17 | "x-amz-id-2": "EXAMPLE123/5678abcdefghijklambdaisawesome/mnopqrstuvwxyzABCDEFGH"
18 | },
19 | "s3": {
20 | "s3SchemaVersion": "1.0",
21 | "configurationId": "testConfigRule",
22 | "bucket": {
23 | "name": "example-bucket",
24 | "ownerIdentity": {
25 | "principalId": "EXAMPLE"
26 | },
27 | "arn": "arn:aws:s3:::example-bucket"
28 | },
29 | "object": {
30 | "key": "test/key",
31 | "size": 1024,
32 | "eTag": "0123456789abcdef0123456789abcdef",
33 | "sequencer": "0A1B2C3D4E5F678901"
34 | }
35 | }
36 | }
37 | ]
38 | }
--------------------------------------------------------------------------------
/assets/subtitles.srt:
--------------------------------------------------------------------------------
1 | 1
2 | 00:00:00,000 --> 00:00:05,000
3 | A Brief History of Ferrari
4 |
5 | 2
6 | 00:00:05,100 --> 00:00:10,000
7 | Racing DNA
8 |
9 | 3
10 | 00:00:10,100 --> 00:00:15,000
11 | Motorsport Evolution
12 |
13 | 4
14 | 00:00:15,100 --> 00:00:20,000
15 | Road Cars
16 |
17 | 5
18 | 00:00:20,100 --> 00:00:25,000
19 | Modern Supercars
20 |
21 | 6
22 | 00:00:25,100 --> 00:00:30,000
23 | More info at aws.amazon.com
24 |
--------------------------------------------------------------------------------
/assets/video-trigger.json:
--------------------------------------------------------------------------------
1 | {
2 | "Bucket": "pollypreviewsimplestack-pollyassetstore920ee247-1pvn3ec82d905",
3 | "Key": "video-trigger/e7b0f0dc-2400-42cb-be84-88ddc9407b45.json",
4 | "AssetId": "e7b0f0dc-2400-42cb-be84-88ddc9407b45.json",
5 | "ArticleBody": {
6 | "AssetId": "e7b0f0dc-2400-42cb-be84-88ddc9407b45.json",
7 | "Text": "Ferrari is an Italian luxury sports car manufacturer based in Maranello, Italy. Founded by Enzo Ferrari in 1939 out of the Alfa Romeo race division as Auto Avio Costruzioni, the company built its first car in 1940, and produced its first Ferrari-badged car in 1947.Enzo Ferrari was not initially interested in the idea of producing road cars when he formed Scuderia Ferrari in 1929, with headquarters in Modena. Scuderia Ferrari literally means \u201cFerrari Stable\u201d and is usually used to mean \u201cTeam Ferrari.\u201d Ferrari bought prepared, and fielded Alfa Romeo racing cars for gentleman drivers, functioning as the racing division of Alfa Romeo. In 1933, Alfa Romeo withdrew its in-house racing team and Scuderia Ferrari took over as its works team: the Scuderia received Alfa\u2019s Grand Prix cars of the latest specifications and fielded many famous drivers such as Tazio Nuvolari and Achille Varzi. In 1938, Alfa Romeo brought its racing operation again in-house, forming Alfa Corse in Milan and hired Enzo Ferrari as manager of the new racing department; therefore the Scuderia Ferrari was disbanded.Since the company\u2019s beginnings, Ferrari has been involved in motorsport, competing in a range of categories including Formula One and sports car racing through its Scuderia Ferrari sporting division as well as supplying cars and engines to other teams and for one-make race series.1940 AAC 815 was the first racing car to be designed by Enzo Ferrari, although it was not badged as a Ferrari model.The first vehicle made with the Ferrari name was the 125 S. Only two of this small two-seat sports/racing V12 car were made. In 1949, the 166 Inter was introduced marking the company\u2019s significant move into the grand touring road car market. The first 166 Inter was a four-seat (2+2) berlinetta coupe with body work designed by Carrozzeria Touring Superleggera. 
Road cars quickly became the bulk of Ferrari sales.The early Ferrari cars typically featured bodywork designed and customised by independent coach builders such as Pininfarina, Scaglietti, Zagato, Vignale and Bertone.The original road cars were typically two-seat front-engined V12s. This platform served Ferrari very well through the 1950s and 1960s. In 1968 the Dino was introduced as the first two-seat rear mid-engined Ferrari. The Dino was produced primarily with a V6 engine, however, a V8 model was also developed. This rear mid-engine layout would go on to be used in many Ferraris of the 1980s, 1990s and to the present day. Current road cars typically use V8 or V12 engines, with V8 models making up well over half of the marque\u2019s total production. Historically, Ferrari has also produced flat 12 engines.The 1984 288 GTO may be considered the first in the line of Ferrari supercars. This pedigree extends through the Enzo Ferrari to the LaFerrari. In February 2019, at the 89th Geneva International Motor Show, Ferrari revealed its latest mid-engine V8 supercar, the F8 Tributo.Ferrari SF90 Stradale is the first-ever Ferrari to feature PHEV (Plug-in Hybrid Electric Vehicle) architecture which sees the internal combustion engine integrated with three electric motors, two of which are independent and located on the front axle, with the third at the rear between the engine and the gearbox.",
8 | "LanguageCode": "en-US",
9 | "VoiceId": "Kevin",
10 | "Engine": "neural",
11 | "Url": "https://giusedroid.wordpress.com/2021/04/29/a-brief-history-of-ferrari/amp/",
12 | "ImagesURLs": [
13 | "https://giusedroid.files.wordpress.com/2021/04/ferrari_01.jpg",
14 | "https://giusedroid.files.wordpress.com/2021/04/ferrari_04.jpg",
15 | "https://giusedroid.files.wordpress.com/2021/04/ferrari_02.jpg",
16 | "https://giusedroid.files.wordpress.com/2021/04/ferrari_05.jpg"
17 | ],
18 | "TitlesText": [
19 | "Racing DNA",
20 | "Motorsport Evolution",
21 | "Road Cars",
22 | "Modern Supercars"
23 | ],
24 | "Entities": [
25 | {
26 | "Score": 0.9922897219657898,
27 | "Type": "ORGANIZATION",
28 | "Text": "Ferrari",
29 | "BeginOffset": 0,
30 | "EndOffset": 7
31 | },
32 | {
33 | "Score": 0.9665127992630005,
34 | "Type": "OTHER",
35 | "Text": "Italian",
36 | "BeginOffset": 15,
37 | "EndOffset": 22
38 | },
39 | {
40 | "Score": 0.9865739941596985,
41 | "Type": "LOCATION",
42 | "Text": "Maranello, Italy",
43 | "BeginOffset": 63,
44 | "EndOffset": 79
45 | },
46 | {
47 | "Score": 0.8678830862045288,
48 | "Type": "PERSON",
49 | "Text": "Enzo Ferrari",
50 | "BeginOffset": 92,
51 | "EndOffset": 104
52 | },
53 | {
54 | "Score": 0.9990893602371216,
55 | "Type": "DATE",
56 | "Text": "1939",
57 | "BeginOffset": 108,
58 | "EndOffset": 112
59 | },
60 | {
61 | "Score": 0.997645914554596,
62 | "Type": "ORGANIZATION",
63 | "Text": "Alfa Romeo",
64 | "BeginOffset": 124,
65 | "EndOffset": 134
66 | },
67 | {
68 | "Score": 0.9951758980751038,
69 | "Type": "ORGANIZATION",
70 | "Text": "Auto Avio Costruzioni",
71 | "BeginOffset": 152,
72 | "EndOffset": 173
73 | },
74 | {
75 | "Score": 0.9887651205062866,
76 | "Type": "QUANTITY",
77 | "Text": "first car",
78 | "BeginOffset": 197,
79 | "EndOffset": 206
80 | },
81 | {
82 | "Score": 0.9988935589790344,
83 | "Type": "DATE",
84 | "Text": "1940",
85 | "BeginOffset": 210,
86 | "EndOffset": 214
87 | },
88 | {
89 | "Score": 0.9943069815635681,
90 | "Type": "QUANTITY",
91 | "Text": "first",
92 | "BeginOffset": 233,
93 | "EndOffset": 238
94 | },
95 | {
96 | "Score": 0.4226551353931427,
97 | "Type": "ORGANIZATION",
98 | "Text": "Ferrari",
99 | "BeginOffset": 239,
100 | "EndOffset": 246
101 | },
102 | {
103 | "Score": 0.5782862305641174,
104 | "Type": "QUANTITY",
105 | "Text": "car",
106 | "BeginOffset": 254,
107 | "EndOffset": 257
108 | },
109 | {
110 | "Score": 0.9896280765533447,
111 | "Type": "DATE",
112 | "Text": "1947.Enzo",
113 | "BeginOffset": 261,
114 | "EndOffset": 270
115 | },
116 | {
117 | "Score": 0.8742840886116028,
118 | "Type": "PERSON",
119 | "Text": "Ferrari",
120 | "BeginOffset": 271,
121 | "EndOffset": 278
122 | },
123 | {
124 | "Score": 0.9315663576126099,
125 | "Type": "ORGANIZATION",
126 | "Text": "Scuderia Ferrari",
127 | "BeginOffset": 358,
128 | "EndOffset": 374
129 | },
130 | {
131 | "Score": 0.9992067217826843,
132 | "Type": "DATE",
133 | "Text": "1929",
134 | "BeginOffset": 378,
135 | "EndOffset": 382
136 | },
137 | {
138 | "Score": 0.9991596937179565,
139 | "Type": "LOCATION",
140 | "Text": "Modena",
141 | "BeginOffset": 405,
142 | "EndOffset": 411
143 | },
144 | {
145 | "Score": 0.9156971573829651,
146 | "Type": "ORGANIZATION",
147 | "Text": "Scuderia Ferrari",
148 | "BeginOffset": 413,
149 | "EndOffset": 429
150 | },
151 | {
152 | "Score": 0.7246978282928467,
153 | "Type": "ORGANIZATION",
154 | "Text": "Ferrari",
155 | "BeginOffset": 447,
156 | "EndOffset": 454
157 | },
158 | {
159 | "Score": 0.932436466217041,
160 | "Type": "ORGANIZATION",
161 | "Text": "Ferrari",
162 | "BeginOffset": 497,
163 | "EndOffset": 504
164 | },
165 | {
166 | "Score": 0.966004490852356,
167 | "Type": "ORGANIZATION",
168 | "Text": "Ferrari",
169 | "BeginOffset": 507,
170 | "EndOffset": 514
171 | },
172 | {
173 | "Score": 0.97238689661026,
174 | "Type": "ORGANIZATION",
175 | "Text": "Alfa Romeo",
176 | "BeginOffset": 544,
177 | "EndOffset": 554
178 | },
179 | {
180 | "Score": 0.9982865452766418,
181 | "Type": "ORGANIZATION",
182 | "Text": "Alfa Romeo",
183 | "BeginOffset": 628,
184 | "EndOffset": 638
185 | },
186 | {
187 | "Score": 0.9988477230072021,
188 | "Type": "DATE",
189 | "Text": "1933",
190 | "BeginOffset": 643,
191 | "EndOffset": 647
192 | },
193 | {
194 | "Score": 0.9961930513381958,
195 | "Type": "ORGANIZATION",
196 | "Text": "Alfa Romeo",
197 | "BeginOffset": 649,
198 | "EndOffset": 659
199 | },
200 | {
201 | "Score": 0.9934269785881042,
202 | "Type": "ORGANIZATION",
203 | "Text": "Scuderia Ferrari",
204 | "BeginOffset": 698,
205 | "EndOffset": 714
206 | },
207 | {
208 | "Score": 0.9871456623077393,
209 | "Type": "ORGANIZATION",
210 | "Text": "Scuderia",
211 | "BeginOffset": 748,
212 | "EndOffset": 756
213 | },
214 | {
215 | "Score": 0.9732167720794678,
216 | "Type": "ORGANIZATION",
217 | "Text": "Alfa",
218 | "BeginOffset": 766,
219 | "EndOffset": 770
220 | },
221 | {
222 | "Score": 0.5122107863426208,
223 | "Type": "EVENT",
224 | "Text": "Prix",
225 | "BeginOffset": 779,
226 | "EndOffset": 783
227 | },
228 | {
229 | "Score": 0.5101088881492615,
230 | "Type": "QUANTITY",
231 | "Text": "many",
232 | "BeginOffset": 830,
233 | "EndOffset": 834
234 | },
235 | {
236 | "Score": 0.9267215728759766,
237 | "Type": "COMMERCIAL_ITEM",
238 | "Text": "Tazio Nuvolari",
239 | "BeginOffset": 858,
240 | "EndOffset": 872
241 | },
242 | {
243 | "Score": 0.7145555019378662,
244 | "Type": "COMMERCIAL_ITEM",
245 | "Text": "Achille Varzi",
246 | "BeginOffset": 877,
247 | "EndOffset": 890
248 | },
249 | {
250 | "Score": 0.998923122882843,
251 | "Type": "DATE",
252 | "Text": "1938",
253 | "BeginOffset": 895,
254 | "EndOffset": 899
255 | },
256 | {
257 | "Score": 0.9972896575927734,
258 | "Type": "ORGANIZATION",
259 | "Text": "Alfa Romeo",
260 | "BeginOffset": 901,
261 | "EndOffset": 911
262 | },
263 | {
264 | "Score": 0.9886752367019653,
265 | "Type": "ORGANIZATION",
266 | "Text": "Alfa",
267 | "BeginOffset": 965,
268 | "EndOffset": 969
269 | },
270 | {
271 | "Score": 0.5990666747093201,
272 | "Type": "COMMERCIAL_ITEM",
273 | "Text": "Corse",
274 | "BeginOffset": 970,
275 | "EndOffset": 975
276 | },
277 | {
278 | "Score": 0.9966844916343689,
279 | "Type": "LOCATION",
280 | "Text": "Milan",
281 | "BeginOffset": 979,
282 | "EndOffset": 984
283 | },
284 | {
285 | "Score": 0.6742328405380249,
286 | "Type": "PERSON",
287 | "Text": "Enzo Ferrari",
288 | "BeginOffset": 995,
289 | "EndOffset": 1007
290 | },
291 | {
292 | "Score": 0.7614614963531494,
293 | "Type": "ORGANIZATION",
294 | "Text": "Scuderia Ferrari",
295 | "BeginOffset": 1063,
296 | "EndOffset": 1079
297 | },
298 | {
299 | "Score": 0.991943359375,
300 | "Type": "ORGANIZATION",
301 | "Text": "Ferrari",
302 | "BeginOffset": 1126,
303 | "EndOffset": 1133
304 | },
305 | {
306 | "Score": 0.43177852034568787,
307 | "Type": "EVENT",
308 | "Text": "Formula",
309 | "BeginOffset": 1212,
310 | "EndOffset": 1219
311 | },
312 | {
313 | "Score": 0.5792743563652039,
314 | "Type": "TITLE",
315 | "Text": "One",
316 | "BeginOffset": 1220,
317 | "EndOffset": 1223
318 | },
319 | {
320 | "Score": 0.9387639760971069,
321 | "Type": "ORGANIZATION",
322 | "Text": "Scuderia Ferrari",
323 | "BeginOffset": 1258,
324 | "EndOffset": 1274
325 | },
326 | {
327 | "Score": 0.839101254940033,
328 | "Type": "QUANTITY",
329 | "Text": "one-make",
330 | "BeginOffset": 1354,
331 | "EndOffset": 1362
332 | },
333 | {
334 | "Score": 0.9148198366165161,
335 | "Type": "COMMERCIAL_ITEM",
336 | "Text": "AAC 815",
337 | "BeginOffset": 1380,
338 | "EndOffset": 1387
339 | },
340 | {
341 | "Score": 0.7636953592300415,
342 | "Type": "QUANTITY",
343 | "Text": "first racing car",
344 | "BeginOffset": 1396,
345 | "EndOffset": 1412
346 | },
347 | {
348 | "Score": 0.5968416929244995,
349 | "Type": "PERSON",
350 | "Text": "Enzo",
351 | "BeginOffset": 1431,
352 | "EndOffset": 1435
353 | },
354 | {
355 | "Score": 0.6526040434837341,
356 | "Type": "ORGANIZATION",
357 | "Text": "Ferrari",
358 | "BeginOffset": 1436,
359 | "EndOffset": 1443
360 | },
361 | {
362 | "Score": 0.9813588261604309,
363 | "Type": "ORGANIZATION",
364 | "Text": "Ferrari",
365 | "BeginOffset": 1477,
366 | "EndOffset": 1484
367 | },
368 | {
369 | "Score": 0.9520201086997986,
370 | "Type": "QUANTITY",
371 | "Text": "first vehicle",
372 | "BeginOffset": 1495,
373 | "EndOffset": 1508
374 | },
375 | {
376 | "Score": 0.9882596731185913,
377 | "Type": "ORGANIZATION",
378 | "Text": "Ferrari",
379 | "BeginOffset": 1523,
380 | "EndOffset": 1530
381 | },
382 | {
383 | "Score": 0.9969419240951538,
384 | "Type": "COMMERCIAL_ITEM",
385 | "Text": "125 S.",
386 | "BeginOffset": 1544,
387 | "EndOffset": 1550
388 | },
389 | {
390 | "Score": 0.803063690662384,
391 | "Type": "QUANTITY",
392 | "Text": "two",
393 | "BeginOffset": 1556,
394 | "EndOffset": 1559
395 | },
396 | {
397 | "Score": 0.8591575622558594,
398 | "Type": "QUANTITY",
399 | "Text": "two-seat sports",
400 | "BeginOffset": 1574,
401 | "EndOffset": 1589
402 | },
403 | {
404 | "Score": 0.5812758207321167,
405 | "Type": "COMMERCIAL_ITEM",
406 | "Text": "V12",
407 | "BeginOffset": 1597,
408 | "EndOffset": 1600
409 | },
410 | {
411 | "Score": 0.9987095594406128,
412 | "Type": "DATE",
413 | "Text": "1949",
414 | "BeginOffset": 1619,
415 | "EndOffset": 1623
416 | },
417 | {
418 | "Score": 0.9952574968338013,
419 | "Type": "COMMERCIAL_ITEM",
420 | "Text": "166 Inter",
421 | "BeginOffset": 1629,
422 | "EndOffset": 1638
423 | },
424 | {
425 | "Score": 0.8105729222297668,
426 | "Type": "QUANTITY",
427 | "Text": "first",
428 | "BeginOffset": 1737,
429 | "EndOffset": 1742
430 | },
431 | {
432 | "Score": 0.9807888269424438,
433 | "Type": "COMMERCIAL_ITEM",
434 | "Text": "166 Inter",
435 | "BeginOffset": 1743,
436 | "EndOffset": 1752
437 | },
438 | {
439 | "Score": 0.946994423866272,
440 | "Type": "QUANTITY",
441 | "Text": "four-seat",
442 | "BeginOffset": 1759,
443 | "EndOffset": 1768
444 | },
445 | {
446 | "Score": 0.4215838611125946,
447 | "Type": "COMMERCIAL_ITEM",
448 | "Text": "2",
449 | "BeginOffset": 1770,
450 | "EndOffset": 1771
451 | },
452 | {
453 | "Score": 0.753473162651062,
454 | "Type": "COMMERCIAL_ITEM",
455 | "Text": "2",
456 | "BeginOffset": 1772,
457 | "EndOffset": 1773
458 | },
459 | {
460 | "Score": 0.8835095763206482,
461 | "Type": "ORGANIZATION",
462 | "Text": "Carrozzeria",
463 | "BeginOffset": 1819,
464 | "EndOffset": 1830
465 | },
466 | {
467 | "Score": 0.4891807734966278,
468 | "Type": "COMMERCIAL_ITEM",
469 | "Text": "Touring Superleggera",
470 | "BeginOffset": 1831,
471 | "EndOffset": 1851
472 | },
473 | {
474 | "Score": 0.9941658973693848,
475 | "Type": "ORGANIZATION",
476 | "Text": "Ferrari",
477 | "BeginOffset": 1890,
478 | "EndOffset": 1897
479 | },
480 | {
481 | "Score": 0.9968177080154419,
482 | "Type": "ORGANIZATION",
483 | "Text": "Ferrari",
484 | "BeginOffset": 1914,
485 | "EndOffset": 1921
486 | },
487 | {
488 | "Score": 0.894444465637207,
489 | "Type": "ORGANIZATION",
490 | "Text": "Pininfarina",
491 | "BeginOffset": 2017,
492 | "EndOffset": 2028
493 | },
494 | {
495 | "Score": 0.9082576632499695,
496 | "Type": "ORGANIZATION",
497 | "Text": "Scaglietti",
498 | "BeginOffset": 2030,
499 | "EndOffset": 2040
500 | },
501 | {
502 | "Score": 0.9445992112159729,
503 | "Type": "ORGANIZATION",
504 | "Text": "Zagato",
505 | "BeginOffset": 2042,
506 | "EndOffset": 2048
507 | },
508 | {
509 | "Score": 0.9102649092674255,
510 | "Type": "ORGANIZATION",
511 | "Text": "Vignale",
512 | "BeginOffset": 2050,
513 | "EndOffset": 2057
514 | },
515 | {
516 | "Score": 0.9756594896316528,
517 | "Type": "ORGANIZATION",
518 | "Text": "Bertone.The",
519 | "BeginOffset": 2062,
520 | "EndOffset": 2073
521 | },
522 | {
523 | "Score": 0.9741680026054382,
524 | "Type": "QUANTITY",
525 | "Text": "two-seat",
526 | "BeginOffset": 2108,
527 | "EndOffset": 2116
528 | },
529 | {
530 | "Score": 0.5622731447219849,
531 | "Type": "COMMERCIAL_ITEM",
532 | "Text": "V12s",
533 | "BeginOffset": 2131,
534 | "EndOffset": 2135
535 | },
536 | {
537 | "Score": 0.9950791597366333,
538 | "Type": "ORGANIZATION",
539 | "Text": "Ferrari",
540 | "BeginOffset": 2158,
541 | "EndOffset": 2165
542 | },
543 | {
544 | "Score": 0.9958176016807556,
545 | "Type": "DATE",
546 | "Text": "1950s",
547 | "BeginOffset": 2188,
548 | "EndOffset": 2193
549 | },
550 | {
551 | "Score": 0.9959827661514282,
552 | "Type": "DATE",
553 | "Text": "1960s",
554 | "BeginOffset": 2198,
555 | "EndOffset": 2203
556 | },
557 | {
558 | "Score": 0.9971714615821838,
559 | "Type": "DATE",
560 | "Text": "1968",
561 | "BeginOffset": 2208,
562 | "EndOffset": 2212
563 | },
564 | {
565 | "Score": 0.9540412425994873,
566 | "Type": "COMMERCIAL_ITEM",
567 | "Text": "Dino",
568 | "BeginOffset": 2217,
569 | "EndOffset": 2221
570 | },
571 | {
572 | "Score": 0.9416077136993408,
573 | "Type": "QUANTITY",
574 | "Text": "first two-seat",
575 | "BeginOffset": 2244,
576 | "EndOffset": 2258
577 | },
578 | {
579 | "Score": 0.9857861995697021,
580 | "Type": "ORGANIZATION",
581 | "Text": "Ferrari",
582 | "BeginOffset": 2276,
583 | "EndOffset": 2283
584 | },
585 | {
586 | "Score": 0.9540470242500305,
587 | "Type": "COMMERCIAL_ITEM",
588 | "Text": "Dino",
589 | "BeginOffset": 2289,
590 | "EndOffset": 2293
591 | },
592 | {
593 | "Score": 0.867891252040863,
594 | "Type": "COMMERCIAL_ITEM",
595 | "Text": "V6",
596 | "BeginOffset": 2324,
597 | "EndOffset": 2326
598 | },
599 | {
600 | "Score": 0.957985520362854,
601 | "Type": "COMMERCIAL_ITEM",
602 | "Text": "V8",
603 | "BeginOffset": 2346,
604 | "EndOffset": 2348
605 | },
606 | {
607 | "Score": 0.8559430241584778,
608 | "Type": "ORGANIZATION",
609 | "Text": "Ferraris",
610 | "BeginOffset": 2434,
611 | "EndOffset": 2442
612 | },
613 | {
614 | "Score": 0.8858150243759155,
615 | "Type": "DATE",
616 | "Text": "1980s",
617 | "BeginOffset": 2450,
618 | "EndOffset": 2455
619 | },
620 | {
621 | "Score": 0.9936612844467163,
622 | "Type": "DATE",
623 | "Text": "1990s",
624 | "BeginOffset": 2457,
625 | "EndOffset": 2462
626 | },
627 | {
628 | "Score": 0.9749894142150879,
629 | "Type": "COMMERCIAL_ITEM",
630 | "Text": "V8",
631 | "BeginOffset": 2519,
632 | "EndOffset": 2521
633 | },
634 | {
635 | "Score": 0.8934127688407898,
636 | "Type": "COMMERCIAL_ITEM",
637 | "Text": "V12",
638 | "BeginOffset": 2525,
639 | "EndOffset": 2528
640 | },
641 | {
642 | "Score": 0.9613357186317444,
643 | "Type": "COMMERCIAL_ITEM",
644 | "Text": "V8",
645 | "BeginOffset": 2543,
646 | "EndOffset": 2545
647 | },
648 | {
649 | "Score": 0.45344188809394836,
650 | "Type": "QUANTITY",
651 | "Text": "half",
652 | "BeginOffset": 2573,
653 | "EndOffset": 2577
654 | },
655 | {
656 | "Score": 0.9758495688438416,
657 | "Type": "ORGANIZATION",
658 | "Text": "Ferrari",
659 | "BeginOffset": 2626,
660 | "EndOffset": 2633
661 | },
662 | {
663 | "Score": 0.9186448454856873,
664 | "Type": "QUANTITY",
665 | "Text": "12 engines.The",
666 | "BeginOffset": 2657,
667 | "EndOffset": 2671
668 | },
669 | {
670 | "Score": 0.840392529964447,
671 | "Type": "COMMERCIAL_ITEM",
672 | "Text": "1984 288 GTO",
673 | "BeginOffset": 2672,
674 | "EndOffset": 2684
675 | },
676 | {
677 | "Score": 0.9424274563789368,
678 | "Type": "QUANTITY",
679 | "Text": "first",
680 | "BeginOffset": 2707,
681 | "EndOffset": 2712
682 | },
683 | {
684 | "Score": 0.9751522541046143,
685 | "Type": "ORGANIZATION",
686 | "Text": "Ferrari",
687 | "BeginOffset": 2728,
688 | "EndOffset": 2735
689 | },
690 | {
691 | "Score": 0.7264823913574219,
692 | "Type": "ORGANIZATION",
693 | "Text": "Enzo Ferrari",
694 | "BeginOffset": 2781,
695 | "EndOffset": 2793
696 | },
697 | {
698 | "Score": 0.8831573128700256,
699 | "Type": "COMMERCIAL_ITEM",
700 | "Text": "LaFerrari",
701 | "BeginOffset": 2801,
702 | "EndOffset": 2810
703 | },
704 | {
705 | "Score": 0.9983037710189819,
706 | "Type": "DATE",
707 | "Text": "February 2019",
708 | "BeginOffset": 2815,
709 | "EndOffset": 2828
710 | },
711 | {
712 | "Score": 0.8724089860916138,
713 | "Type": "EVENT",
714 | "Text": "89th Geneva International Motor Show",
715 | "BeginOffset": 2837,
716 | "EndOffset": 2873
717 | },
718 | {
719 | "Score": 0.9788976907730103,
720 | "Type": "ORGANIZATION",
721 | "Text": "Ferrari",
722 | "BeginOffset": 2875,
723 | "EndOffset": 2882
724 | },
725 | {
726 | "Score": 0.7649800181388855,
727 | "Type": "COMMERCIAL_ITEM",
728 | "Text": "V8",
729 | "BeginOffset": 2914,
730 | "EndOffset": 2916
731 | },
732 | {
733 | "Score": 0.9487022161483765,
734 | "Type": "COMMERCIAL_ITEM",
735 | "Text": "F8 Tributo.Ferrari",
736 | "BeginOffset": 2931,
737 | "EndOffset": 2949
738 | },
739 | {
740 | "Score": 0.9001845121383667,
741 | "Type": "COMMERCIAL_ITEM",
742 | "Text": "SF90 Stradale",
743 | "BeginOffset": 2950,
744 | "EndOffset": 2963
745 | },
746 | {
747 | "Score": 0.9331264495849609,
748 | "Type": "QUANTITY",
749 | "Text": "first",
750 | "BeginOffset": 2971,
751 | "EndOffset": 2976
752 | },
753 | {
754 | "Score": 0.9794452786445618,
755 | "Type": "ORGANIZATION",
756 | "Text": "Ferrari",
757 | "BeginOffset": 2982,
758 | "EndOffset": 2989
759 | },
760 | {
761 | "Score": 0.9125649333000183,
762 | "Type": "QUANTITY",
763 | "Text": "three electric motors",
764 | "BeginOffset": 3111,
765 | "EndOffset": 3132
766 | },
767 | {
768 | "Score": 0.9724904298782349,
769 | "Type": "QUANTITY",
770 | "Text": "two",
771 | "BeginOffset": 3134,
772 | "EndOffset": 3137
773 | },
774 | {
775 | "Score": 0.877232015132904,
776 | "Type": "QUANTITY",
777 | "Text": "third",
778 | "BeginOffset": 3203,
779 | "EndOffset": 3208
780 | }
781 | ],
782 | "SRTFile": "TODO",
783 | "VMAPFile": "TODO"
784 | },
785 | "Metadata": {
786 | "Engine": "neural",
787 | "LanguageCode": "en-US",
788 | "FullNarration": "s3://pollypreviewsimplestack-pollyassetstore920ee247-1pvn3ec82d905/audio/full/e7b0f0dc-2400-42cb-be84-88ddc9407b45.json/.394cc3a0-5fef-4c18-b49a-f10e998cc9c9.mp3",
789 | "ArticlePath": "s3://pollypreviewsimplestack-pollyassetstore920ee247-1pvn3ec82d905/text/e7b0f0dc-2400-42cb-be84-88ddc9407b45.json",
790 | "PostProducedImagesS3Paths": [
791 | "s3://pollypreviewsimplestack-pollyassetstore920ee247-1pvn3ec82d905/image/output/e7b0f0dc-2400-42cb-be84-88ddc9407b45.json/ferrari_01.jpg.tga",
792 | "s3://pollypreviewsimplestack-pollyassetstore920ee247-1pvn3ec82d905/image/output/e7b0f0dc-2400-42cb-be84-88ddc9407b45.json/ferrari_04.jpg.tga",
793 | "s3://pollypreviewsimplestack-pollyassetstore920ee247-1pvn3ec82d905/image/output/e7b0f0dc-2400-42cb-be84-88ddc9407b45.json/ferrari_02.jpg.tga",
794 | "s3://pollypreviewsimplestack-pollyassetstore920ee247-1pvn3ec82d905/image/output/e7b0f0dc-2400-42cb-be84-88ddc9407b45.json/ferrari_05.jpg.tga"
795 | ],
796 | "VoiceId": "Kevin",
797 | "AudioPreview": "s3://pollypreviewsimplestack-pollyassetstore920ee247-1pvn3ec82d905/audio/preview/e7b0f0dc-2400-42cb-be84-88ddc9407b45.json/.394cc3a0-5fef-4c18-b49a-f10e998cc9c9.wav",
798 | "AssetId": "e7b0f0dc-2400-42cb-be84-88ddc9407b45.json",
799 | "Bucket": "pollypreviewsimplestack-pollyassetstore920ee247-1pvn3ec82d905",
800 | "Url": "https://giusedroid.wordpress.com/2021/04/29/a-brief-history-of-ferrari/amp/",
801 | "ImagesURLs": [
802 | "https://giusedroid.files.wordpress.com/2021/04/ferrari_01.jpg",
803 | "https://giusedroid.files.wordpress.com/2021/04/ferrari_04.jpg",
804 | "https://giusedroid.files.wordpress.com/2021/04/ferrari_02.jpg",
805 | "https://giusedroid.files.wordpress.com/2021/04/ferrari_05.jpg"
806 | ]
807 | }
808 | }
--------------------------------------------------------------------------------
/assets/vmap.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/bin/polly-preview-simple.ts:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env node
2 | import * as cdk from '@aws-cdk/core';
3 | import { PollyPreviewSimpleStack } from '../lib/polly-preview-simple-stack';
4 |
// CDK entry point: create the application and attach the single stack to it.
const cdkApp = new cdk.App();

// The stack is registered in the app under the id 'PollyPreviewSimpleStack'.
new PollyPreviewSimpleStack(cdkApp, 'PollyPreviewSimpleStack');
7 |
--------------------------------------------------------------------------------
/cdk.json:
--------------------------------------------------------------------------------
1 | {
2 | "app": "npx ts-node --prefer-ts-exts bin/polly-preview-simple.ts",
3 | "context": {
4 | "@aws-cdk/aws-apigateway:usagePlanKeyOrderInsensitiveId": true,
5 | "@aws-cdk/core:enableStackNameDuplicates": "true",
6 | "aws-cdk:enableDiffNoFail": "true",
7 | "@aws-cdk/core:stackRelativeExports": "true",
8 | "@aws-cdk/aws-ecr-assets:dockerIgnoreSupport": true,
9 | "@aws-cdk/aws-secretsmanager:parseOwnedSecretName": true,
10 | "@aws-cdk/aws-kms:defaultKeyPolicies": true,
11 | "@aws-cdk/aws-s3:grantWriteWithoutAcl": true,
12 | "@aws-cdk/aws-ecs-patterns:removeDefaultDesiredCount": true,
13 | "@aws-cdk/aws-rds:lowercaseDbIdentifier": true,
14 | "@aws-cdk/aws-efs:defaultEncryptionAtRest": true
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/functions/finalize-lambda/finalize.py:
--------------------------------------------------------------------------------
1 | import boto3
2 | import os
3 | import json
4 | from botocore.exceptions import ClientError
5 |
# The metadata table name is mandatory configuration: stop right away when it is missing.
try:
    POLLY_METADATA_STORE = os.environ['POLLY_METADATA_STORE']
except KeyError as missing_key:
    print(f"Missing env variable: {missing_key}")
    exit(1)

# Module-level DynamoDB resource and handle on the metadata table.
dynamo = boto3.resource("dynamodb")
polly_metadata_store = dynamo.Table(POLLY_METADATA_STORE)
14 |
def create_media_object(item):
    """Describe the media file referenced by one S3 event record.

    Recognised key layouts (examples):
        output/full/hls/62de657b-7884-4dc0-8286-b9b63c521351/template62de657b-7884-4dc0-8286-b9b63c521351.m3u8
        output/preview/62de657b-7884-4dc0-8286-b9b63c521351.mp4

    Args:
        item: one entry of the event's ``Records`` list; must provide
            ``item['s3']['object']['key']`` and ``item['s3']['bucket']['name']``.

    Returns:
        dict with ``media_id`` (asset id ending in ``.json``, or ``None`` when it
        cannot be derived), ``media_type`` (second path segment of the key, or
        ``None`` when the key has no such segment), ``media_key`` and
        ``media_bucket``.
    """
    media_key = item['s3']['object']['key']
    media_bucket = item['s3']['bucket']['name']

    segments = media_key.split('/')

    # Keys shorter than the expected layout must not raise IndexError: a single
    # unexpected object would otherwise abort the whole batch of records.
    media_type = segments[1] if len(segments) > 1 else None

    media_id = None
    if media_type == "preview" and len(segments) > 2:
        media_id = segments[2].replace(".mp4", ".json")
    elif media_type == "full" and len(segments) > 3:
        media_id = segments[3] + ".json"

    return {
        "media_id": media_id,
        "media_type": media_type,
        "media_key": media_key,
        "media_bucket": media_bucket
    }
35 |
def ddb_value(item):
    """Wrap ``item`` in the ``{"Value": ...}`` envelope used for attribute updates."""
    return dict(Value=item)
40 |
def is_successful_ops(media_object):
    """Return the ``metadata_updated`` flag stored on ``media_object``."""
    updated = media_object["metadata_updated"]
    return updated
43 |
def is_failed_ops(media_object):
    """Return the negation of ``is_successful_ops`` for ``media_object``."""
    succeeded = is_successful_ops(media_object)
    return not succeeded
46 |
def update_metadata(media_object):
    """Record the S3 location of a rendered video on its metadata item.

    Preview files are stored under ``PreviewVideoFile`` and full streams under
    ``FullVideoStream``; any other media type leaves the table untouched.

    Args:
        media_object: dict produced by ``create_media_object``.

    Returns:
        The same dict, with ``metadata_updated`` set to ``True`` only when the
        table update went through.
    """
    media_object['metadata_updated'] = False

    full_path = f"s3://{media_object['media_bucket']}/{media_object['media_key']}"

    attribute_updates = {}
    kind = media_object['media_type']
    if kind == "preview":
        attribute_updates['PreviewVideoFile'] = ddb_value(full_path)
    elif kind == "full":
        attribute_updates['FullVideoStream'] = ddb_value(full_path)

    print(attribute_updates)

    # Nothing to write for unrecognised media types.
    if not attribute_updates:
        return media_object

    asset_id = media_object["media_id"]

    try:
        polly_metadata_store.update_item(
            Key={"AssetId": asset_id},
            AttributeUpdates=attribute_updates,
        )
    except ClientError as error:
        print(error)
    else:
        media_object["metadata_updated"] = True

    return media_object
77 |
def handler(event, context):
    """Lambda entry point: record the location of each rendered video.

    Every record of the event is turned into a media object and its metadata
    item is updated. The response body lists the media objects whose update
    succeeded (``SuccessfulOps``) and those whose update did not (``FailedOps``).

    Args:
        event: event carrying a ``Records`` list.
        context: Lambda context object (unused).

    Returns:
        dict with ``statusCode`` 200 and a JSON ``body``.
    """
    media_objects = [create_media_object(record) for record in event['Records']]
    print(media_objects)
    updates = [update_metadata(media_object) for media_object in media_objects]
    print(updates)

    # Keep the operations themselves, split by outcome. Mapping the predicates
    # over the list would only yield booleans, with no indication of which
    # object they belong to.
    successful_ops = [update for update in updates if is_successful_ops(update)]
    failed_ops = [update for update in updates if is_failed_ops(update)]

    return {
        "statusCode": 200,
        "body": json.dumps({
            "SuccessfulOps": successful_ops,
            "FailedOps": failed_ops
        }, default=str)
    }
96 |
--------------------------------------------------------------------------------
/functions/polly-lambda/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "polly-lambda",
3 | "version": "1.0.0",
4 | "description": "Triggered when a JSON object is uploaded to an S3 bucket, will trigger a Polly job to produce a synthesized speech mp3 file",
5 | "main": "polly.js",
6 | "scripts": {
7 | "test": "echo \"Error: no test specified\" && exit 1"
8 | },
9 | "keywords": [
10 | "AWS",
11 | "Polly"
12 | ],
13 | "author": "gbatt@amazon.com",
14 | "license": "MIT"
15 | }
16 |
--------------------------------------------------------------------------------
/functions/polly-lambda/polly.js:
--------------------------------------------------------------------------------
1 | const AWS = require('aws-sdk');
2 |
// Service clients, created once when the module is loaded and shared by the handler.
const Polly = new AWS.Polly();
const S3 = new AWS.S3();
const Dynamo = new AWS.DynamoDB.DocumentClient();
6 |
// Name of the bucket referenced by an S3 event record.
const getBucket = ({s3}) => s3.bucket.name;
// Object key referenced by an S3 event record.
// Event notifications deliver the key URL-encoded, with spaces sent as '+',
// so it is decoded here before being used against S3 or as an asset id.
const getKey = Record => decodeURIComponent(Record.s3.object.key.replace(/\+/g, ' '));
9 |
// Configuration read from the environment: the bucket handed to Polly as output
// location and the DynamoDB table updated by the handler.
const {OutputS3BucketName, Table} = process.env;
11 |
12 | exports.handler = async event => {
13 | const {Records} = event;
14 |
15 | const SuccessfulOps = [];
16 | const FailedOps = [];
17 |
18 | for (const Record of Records){
19 |
20 | const Bucket = getBucket(Record);
21 | const Key = getKey(Record);
22 |
23 | let File;
24 | try{
25 | File = await S3.getObject({
26 | Key,
27 | Bucket
28 | }).promise();
29 |
30 | }catch(S3Error){
31 | console.error(`Error while retrieving file from S3 \n${S3Error}`);
32 | FailedOps.push({
33 | error: S3Error,
34 | Record
35 | });
36 | continue;
37 | }
38 |
39 | const {Text, VoiceId, Engine, LanguageCode} = JSON.parse(File.Body.toString());
40 |
41 | const PollyJobParams = {
42 | OutputFormat: "mp3",
43 | OutputS3BucketName,
44 | Text,
45 | VoiceId,
46 | Engine,
47 | LanguageCode, // it's ok if it's null or undefined
48 | OutputS3KeyPrefix: `audio/full/${Key.replace('text/', '')}/`,
49 | TextType: "text"
50 | };
51 |
52 | let PollyJob;
53 |
54 | try{
55 | PollyJob = await Polly.startSpeechSynthesisTask(PollyJobParams).promise();
56 | }catch(PollyError){
57 | console.error(`Error while creating PollyJob \n${PollyError}`);
58 | FailedOps.push({
59 | error: PollyError,
60 | Record,
61 | PollyJob
62 | });
63 | continue;
64 | }
65 | const DDBParams = {
66 | "TableName":Table,
67 | Key:{
68 | AssetId: Key.replace("text/", "")
69 | },
70 | ExpressionAttributeNames: {
71 | "#fullNarration": "FullNarration"
72 | },
73 | ExpressionAttributeValues:{
74 | ":fullNarration": "IN_PROGRESS"
75 | },
76 | UpdateExpression: `SET #fullNarration = :fullNarration`
77 | };
78 |
79 | let ddbResponse;
80 | try{
81 | ddbResponse = await Dynamo.update(DDBParams).promise();
82 | }catch(DDBError){
83 | console.error(`Error while writing to DDB \n${DDBError}`);
84 | FailedOps.push({
85 | error: DDBError,
86 | Record,
87 | ddbResponse
88 | });
89 | continue;
90 | }
91 |
92 | SuccessfulOps.push({
93 | Record
94 | });
95 | }
96 |
97 | console.log("SuccessfulOps");
98 | console.log(JSON.stringify(SuccessfulOps, null, 2));
99 | console.log("FailedOps");
100 | console.log(JSON.stringify(FailedOps, null, 2));
101 |
102 | return {
103 | statusCode: 200,
104 | body: JSON.stringify({
105 | SuccessfulOps,
106 | FailedOps
107 | })
108 | }
109 | }
--------------------------------------------------------------------------------
/functions/postprod-lambda/fadeout.py:
--------------------------------------------------------------------------------
1 | import os
2 | import subprocess
3 | import pathlib
4 | import boto3
5 | import json
6 | from botocore.exceptions import ClientError
7 | from decimal import Decimal
8 |
# The metadata table name is mandatory configuration: stop right away when it is missing.
try:
    POLLY_METADATA_STORE = os.environ['POLLY_METADATA_STORE']
except KeyError as missing_key:
    print(f"Missing env variable: {missing_key}")
    exit(1)

# Length of the preview and of its closing fade-out, as handed to ffmpeg;
# both can be overridden through the environment.
FFMPEG_PREVIEW_DURATION = int(os.getenv("FFMPEG_PREVIEW_DURATION", 30))
FFMPEG_FADEOUT_DURATION = int(os.getenv("FFMPEG_FADEOUT_DURATION", 3))

# Module-level AWS handles.
s3 = boto3.client("s3")
dynamo = boto3.resource("dynamodb")
polly_metadata_store = dynamo.Table(POLLY_METADATA_STORE)

# Local working directory under which all downloaded and generated files are placed.
ROOT_PATH = "/tmp"
23 |
def default(obj):
    """JSON fallback serializer: ``Decimal`` becomes its string form, anything else is rejected.

    Raises:
        TypeError: when ``obj`` is not a ``Decimal``.
    """
    if not isinstance(obj, Decimal):
        raise TypeError("Object of type '%s' is not JSON serializable" % type(obj).__name__)
    return str(obj)
28 |
def create_media_object(pair):
    """Derive every S3 and local path needed to process one narration file.

    Args:
        pair: ``(bucket, input_path)`` where ``input_path`` looks like
            ``audio/full/$DOCUMENT_ID/$POLLY_GENERATED.mp3``.

    Returns:
        dict describing the source object (``s3_*`` and ``media_*`` entries),
        its local working copies under ``ROOT_PATH`` (``local_*`` entries) and
        the S3 destination of the generated ``.wav`` preview (``preview_s3_*``).
    """
    bucket, input_path = pair

    input_split = input_path.split("/")
    input_type = input_split[0]  # audio
    input_format = input_split[1]  # full
    input_polly = input_split[-1]  # $POLLY_GENERATED.mp3
    input_ext = input_polly.split(".")[-1]  # mp3

    # Strip only the trailing ".<ext>". A plain str.replace would also remove
    # the same text from the middle of the name (e.g. "take.mp3.final.mp3").
    if "." in input_polly:
        input_polly_no_ext = input_polly[:-(len(input_ext) + 1)]
    else:
        input_polly_no_ext = input_polly

    # everything between input_format and input_polly
    input_document = "/".join(input_split[2:-1])

    preview_key = f"{input_type}/preview/{input_document}/{input_polly_no_ext}.wav"

    return {
        "s3_full_path": f"s3://{bucket}/{input_path}",
        "s3_path": f"{bucket}/{input_path}",
        "s3_bucket": bucket,
        "s3_key": input_path,
        "media_type": input_type,
        "media_format": input_format,
        "media_extension": input_ext,
        "media_polly_file": input_polly,
        "media_polly_no_extension": input_polly_no_ext,
        "media_document_id": input_document,
        # /tmp/audio/full/$DOCUMENT_ID
        "local_path": f"{ROOT_PATH}/{input_type}/{input_format}/{input_document}",
        # /tmp/audio/full/$DOCUMENT_ID/$POLLY_GENERATED.mp3
        "local_full_path": f"{ROOT_PATH}/{input_path}",
        # /tmp/audio/preview/$DOCUMENT_ID
        "local_preview_path": f"{ROOT_PATH}/{input_type}/preview/{input_document}",
        # /tmp/audio/preview/$DOCUMENT_ID/$POLLY_GENERATED.wav
        "local_preview_full_path": f"{ROOT_PATH}/{preview_key}",
        "preview_s3_key": preview_key,
        "preview_s3_full_path": f"s3://{bucket}/{preview_key}"
    }
67 |
# pipeline_check : media_object["local_paths_exist"]
def create_local_paths(media_object):
    """Create the local working directories of ``media_object``.

    Both ``local_path`` and ``local_preview_path`` are created, including any
    missing parents. The outcome is stored in ``media_object["local_paths_exist"]``.

    Returns:
        The same ``media_object``.
    """
    try:
        for path_key in ("local_path", "local_preview_path"):
            pathlib.Path(media_object[path_key]).mkdir(parents=True, exist_ok=True)
        media_object["local_paths_exist"] = True
    except Exception as e:
        # Still best-effort, but the reason is logged, and SystemExit and
        # KeyboardInterrupt are no longer swallowed as a bare except would.
        print(e)
        media_object["local_paths_exist"] = False
    return media_object
77 |
# pipeline_check : media_object["source_available"]
def download(media_object):
    """Fetch the source narration from S3 into its local working path.

    Skipped when the local directories are not available. The outcome is stored
    in ``media_object["source_available"]``. A failed download is logged and
    leaves the flag at ``False``, in the same way ``upload`` reports its
    failures, so that the following pipeline stages can skip themselves.

    Returns:
        The same ``media_object``.
    """
    media_object["source_available"] = False

    if not media_object["local_paths_exist"]:
        return media_object

    bucket = media_object["s3_bucket"]
    key = media_object["s3_key"]
    filename = media_object["local_full_path"]

    try:
        with open(filename, "wb") as fp:
            s3.download_fileobj(bucket, key, fp)
        media_object["source_available"] = True
    except ClientError as e:
        print(e)

    return media_object
94 |
def get_duration(local_path):
    """Return the duration of the media file at ``local_path`` as reported by ffprobe.

    Runs the bundled ``./bin/ffprobe`` asking for the ``format=duration`` entry
    only, and converts the value it prints on stdout.

    Returns:
        The duration as a ``Decimal``.
    """
    probe = subprocess.run(
        [
            "./bin/ffprobe",
            "-v", "quiet",
            "-print_format", "compact=print_section=0:nokey=1:escape=csv",
            "-show_entries", "format=duration",
            local_path,
        ],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )

    # ffprobe writes its result to stdout (ffmpeg, by contrast, reports on stderr)
    seconds = float(probe.stdout)
    return Decimal(str(seconds))
113 |
# pipeline_check : media_object["preview_available"]
def fade_out(media_object):
    """Render the audio preview: the start of the narration with a closing fade-out.

    Skipped when the source file is not available. Otherwise the narration
    duration is stored in ``media_object["full_narration_duration"]`` and the
    bundled ``./bin/ffmpeg`` cuts the input at ``FFMPEG_PREVIEW_DURATION`` with a
    fade-out of ``FFMPEG_FADEOUT_DURATION`` ending at the cut. The outcome is
    stored in ``media_object["preview_available"]``.

    Returns:
        The same ``media_object``.
    """
    media_object["preview_available"] = False

    # The metadata update reads this key unconditionally, so make sure it exists
    # even when this stage is skipped.
    # NOTE(review): None is assumed to be an acceptable "unknown" value there - confirm.
    media_object.setdefault("full_narration_duration", None)

    if not media_object["source_available"]:
        return media_object

    filename_in = media_object["local_full_path"]
    filename_out = media_object["local_preview_full_path"]

    media_object['full_narration_duration'] = get_duration(filename_in)

    start_position = FFMPEG_PREVIEW_DURATION - FFMPEG_FADEOUT_DURATION

    FFMPEG_COMMAND = [
        "./bin/ffmpeg",
        # overwrite a leftover output file instead of refusing to run
        "-y",
        "-i",
        filename_in,
        "-af",
        f"afade=t=out:st={start_position}:d={FFMPEG_FADEOUT_DURATION}",
        "-to",
        str(FFMPEG_PREVIEW_DURATION),
        filename_out
    ]

    print(" ".join(FFMPEG_COMMAND))

    p = subprocess.Popen(FFMPEG_COMMAND, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    out, err = p.communicate()
    # because ffmpeg outputs on error stream by default
    print(err)

    # File existence alone is not enough: a file left by an earlier run would
    # make a failed conversion look successful.
    if p.returncode == 0 and os.path.isfile(filename_out):
        media_object["preview_available"] = True

    return media_object
152 |
153 | # pipeline_check : media_object["preview_uploaded"]
def upload(media_object):
    """Upload the rendered preview file to S3.

    Skipped (flag left ``False``) when no preview was produced. A
    ``ClientError`` from S3 is printed and leaves the flag ``False``;
    a completed upload sets ``media_object["preview_uploaded"]`` to ``True``.
    Returns the same (mutated) ``media_object``.
    """
    media_object["preview_uploaded"] = False

    if not media_object["preview_available"]:
        return media_object

    target_bucket = media_object["s3_bucket"]
    target_key = media_object["preview_s3_key"]
    source_file = media_object["local_preview_full_path"]

    try:
        s3.upload_file(source_file, target_bucket, target_key)
    except ClientError as e:
        print(e)
    else:
        media_object["preview_uploaded"] = True

    return media_object
176 |
def check_for_failure(media_object):
    """Fold the per-stage flags into ``media_object["processing_successful"]``.

    The result is ``True`` only when the local paths, the source download,
    the preview rendering and the preview upload all succeeded.
    Returns the same (mutated) ``media_object``.
    """
    media_object["processing_successful"] = False

    stage_flags = [
        media_object[flag_name]
        for flag_name in (
            "local_paths_exist",
            "source_available",
            "preview_available",
            "preview_uploaded",
        )
    ]

    media_object["processing_successful"] = all(stage_flags)
    return media_object
190 |
def update_metadata(media_object):
    """Record the outcome of the preview pipeline in the metadata table.

    Writes to the item keyed by ``media_object["media_document_id"]``:

    * ``FullNarration``: the S3 URL of the source narration;
    * ``AudioPreview``: the preview S3 URL, or ``"FAILED"`` when processing
      did not succeed;
    * ``FullNarrationDurationInSeconds``: only when the duration was
      actually measured. Earlier stages skip that measurement when the
      source is unavailable, and the failure must still be recorded.

    Sets ``media_object["metadata_updated"]`` to ``True`` when the write
    succeeds; a ``ClientError`` is printed and leaves it ``False``.
    Returns the same (mutated) ``media_object``.
    """
    media_object["metadata_updated"] = False

    attribute_updates = {
        "FullNarration": {
            "Value": media_object["s3_full_path"]
        },
        "AudioPreview": {
            "Value": "FAILED"
        }
    }

    duration = media_object.get("full_narration_duration")
    if duration is not None:
        attribute_updates["FullNarrationDurationInSeconds"] = {"Value": duration}

    if media_object["processing_successful"]:
        attribute_updates["AudioPreview"]["Value"] = media_object["preview_s3_full_path"]

    asset_id = media_object["media_document_id"]

    try:
        polly_metadata_store.update_item(
            Key={"AssetId": asset_id},
            AttributeUpdates=attribute_updates,
        )
        media_object["metadata_updated"] = True
    except ClientError as e:
        print(e)

    return media_object
222 |
def is_successful_ops(media_object):
    """Return ``media_object`` when processing and the metadata write both succeeded, else ``None``."""
    succeeded = media_object["processing_successful"] and media_object["metadata_updated"]
    return media_object if succeeded else None
227 |
def is_failed_ops(media_object):
    """Return ``media_object`` when processing or the metadata write failed, else ``None``."""
    succeeded = media_object["processing_successful"] and media_object["metadata_updated"]
    if succeeded:
        return None
    return media_object
232 |
def handler(event, context):
    """Lambda entry point: build a faded-out preview for every S3 record.

    Expected object keys look like
    ``audio/full/$DOCUMENT_ID/$POLLY_GENERATED.mp3``.

    Every record runs through the stages
    ``create_media_object -> create_local_paths -> download -> fade_out ->
    upload -> check_for_failure -> update_metadata``; each stage records its
    own outcome on the media object.

    Returns a dict with ``statusCode`` 200 and a JSON ``body`` listing
    ``SuccessfulOps`` and ``FailedOps`` (one entry per record, ``None`` where
    the record does not belong to that list).
    """
    # Local import: object keys in S3 event notifications are URL-encoded
    # (for example a space arrives as "+"), so they are decoded before use.
    from urllib.parse import unquote_plus

    records = event["Records"]

    # [ "bucket_name", "audio/full/$DOCUMENT_ID/$POLLY_GENERATED.mp3" ]
    # The bucket is read from each record, so an empty batch or a batch that
    # spans several buckets is handled correctly.
    object_pairs = [
        [
            record["s3"]["bucket"]["name"],
            unquote_plus(record["s3"]["object"]["key"]),
        ]
        for record in records
    ]

    media_objects = [create_media_object(pair) for pair in object_pairs]

    # "/tmp/audio/full/$DOCUMENT_ID/$POLLY_GENERATED.mp3"
    print(media_objects)

    local_paths = [create_local_paths(media_object) for media_object in media_objects]

    full_narrations = [download(local_path) for local_path in local_paths]

    # "/tmp/audio/preview/$DOCUMENT_ID/$POLLY_GENERATED.wav"
    previews = [fade_out(full_narration) for full_narration in full_narrations]

    uploads = [upload(preview) for preview in previews]

    checks = [check_for_failure(uploaded) for uploaded in uploads]

    updates = [update_metadata(check) for check in checks]

    successful_ops = [is_successful_ops(update) for update in updates]
    failed_ops = [is_failed_ops(update) for update in updates]

    print("****************RESULTS****************")

    print(successful_ops)
    print(failed_ops)

    return {
        "statusCode": 200,
        "body": json.dumps({
            "SuccessfulOps": successful_ops,
            "FailedOps": failed_ops
        }, default=default)
    }
281 |
--------------------------------------------------------------------------------
/functions/postprod-lambda/images.py:
--------------------------------------------------------------------------------
1 | import os
2 | import subprocess
3 | import pathlib
4 | import boto3
5 | import json
6 | import urllib.request
7 | from botocore.exceptions import ClientError
8 | from decimal import Decimal
9 |
# The DynamoDB table name is mandatory configuration: when the environment
# variable is missing, the problem is reported and the process exits at import
# time.
try:
    POLLY_METADATA_STORE = os.environ['POLLY_METADATA_STORE']
except KeyError as e:
    print(f"Missing env variable: {e}")
    exit(1)

# Module-level AWS handles used by every function below.
s3 = boto3.client("s3")
dynamo = boto3.resource("dynamodb")
polly_metadata_store = dynamo.Table(POLLY_METADATA_STORE)

# Root of all local working files (sources and converted outputs).
ROOT_PATH = "/tmp"
21 |
def default(obj):
    """``json.dumps`` fallback: serialise ``Decimal`` values as strings.

    Raises ``TypeError`` for every other type.
    """
    if not isinstance(obj, Decimal):
        raise TypeError("Object of type '%s' is not JSON serializable" % type(obj).__name__)
    return str(obj)
26 |
27 |
def create_media_object(pair):
    """Build the media-object dict that travels through the image pipeline.

    ``pair`` is ``[bucket, key]`` where ``key`` looks like
    ``audio/preview/$DOCUMENT_ID/$POLLY_GENERATED.wav``. The document id is
    everything between the second path segment and the file name.

    The returned dict holds the parsed key components plus every S3 and local
    path the later stages read.
    """
    bucket, input_path = pair

    # input_path = "audio/preview/$DOCUMENT_ID/$POLLY_GENERATED.wav"

    input_split = input_path.split("/")
    input_type = input_split[0]             # audio
    input_format = input_split[1]           # preview
    input_polly = input_split[-1]           # $POLLY_GENERATED.wav
    input_ext = input_polly.split(".")[-1]  # wav

    # Strip only the trailing ".ext". ``str.replace`` would also remove the
    # same text from the middle of the name ("a.wav.b.wav" -> "a.b").
    suffix = f".{input_ext}"
    if input_polly.endswith(suffix):
        input_polly_no_ext = input_polly[:-len(suffix)]
    else:
        input_polly_no_ext = input_polly

    # get anything between input_format and input_polly
    input_document = "/".join(input_split[2:-1])

    return {
        "s3_full_path": f"s3://{bucket}/{input_path}",
        "s3_path": f"{bucket}/{input_path}",
        "s3_bucket": bucket,
        "s3_key": input_path,
        "media_type": input_type,
        "media_format": input_format,
        "media_extension": input_ext,
        "media_polly_file": input_polly,
        "media_polly_no_extension": input_polly_no_ext,
        "media_document_id": input_document,
        "source_local_path": f"{ROOT_PATH}/source/{input_document}",
        "output_local_path": f"{ROOT_PATH}/output/{input_document}",
        "source_s3_path": f"s3://{bucket}/image/source/{input_document}",
        "output_s3_path": f"s3://{bucket}/image/output/{input_document}",
        "output_s3_key": f"image/output/{input_document}",
        "article_s3_path": f"s3://{bucket}/text/{input_document}",
        "article_s3_key": f"text/{input_document}",
        # by design this is /tmp/source/article.json/article.json
        "article_local_path": f"{ROOT_PATH}/source/{input_document}/{input_document}",
        "video_trigger_local_path": f"{ROOT_PATH}/output/{input_document}/{input_document}",
        "video_trigger_s3_path": f"s3://{bucket}/video-trigger/{input_document}",
        "video_trigger_s3_key": f"video-trigger/{input_document}"
    }
67 |
68 | # pipeline_check : media_object["local_paths_exist"]
def create_local_paths(media_object):
    """Create the local source and output directories for one media object.

    Reads ``media_object["source_local_path"]`` and
    ``media_object["output_local_path"]`` and creates both directories,
    including any missing parents.

    Sets ``media_object["local_paths_exist"]`` to ``True`` when both
    directories are in place and to ``False`` on any ordinary failure.
    Returns the same (mutated) ``media_object``.
    """
    try:
        for path_key in ("source_local_path", "output_local_path"):
            pathlib.Path(media_object[path_key]).mkdir(parents=True, exist_ok=True)
        media_object["local_paths_exist"] = True
    except Exception as error:
        # ``Exception`` rather than a bare ``except`` so that KeyboardInterrupt
        # and SystemExit still propagate; the cause is logged instead of being
        # dropped silently.
        print(f"Could not create local paths: {error!r}")
        media_object["local_paths_exist"] = False
    return media_object
77 |
78 | # pipeline_check : media_object["article_available"]
def download_article_object(media_object):
    """Fetch the scraped article JSON from S3 into its local file.

    Skipped (flag left ``False``) when the local directories were not
    created. On a completed download ``media_object["article_available"]``
    becomes ``True``. Returns the same (mutated) ``media_object``.
    """
    media_object["article_available"] = False

    if not media_object["local_paths_exist"]:
        return media_object

    source_bucket = media_object["s3_bucket"]
    article_key = media_object["article_s3_key"]
    target_file = media_object["article_local_path"]

    with open(target_file, "wb") as target:
        s3.download_fileobj(source_bucket, article_key, target)

    media_object["article_available"] = True
    return media_object
94 |
95 | # pipeline_check : media_object["source_images_available"]
def download_images(media_object):
    """Download the first four images referenced by the article.

    Skipped (flag left ``False``) when the article file is not available.
    Otherwise the article JSON is loaded into
    ``media_object["article_body"]``, its ``ImagesURLs`` list is copied to
    ``media_object["images_urls"]``, and each of the first four URLs is saved
    under ``media_object["source_local_path"]`` using the last path segment
    of the URL as file name. The saved paths are collected in
    ``media_object["source_images_local_paths"]``.

    Returns the same (mutated) ``media_object``.
    """
    media_object["source_images_available"] = False

    if not media_object["article_available"]:
        return media_object

    media_object["source_images_local_paths"] = []

    with open(media_object["article_local_path"]) as fp:
        json_object = json.load(fp)
    media_object["article_body"] = json_object
    media_object["images_urls"] = json_object["ImagesURLs"]

    # downloading only the first 4 images
    for url in media_object["images_urls"][:4]:
        source_filename = url.split("/")[-1]
        output_filename = f"{media_object['source_local_path']}/{source_filename}"

        # NOTE(review): the URLs come from scraped article content and are
        # opened as-is (any scheme urllib accepts); confirm that is acceptable.
        # The response is used as a context manager so the connection is
        # closed even when writing the file fails.
        with urllib.request.urlopen(url) as http_response, open(output_filename, "wb") as fp:
            fp.write(http_response.read())
        media_object["source_images_local_paths"].append(output_filename)

    # NOTE(review): the flag is raised once the loop finishes, which includes
    # an article with no images; confirm this matches the intended nesting.
    media_object["source_images_available"] = True

    return media_object
122 |
def convert_image(input_path, output_path):
    """Convert one image with the bundled ffmpeg binary.

    The target format is whatever ffmpeg derives from ``output_path``.
    Returns ``output_path`` when ffmpeg exited with status 0 and the file
    exists, otherwise ``None``.
    """
    FFMPEG_COMMAND = [
        "./bin/ffmpeg",
        # Overwrite a leftover output instead of refusing to run, so an old
        # file can never be mistaken for the result of this invocation.
        "-y",
        "-i",
        input_path,
        output_path
    ]

    print(" ".join(FFMPEG_COMMAND))

    completed = subprocess.run(
        FFMPEG_COMMAND, stdout=subprocess.PIPE, stderr=subprocess.PIPE
    )

    # ffmpeg writes its log to the error stream by default
    print(completed.stderr)

    # A file on disk is not enough: a failed run can leave a partial output
    # behind, so the exit status is checked as well.
    if completed.returncode == 0 and os.path.isfile(output_path):
        return output_path
    return None
142 |
143 | # pipeline_check : media_object["output_images_available"]
def convert_images(media_object):
    """Convert every downloaded source image to a ``.tga`` file.

    Skipped (flag left ``False``) when no source images are available.
    ``media_object["output_images_local_paths"]`` receives one entry per
    source image (whatever ``convert_image`` returned for it), and
    ``media_object["output_images_available"]`` becomes ``True`` when at
    least one entry is truthy. Returns the same (mutated) ``media_object``.
    """
    media_object["output_images_available"] = False

    if not media_object["source_images_available"]:
        return media_object

    converted = []
    for source_image in media_object["source_images_local_paths"]:
        converted.append(convert_image(source_image, f"{source_image}.tga"))
    media_object["output_images_local_paths"] = converted

    if any(converted):
        media_object["output_images_available"] = True

    return media_object
158 |
def upload_image(local_path, bucket, key):
    """Upload one file to S3.

    Returns the ``s3://bucket/key`` URL of the uploaded object, or ``None``
    when S3 raised a ``ClientError`` (the error is printed).
    """
    try:
        s3.upload_file(local_path, bucket, key)
    except ClientError as e:
        print(e)
        return None
    else:
        return f"s3://{bucket}/{key}"
172 |
173 | # pipeline_check : media_object["images_uploaded"]
def upload(media_object):
    """Upload every converted image to S3.

    Skipped (flag left ``False``) when no converted images are available.
    Each converted file is uploaded under
    ``media_object["output_s3_key"]/<file name>``;
    ``media_object["output_images_s3_paths"]`` receives one entry per
    attempted upload (the S3 URL, or ``None`` when that upload failed), and
    ``media_object["images_uploaded"]`` becomes ``True`` when at least one
    upload succeeded. Returns the same (mutated) ``media_object``.
    """
    media_object["images_uploaded"] = False

    if not media_object["output_images_available"]:
        return media_object

    bucket = media_object["s3_bucket"]
    output_key = media_object["output_s3_key"]

    # Failed conversions are recorded as ``None`` in the list; they have no
    # file to upload and would crash the file-name split below.
    converted_paths = [
        local_path
        for local_path in media_object["output_images_local_paths"]
        if local_path
    ]

    uploaded = []
    for local_path in converted_paths:
        filename = local_path.split("/")[-1]
        uploaded.append(upload_image(local_path, bucket, f"{output_key}/{filename}"))
    media_object["output_images_s3_paths"] = uploaded

    if any(uploaded):
        media_object["images_uploaded"] = True

    return media_object
197 |
def check_for_failure(media_object):
    """Fold the per-stage flags into ``media_object["processing_successful"]``.

    The result is ``True`` only when the local paths, the article download,
    the image download, the conversion and the upload all succeeded.
    Returns the same (mutated) ``media_object``.
    """
    media_object["processing_successful"] = False

    stage_flags = [
        media_object[flag_name]
        for flag_name in (
            "local_paths_exist",
            "article_available",
            "source_images_available",
            "output_images_available",
            "images_uploaded",
        )
    ]

    media_object["processing_successful"] = all(stage_flags)
    return media_object
212 |
def update_metadata(media_object):
    """Record the outcome of the image pipeline in the metadata table.

    Writes to the item keyed by ``media_object["media_document_id"]``:

    * ``ImagesURLs``: the article's image URLs, only when they were actually
      read. Earlier stages skip that step when the article is unavailable,
      and the failure must still be recorded;
    * ``PostProducedImagesS3Paths``: the uploaded image URLs, or ``"FAILED"``
      when processing did not succeed.

    On success the DynamoDB response (requested with
    ``ReturnValues="ALL_NEW"``) is stored in ``media_object["metadata"]`` and
    ``media_object["metadata_updated"]`` becomes ``True``; a ``ClientError``
    is printed and leaves the flag ``False``.
    Returns the same (mutated) ``media_object``.
    """
    media_object["metadata_updated"] = False

    attribute_updates = {
        "PostProducedImagesS3Paths": {
            "Value": "FAILED"
        }
    }

    images_urls = media_object.get("images_urls")
    if images_urls is not None:
        attribute_updates["ImagesURLs"] = {"Value": images_urls}

    if media_object["processing_successful"]:
        attribute_updates["PostProducedImagesS3Paths"]["Value"] = media_object["output_images_s3_paths"]

    asset_id = media_object["media_document_id"]

    try:
        dynamo_response = polly_metadata_store.update_item(
            Key={"AssetId": asset_id},
            AttributeUpdates=attribute_updates,
            ReturnValues="ALL_NEW"
        )
        media_object["metadata"] = dynamo_response
        media_object["metadata_updated"] = True
    except ClientError as e:
        print(e)

    return media_object
242 |
def trigger_video_pipeline(media_object):
    """Write the video-trigger JSON document and upload it to S3.

    Skipped (flag left ``False``) when the metadata update did not succeed.
    Otherwise a JSON file containing the bucket, trigger key, asset id,
    article body and the updated metadata attributes is written to
    ``media_object["video_trigger_local_path"]`` and uploaded to
    ``media_object["video_trigger_s3_key"]``.

    Sets ``media_object["video_pipeline_triggered"]`` to ``True`` when the
    upload succeeds; a ``ClientError`` is printed and leaves it ``False``.
    Always returns the same (mutated) ``media_object``, like every other
    stage, so a failed upload does not drop the object from the stage output.
    """
    media_object['video_pipeline_triggered'] = False

    if not media_object["metadata_updated"]:
        return media_object

    output_file = {
        "Bucket": media_object['s3_bucket'],
        "Key": media_object['video_trigger_s3_key'],
        "AssetId": media_object['media_document_id'],
        "ArticleBody": media_object['article_body'],
        "Metadata": media_object['metadata']['Attributes']
    }

    with open(media_object["video_trigger_local_path"], "w") as fp:
        json.dump(output_file, fp, default=default)

    try:
        s3.upload_file(
            media_object['video_trigger_local_path'],
            media_object['s3_bucket'],
            media_object['video_trigger_s3_key']
        )
        media_object['video_pipeline_triggered'] = True
    except ClientError as e:
        print(e)

    return media_object
274 |
def is_successful_ops(media_object):
    """Return ``media_object`` when processing, the metadata write and the video trigger all succeeded, else ``None``."""
    succeeded = (
        media_object["processing_successful"]
        and media_object["metadata_updated"]
        and media_object["video_pipeline_triggered"]
    )
    return media_object if succeeded else None
279 |
def is_failed_ops(media_object):
    """Return ``media_object`` when processing, the metadata write or the video trigger failed, else ``None``."""
    succeeded = (
        media_object["processing_successful"]
        and media_object["metadata_updated"]
        and media_object["video_pipeline_triggered"]
    )
    if succeeded:
        return None
    return media_object
284 |
285 |
286 |
287 |
def handler(event, context):
    """Lambda entry point: post-produce the article images for every S3 record.

    Expected object keys look like
    ``audio/preview/$DOCUMENT_ID/$POLLY_GENERATED.wav``.

    Every record runs through the stages
    ``create_media_object -> create_local_paths -> download_article_object ->
    download_images -> convert_images -> upload -> check_for_failure ->
    update_metadata -> trigger_video_pipeline``; each stage records its own
    outcome on the media object.

    Returns a dict with ``statusCode`` 200 and a JSON ``body`` listing
    ``SuccessfulOps`` and ``FailedOps`` (one entry per record, ``None`` where
    the record does not belong to that list).
    """
    # Local import: object keys in S3 event notifications are URL-encoded
    # (for example a space arrives as "+"), so they are decoded before use.
    from urllib.parse import unquote_plus

    records = event["Records"]

    # [ "bucket_name", "audio/preview/$DOCUMENT_ID/$POLLY_GENERATED.wav" ]
    # The bucket is read from each record, so an empty batch or a batch that
    # spans several buckets is handled correctly.
    object_pairs = [
        [
            record["s3"]["bucket"]["name"],
            unquote_plus(record["s3"]["object"]["key"]),
        ]
        for record in records
    ]

    media_objects = [create_media_object(pair) for pair in object_pairs]

    local_paths = [create_local_paths(media_object) for media_object in media_objects]

    articles = [download_article_object(local_path) for local_path in local_paths]

    images = [download_images(article) for article in articles]

    targas = [convert_images(image) for image in images]

    uploads = [upload(targa) for targa in targas]

    checks = [check_for_failure(uploaded) for uploaded in uploads]

    updates = [update_metadata(check) for check in checks]

    trigger_videos = [trigger_video_pipeline(update) for update in updates]

    print(trigger_videos)

    # The stages mutate the media objects in place, so ``updates`` already
    # carries the trigger outcome set by ``trigger_video_pipeline``.
    successful_ops = [is_successful_ops(update) for update in updates]
    failed_ops = [is_failed_ops(update) for update in updates]

    print("****************RESULTS****************")

    print(successful_ops)
    print(failed_ops)

    return {
        "statusCode": 200,
        "body": json.dumps({
            "SuccessfulOps": successful_ops,
            "FailedOps": failed_ops
        }, default=default)
    }
337 |
338 |
--------------------------------------------------------------------------------
/functions/scrape-lambda/.nvmrc:
--------------------------------------------------------------------------------
1 | 14.17.6
2 |
--------------------------------------------------------------------------------
/functions/scrape-lambda/lib/SMPTE.js:
--------------------------------------------------------------------------------
/**
 * Split a duration in seconds into an SMPTE-style timecode object.
 * Example input: an HTML video.currentTime value.
 *
 * @param seconds   duration in seconds (may be fractional)
 * @param framerate number of frames per second used for the `f` component
 * @returns `{h, m, s, f}` with hours, minutes, seconds and frames
 */
function secondsToSMPTE(seconds, framerate=1000) {
  const wholeSeconds = Math.floor(seconds);
  const wholeMinutes = Math.floor(wholeSeconds / 60);

  return {
    h: Math.floor(wholeMinutes / 60),
    m: wholeMinutes % 60,
    s: wholeSeconds % 60,
    f: Math.floor((seconds % 1) * framerate)
  };
}
12 |
/**
 * Pretty print an SMPTE timecode object as `HH:MM:SS,FFF`.
 *
 * Hours, minutes and seconds are zero-padded to two digits and frames to
 * three. The input object is left untouched, so the same timecode can be
 * formatted more than once.
 *
 * @param timecode `{h, m, s, f}` as produced by secondsToSMPTE
 * @returns the formatted timecode string
 */
function SMPTEToString(timecode) {
  const pad = (value, width) => String(value).padStart(width, "0");
  const {h, m, s, f} = timecode;

  return pad(h, 2) + ":" + pad(m, 2) + ":" + pad(s, 2) + "," + pad(f, 3);
}
28 |
/**
 * Format a duration in seconds as an SMPTE timecode string.
 *
 * @param seconds   duration in seconds (may be fractional)
 * @param framerate number of frames per second used for the frame component
 * @returns the string produced by SMPTEToString for that duration
 */
function secondsToSMPTEString(seconds, framerate=1000){
  const timecode = secondsToSMPTE(seconds, framerate);
  return SMPTEToString(timecode);
}
34 |
35 | module.exports = {
36 | secondsToSMPTEString
37 | };
--------------------------------------------------------------------------------
/functions/scrape-lambda/lib/SRT.js:
--------------------------------------------------------------------------------
1 | const {secondsToSMPTEString} = require('./SMPTE');
2 |
/**
 * Render one SRT cue: the index line, the "start --> end" timing line, the
 * title line and the blank line that terminates the cue.
 */
const makeSRTItem = (index, start, end, title) => {
  const cueLines = [
    String(index),
    `${start} --> ${end}`,
    String(title),
    "",
    ""
  ];
  return cueLines.join("\n");
};
9 |
/**
 * Build an SRT document that shows every title for an equal share of
 * `duration`.
 *
 * @param titles   list of cue texts
 * @param duration total duration, in seconds, to spread the cues over
 * @returns the SRT text, or null when there are no titles
 */
const makeSRTFile = (titles, duration) => {
  if(!titles?.length){
    return null;
  }

  const cueCount = titles.length;
  const segmentDurationInSec = duration / cueCount;
  // Each cue starts slightly after the previous one ends.
  const delta = 0.1;

  let srt = "";
  for(let position = 0; position < cueCount; position++){
    const start = secondsToSMPTEString(segmentDurationInSec*position + delta);
    const end = secondsToSMPTEString(segmentDurationInSec*(position+1));
    srt += makeSRTItem(position+1, start, end, titles[position]);
  }

  return srt;
}
30 |
31 | module.exports = {
32 | makeSRTFile
33 | };
--------------------------------------------------------------------------------
/functions/scrape-lambda/lib/VMAP.js:
--------------------------------------------------------------------------------
// Predicate factory: keeps items whose Score is at least `minimumScore`.
const targetScore = minimumScore => ({Score}) => Score >= minimumScore;
// Predicate factory: keeps items whose Type is one of `types`.
const targetTypes = types => ({Type}) => types.includes(Type);
// Reads the Text of an item; yields undefined for a null/undefined item.
const textLens = item => (item == null ? undefined : item.Text);
// Reducer collecting values into a Set (the same Set instance is returned).
const setReducer = (collected, value) => collected.add(value);
// Reducer appending "<value>;" to the accumulated string.
const stringReducer = (joined, value) => joined + `${value};`;
6 |
// Builds the VMAP document body for the given ad-server URL.
// NOTE(review): in this copy the template literal contains only blank lines
// and never interpolates ADS_URL — the XML markup looks like it was stripped
// from the file; confirm against the original source before relying on it.
const VMAPBody = ADS_URL =>
`















`;
25 |
// Entity types that are turned into ad keywords.
const allowedTypes = ["ORGANIZATION"];

/**
 * Collect the distinct texts of the entities that score at least 0.80 and
 * whose type is listed in allowedTypes.
 *
 * @param entities list of entity objects with Score, Type and Text
 * @returns a Set of keyword texts
 */
const getKeywordsFromEntities = entities => {
  const confidentEntities = entities.filter(targetScore(0.80));
  const relevantEntities = confidentEntities.filter(targetTypes(allowedTypes));
  return relevantEntities
    .map(textLens)
    .reduce(setReducer, new Set());
};
36 |
/**
 * Serialise the keywords found in `entities` as "keyword;" segments joined
 * into a single string, e.g. "AWS;Ferrari;".
 *
 * The reduction starts from an empty string: without the initial value an
 * entity list that yields no keywords would throw, and the first keyword
 * would be glued to the second without its ";" separator.
 *
 * @param entities list of entity objects with Score, Type and Text
 * @returns the keyword string, "" when there are no keywords
 */
const getKeywordsFromEntitiesString =
  entities =>
    Array.from(getKeywordsFromEntities(entities))
      .reduce(stringReducer, "");
41 |
42 |
43 |
44 | module.exports = {
45 | VMAPBody,
46 | getKeywordsFromEntities,
47 | getKeywordsFromEntitiesString
48 | };
--------------------------------------------------------------------------------
/functions/scrape-lambda/lib/utils.js:
--------------------------------------------------------------------------------
// Pick a random element of `list` (undefined for an empty list).
const choice = list => list[Math.floor(Math.random()*list.length)];

/**
 * Pick a random voice for a language code.
 *
 * @param languageCode "xx" or "xx-YY"; the part before "-" selects the
 *                     language bucket in `voices`
 * @param voices       map from language to a list of voice descriptors
 *                     carrying a FullLanguageCode
 * @returns a voice descriptor, or undefined when the language has no voices
 *
 * When a country is given, voices matching the full code are preferred; if
 * none match, any voice of the language is used instead of returning
 * nothing.
 */
const getRandomVoiceId = (languageCode, voices) => {
  const [language, country] = languageCode.split("-");
  const fullVoiceList = voices[language];

  // Unknown language: report "no voice" instead of throwing a TypeError.
  if(!fullVoiceList?.length){
    return undefined;
  }

  if(country){
    const filteredByCountry = fullVoiceList.filter(x => x.FullLanguageCode === languageCode);
    if(filteredByCountry.length){
      return choice(filteredByCountry);
    }
  }
  return choice(fullVoiceList);
}
14 |
/**
 * Extract the `Url` field from the JSON body of an incoming event.
 *
 * @param event object whose `body` is a JSON string
 * @returns the `Url` value; undefined when the parsed body has no `Url`
 *          (including a body that parses to null); null when the body is
 *          not valid JSON
 */
const getUrlFromEvent = ({body}) => {
  let parsedBody;
  try{
    parsedBody = JSON.parse(body);
  }catch(error){
    return null;
  }

  // JSON.parse can legitimately yield null (body "null"); optional chaining
  // keeps that from throwing.
  return parsedBody?.Url;
}
25 |
26 |
/**
 * Return the entry of `Languages` with the highest `Score` (undefined for an
 * empty list). A copy is sorted, so the caller's array keeps its order.
 */
const getDominantLanguage =
  ({Languages}) => [...Languages].sort((a,b) => b['Score'] - a['Score'])[0];
29 |
30 | module.exports = {
31 | choice,
32 | getRandomVoiceId,
33 | getUrlFromEvent,
34 | getDominantLanguage
35 | };
--------------------------------------------------------------------------------
/functions/scrape-lambda/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "scraper-lambda",
3 | "version": "1.0.0",
4 | "description": "Lambda that scrapes a WordPress AMP article",
5 | "main": "index.js",
6 | "scripts": {
7 | "test": "echo \"Error: no test specified\" && exit 1"
8 | },
9 | "keywords": [
10 | "aws",
11 | "lambda",
12 | "scraping",
13 | "amp",
14 | "article"
15 | ],
16 | "author": "gbatt@amazon.com",
17 | "license": "MIT",
18 | "dependencies": {
19 | "axios": "^0.21.1",
20 | "cheerio": "^1.0.0-rc.9",
21 | "jsdom": "^16.5.3"
22 | },
23 | "devDependencies": {
24 | "aws-sdk": "^2.910.0"
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/functions/scrape-lambda/scrape.js:
--------------------------------------------------------------------------------
1 | const AWS = require('aws-sdk');
2 | const axios = require('axios');
3 | const cheerio = require('cheerio');
4 | const uuid = require('uuid');
5 | const qs = require('querystring');
6 | const {JSDOM} = require('jsdom');
7 | const {Voices} = require('./voices');
8 | const { makeSRTFile } = require('./lib/SRT');
9 | const {
10 | getRandomVoiceId,
11 | getUrlFromEvent,
12 | getDominantLanguage
13 | } = require('./lib/utils');
14 | const {
15 | VMAPBody,
16 | getKeywordsFromEntitiesString
17 | } = require('./lib/VMAP');
18 |
// AWS service clients, created once when the module is loaded.
const Comprehend = new AWS.Comprehend();
const S3 = new AWS.S3();
const Dynamo = new AWS.DynamoDB.DocumentClient();

// Configuration read from the environment.
// Bucket that receives the SRT, VMAP and article documents.
const OutputS3BucketName = process.env['OutputS3BucketName'];
// DynamoDB table that stores the per-asset metadata.
const Table = process.env['Table'];
// Duration passed to makeSRTFile; falls back to 30 when the variable is
// unset, not a number, or 0.
const FFMPEG_PREVIEW_DURATION =
  parseInt(process.env['FFMPEG_PREVIEW_DURATION'], 10) || 30;
// Base URL the keyword query string is appended to.
const ADS_URL = process.env['ADS_URL'] || 'https://ads.amazon.com';
28 |
/**
 * Download `url` and resolve with the response data, or with null when the
 * request fails.
 */
const downloadArticleP = async url => {
  try{
    const response = await axios.get(url);
    return response.data;
  }catch(_){
    return null;
  }
};
33 |
34 | exports.handler = async event => {
35 |
36 | const AssetId = `${uuid.v4()}.json`;
37 |
38 | const SuccessfulOps = [];
39 | const FailedOps = [];
40 |
41 | /* Download and Scrape article */
42 | const url = getUrlFromEvent(event);
43 | const article = await downloadArticleP(url);
44 |
45 | const dom = new JSDOM(article);
46 |
47 | const { document } = dom.window;
48 |
49 | const images = Array.from(
50 | document.querySelectorAll('.entry-content > figure a')
51 | );
52 |
53 | const imagesURLs = images.map( ({href}) => href);
54 |
55 | const articleHeader = document.querySelector('.entry-title').innerHTML;
56 |
57 | const titles = Array.from(
58 | document.querySelectorAll('.entry-content > h2')
59 | );
60 |
61 | const titlesText = titles.map( ({innerHTML}) => innerHTML);
62 | titlesText.push("More info at aws.amazon.com");
63 | titlesText.unshift(articleHeader);
64 |
65 | const paragraphs = Array.from(
66 | document.querySelectorAll('.entry-content > p')
67 | );
68 |
69 | const paragraphsText = cheerio.load(
70 | paragraphs
71 | .map( ({innerHTML}) => innerHTML )
72 | .reduce((memo, next) => memo += ` ${next}`, "")
73 | )
74 | .text();
75 |
76 | if(!paragraphsText || paragraphsText === ""){
77 | return {
78 | statusCode: 400,
79 | body: "Could not find text for the selected article"
80 | }
81 | }
82 |
83 | /* Comprehend - Language and Entities */
84 | const ComprehendText =
85 | paragraphsText?.length > 4096 ?
86 | paragraphsText?.substring(4096)
87 | :
88 | paragraphsText;
89 |
90 | const LanguageParams = {
91 | Text: ComprehendText
92 | }
93 |
94 | let LanguageJob = null;
95 |
96 | try{
97 | LanguageJob =
98 | await Comprehend.detectDominantLanguage(LanguageParams).promise();
99 |
100 | SuccessfulOps.push(LanguageJob);
101 | }catch(LanguageJobError){
102 |
103 | console.error(`Error while detecting language \n${LanguageJobError}`);
104 | FailedOps.push({
105 | error: LanguageJobError,
106 | LanguageJob,
107 | LanguageParams
108 | });
109 | }
110 |
111 | const dominantLanguage = getDominantLanguage(LanguageJob);
112 |
113 | const EntitiesParams = {
114 | Text: ComprehendText,
115 | LanguageCode: dominantLanguage.LanguageCode
116 | };
117 |
118 | let EntitiesJob = null;
119 |
120 | try{
121 | EntitiesJob = await Comprehend.detectEntities(EntitiesParams).promise();
122 | SuccessfulOps.push(EntitiesJob);
123 | }catch(EntitiesJobError){
124 | console.error(`Error while detecting entities \n${EntitiesJobError}`);
125 | FailedOps.push({
126 | error: EntitiesJobError,
127 | EntitiesJob,
128 | EntitiesParams
129 | });
130 | }
131 |
132 | const SRTFile = makeSRTFile(titlesText, FFMPEG_PREVIEW_DURATION);
133 |
134 | const SRTUploadParams = {
135 | Bucket: OutputS3BucketName,
136 | Key: `srt/preview/${AssetId}.srt`,
137 | Body: SRTFile
138 | };
139 |
140 | let SRTUploadJob = null;
141 |
142 | try{
143 | SRTUploadJob = await S3.upload(SRTUploadParams).promise();
144 | SuccessfulOps.push(SRTUploadJob);
145 | }catch(SRTUploadError){
146 | console.error(`Error while writing to S3 \n${SRTUploadError}`);
147 | FailedOps.push({
148 | error: SRTUploadError,
149 | SRTUploadJob,
150 | SRTUploadParams
151 | });
152 | return {
153 | statusCode: 500,
154 | SRTFile,
155 | FailedOps
156 | }
157 | }
158 |
159 |
160 | const ADSUrlWithKeywords = `${ADS_URL}?${qs.stringify({
161 | keywords: getKeywordsFromEntitiesString(EntitiesJob.Entities)
162 | })}`;
163 | const VMAPFile = VMAPBody(ADSUrlWithKeywords);
164 |
165 | const VMAPUploadParams = {
166 | Bucket: OutputS3BucketName,
167 | Key: `vmap/${AssetId}`,
168 | Body: VMAPFile
169 | };
170 |
171 | let VMAPUploadJob = null;
172 |
173 | try{
174 | VMAPUploadJob = await S3.upload(VMAPUploadParams).promise();
175 | SuccessfulOps.push(VMAPUploadJob);
176 | }catch(VMAPUploadError){
177 | console.error(`Error while writing to S3 \n${VMAPUploadError}`);
178 | FailedOps.push({
179 | error: VMAPUploadError,
180 | VMAPUploadJob,
181 | VMAPUploadParams
182 | });
183 | return {
184 | statusCode: 500,
185 | VMAPFile,
186 | FailedOps
187 | }
188 | }
189 |
190 |
191 | const {VoiceId, Neural, FullLanguageCode} = getRandomVoiceId(
192 | dominantLanguage.LanguageCode, Voices
193 | );
194 |
195 | /* Prepare Output and Upload */
196 | const OutputDocument = {
197 | AssetId,
198 | Text: paragraphsText,
199 | LanguageCode: FullLanguageCode,
200 | VoiceId,
201 | Engine: Neural ? "neural" : "standard",
202 | Url: url,
203 | ImagesURLs: imagesURLs,
204 | TitlesText: titlesText,
205 | Entities: EntitiesJob.Entities,
206 | SRTFile,
207 | VMAPFile
208 | };
209 |
210 | const UploadParams = {
211 | Bucket: OutputS3BucketName,
212 | Key: `text/${AssetId}`,
213 | Body: JSON.stringify(OutputDocument, null, 2)
214 | };
215 |
216 | let s3UploadJob = null;
217 |
218 | try{
219 | s3UploadJob = await S3.upload(UploadParams).promise();
220 | SuccessfulOps.push(s3UploadJob);
221 | }catch(s3UploadError){
222 | console.error(`Error while writing to S3 \n${s3UploadError}`);
223 | FailedOps.push({
224 | error: s3UploadError,
225 | s3UploadJob,
226 | UploadParams
227 | });
228 | return {
229 | statusCode: 500,
230 | OutputDocument,
231 | FailedOps
232 | }
233 | }
234 |
235 | /* Store in Metadata Store */
236 |
237 | const DDBParams = {
238 | "TableName":Table,
239 | Key:{
240 | AssetId
241 | },
242 | ExpressionAttributeNames: {
243 | "#bucket": "Bucket",
244 | "#fullNarration": "FullNarration",
245 | "#voiceId": "VoiceId",
246 | "#articlePath": "ArticlePath",
247 | "#languageCode":"LanguageCode",
248 | "#engine":"Engine",
249 | "#url":"Url"
250 | },
251 | ExpressionAttributeValues:{
252 | ":bucket": OutputS3BucketName,
253 | ":fullNarration": "NOT_STARTED",
254 | ":voiceId": VoiceId,
255 | ":articlePath": `s3://${OutputS3BucketName}/text/${AssetId}`,
256 | ":languageCode": FullLanguageCode,
257 | ":engine": Neural ? "neural" : "standard",
258 | ":url":url
259 | },
260 | UpdateExpression: `SET #bucket = :bucket,
261 | #fullNarration = :fullNarration,
262 | #voiceId = :voiceId,
263 | #articlePath = :articlePath,
264 | #languageCode = :languageCode,
265 | #engine = :engine,
266 | #url = :url
267 | `
268 | };
269 |
270 | let ddbResponse;
271 | try{
272 | ddbResponse = await Dynamo.update(DDBParams).promise();
273 | }catch(DDBError){
274 | console.error(`Error while writing to DDB \n${DDBError}`);
275 | FailedOps.push({
276 | error: DDBError,
277 | ddbResponse,
278 | DDBParams
279 | });
280 | }
281 |
282 | return {
283 | statusCode: 200,
284 | body:JSON.stringify(OutputDocument)
285 | };
286 | };
287 |
288 |
/**
 * Manual smoke test: invoke the handler with a sample article URL.
 * @returns the handler's response
 */
async function main(){
  const fakeEvent = {
    body: JSON.stringify({
      Url: 'https://giusedroid.wordpress.com/2021/04/29/a-brief-history-of-ferrari/amp/'
    })
  };

  return exports.handler(fakeEvent);
}

// Run only when the file is executed directly (`node scrape.js`), not when it
// is loaded as a module. `require.main === module` replaces the deprecated
// `module.parent` check.
if(require.main === module){
  main()
    .then(x => console.log(JSON.stringify(x, null, 2)))
    .catch(error => {
      // Report the failure instead of leaving an unhandled rejection.
      console.error(error);
      process.exitCode = 1;
    });
}
302 |
--------------------------------------------------------------------------------
/functions/scrape-lambda/voices.json:
--------------------------------------------------------------------------------
1 | {"Voices":{
2 | "arb": [
3 | {
4 | "Language": "Arabic",
5 | "FullLanguageCode": "arb",
6 | "VoiceId": "Zeina",
7 | "Gender": "Female",
8 | "Neural": false,
9 | "Standard": true,
10 | "Newscaster": false
11 | }
12 | ],
13 | "cmn": [
14 | {
15 | "Language": "Chinese Mandarin",
16 | "FullLanguageCode": "cmn-CN",
17 | "VoiceId": "Zhiyu",
18 | "Gender": "Female",
19 | "Neural": false,
20 | "Standard": true,
21 | "Newscaster": false
22 | }
23 | ],
24 | "da": [
25 | {
26 | "Language": "Danish",
27 | "FullLanguageCode": "da-DK",
28 | "VoiceId": "Naja",
29 | "Gender": "Female",
30 | "Neural": false,
31 | "Standard": true,
32 | "Newscaster": false
33 | },
34 | {
35 | "Language": "Danish",
36 | "FullLanguageCode": "da-DK",
37 | "VoiceId": "Mads",
38 | "Gender": "Male",
39 | "Neural": false,
40 | "Standard": true,
41 | "Newscaster": false
42 | }
43 | ],
44 | "nl": [
45 | {
46 | "Language": "Dutch",
47 | "FullLanguageCode": "nl-NL",
48 | "VoiceId": "Lotte",
49 | "Gender": "Female",
50 | "Neural": false,
51 | "Standard": true,
52 | "Newscaster": false
53 | },
54 | {
55 | "Language": "Dutch",
56 | "FullLanguageCode": "nl-NL",
57 | "VoiceId": "Ruben",
58 | "Gender": "Male",
59 | "Neural": false,
60 | "Standard": true,
61 | "Newscaster": false
62 | }
63 | ],
64 | "en": [
65 | {
66 | "Language": "English (Australian)",
67 | "FullLanguageCode": "en-AU",
68 | "VoiceId": "Nicole",
69 | "Gender": "Female",
70 | "Neural": false,
71 | "Standard": true,
72 | "Newscaster": false
73 | },
74 | {
75 | "Language": "English (Australian)",
76 | "FullLanguageCode": "en-AU",
77 | "VoiceId": "Olivia",
78 | "Gender": "Female",
79 | "Neural": true,
80 | "Standard": false,
81 | "Newscaster": false
82 | },
83 | {
84 | "Language": "English (Australian)",
85 | "FullLanguageCode": "en-AU",
86 | "VoiceId": "Russell",
87 | "Gender": "Male",
88 | "Neural": false,
89 | "Standard": true,
90 | "Newscaster": false
91 | },
92 | {
93 | "Language": "English (British)",
94 | "FullLanguageCode": "en-GB",
95 | "VoiceId": "Amy",
96 | "Gender": "Female",
97 | "Neural": true,
98 | "Standard": true,
99 | "Newscaster": true
100 | },
101 | {
102 | "Language": "English (British)",
103 | "FullLanguageCode": "en-GB",
104 | "VoiceId": "Emma",
105 | "Gender": "Female",
106 | "Neural": true,
107 | "Standard": true,
108 | "Newscaster": false
109 | },
110 | {
111 | "Language": "English (British)",
112 | "FullLanguageCode": "en-GB",
113 | "VoiceId": "Brian",
114 | "Gender": "Male",
115 | "Neural": true,
116 | "Standard": true,
117 | "Newscaster": false
118 | },
119 | {
120 | "Language": "English (Indian)",
121 | "FullLanguageCode": "en-IN",
122 | "VoiceId": "Aditi",
123 | "Gender": "Female",
124 | "Neural": false,
125 | "Standard": true,
126 | "Newscaster": false
127 | },
128 | {
129 | "Language": "English (Indian)",
130 | "FullLanguageCode": "en-IN",
131 | "VoiceId": "Raveena",
132 | "Gender": "Female",
133 | "Neural": false,
134 | "Standard": true,
135 | "Newscaster": false
136 | },
137 | {
138 | "Language": "English (US)",
139 | "FullLanguageCode": "en-US",
140 | "VoiceId": "Ivy",
141 | "Gender": "Female (child)",
142 | "Neural": true,
143 | "Standard": true,
144 | "Newscaster": false
145 | },
146 | {
147 | "Language": "English (US)",
148 | "FullLanguageCode": "en-US",
149 | "VoiceId": "Joanna",
150 | "Gender": "Female",
151 | "Neural": true,
152 | "Standard": true,
153 | "Newscaster": true
154 | },
155 | {
156 | "Language": "English (US)",
157 | "FullLanguageCode": "en-US",
158 | "VoiceId": "Kendra",
159 | "Gender": "Female",
160 | "Neural": true,
161 | "Standard": true,
162 | "Newscaster": false
163 | },
164 | {
165 | "Language": "English (US)",
166 | "FullLanguageCode": "en-US",
167 | "VoiceId": "Kimberly",
168 | "Gender": "Female",
169 | "Neural": true,
170 | "Standard": true,
171 | "Newscaster": false
172 | },
173 | {
174 | "Language": "English (US)",
175 | "FullLanguageCode": "en-US",
176 | "VoiceId": "Salli",
177 | "Gender": "Female",
178 | "Neural": true,
179 | "Standard": true,
180 | "Newscaster": false
181 | },
182 | {
183 | "Language": "English (US)",
184 | "FullLanguageCode": "en-US",
185 | "VoiceId": "Joey",
186 | "Gender": "Male",
187 | "Neural": true,
188 | "Standard": true,
189 | "Newscaster": false
190 | },
191 | {
192 | "Language": "English (US)",
193 | "FullLanguageCode": "en-US",
194 | "VoiceId": "Justin",
195 | "Gender": "Male (child)",
196 | "Neural": true,
197 | "Standard": true,
198 | "Newscaster": false
199 | },
200 | {
201 | "Language": "English (US)",
202 | "FullLanguageCode": "en-US",
203 | "VoiceId": "Kevin",
204 | "Gender": "Male (child)",
205 | "Neural": true,
206 | "Standard": false,
207 | "Newscaster": false
208 | },
209 | {
210 | "Language": "English (US)",
211 | "FullLanguageCode": "en-US",
212 | "VoiceId": "Matthew",
213 | "Gender": "Male",
214 | "Neural": true,
215 | "Standard": true,
216 | "Newscaster": true
217 | },
218 | {
219 | "Language": "English (Welsh)",
220 | "FullLanguageCode": "en-GB-WLS",
221 | "VoiceId": "Geraint",
222 | "Gender": "Male",
223 | "Neural": false,
224 | "Standard": true,
225 | "Newscaster": false
226 | }
227 | ],
228 | "cy": [
229 | {
230 | "Language": "Welsh",
231 | "FullLanguageCode": "cy-GB",
232 | "VoiceId": "Gwyneth",
233 | "Gender": "Female",
234 | "Neural": false,
235 | "Standard": true,
236 | "Newscaster": false
237 | }
238 | ],
239 | "fr": [
240 | {
241 | "Language": "French",
242 | "FullLanguageCode": "fr-FR",
243 | "VoiceId": "Celine",
244 | "Gender": "Female",
245 | "Neural": false,
246 | "Standard": true,
247 | "Newscaster": false
248 | },
249 | {
250 | "Language": "French",
251 | "FullLanguageCode": "fr-FR",
252 | "VoiceId": "Léa",
253 | "Gender": "Female",
254 | "Neural": false,
255 | "Standard": true,
256 | "Newscaster": false
257 | },
258 | {
259 | "Language": "French",
260 | "FullLanguageCode": "fr-FR",
261 | "VoiceId": "Mathieu",
262 | "Gender": "Male",
263 | "Neural": false,
264 | "Standard": true,
265 | "Newscaster": false
266 | },
267 | {
268 | "Language": "French(Canadian)",
269 | "FullLanguageCode": "fr-CA",
270 | "VoiceId": "Chantal",
271 | "Gender": "Female",
272 | "Neural": false,
273 | "Standard": true,
274 | "Newscaster": false
275 | }
276 | ],
277 | "de": [
278 | {
279 | "Language": "German",
280 | "FullLanguageCode": "de-DE",
281 | "VoiceId": "Marlene",
282 | "Gender": "Female",
283 | "Neural": false,
284 | "Standard": true,
285 | "Newscaster": false
286 | },
287 | {
288 | "Language": "German",
289 | "FullLanguageCode": "de-DE",
290 | "VoiceId": "Vicki",
291 | "Gender": "Female",
292 | "Neural": false,
293 | "Standard": true,
294 | "Newscaster": false
295 | },
296 | {
297 | "Language": "German",
298 | "FullLanguageCode": "de-DE",
299 | "VoiceId": "Hans",
300 | "Gender": "Male",
301 | "Neural": false,
302 | "Standard": true,
303 | "Newscaster": false
304 | }
305 | ],
306 | "hi": [
307 | {
308 | "Language": "Hindi",
309 | "FullLanguageCode": "hi-IN",
310 | "VoiceId": "Aditi",
311 | "Gender": "Female",
312 | "Neural": false,
313 | "Standard": true,
314 | "Newscaster": false
315 | }
316 | ],
317 | "is": [
318 | {
319 | "Language": "Icelandic",
320 | "FullLanguageCode": "is-IS",
321 | "VoiceId": "Dora",
322 | "Gender": "Female",
323 | "Neural": false,
324 | "Standard": true,
325 | "Newscaster": false
326 | },
327 | {
328 | "Language": "Icelandic",
329 | "FullLanguageCode": "is-IS",
330 | "VoiceId": "Karl",
331 | "Gender": "Male",
332 | "Neural": false,
333 | "Standard": true,
334 | "Newscaster": false
335 | }
336 | ],
337 | "it": [
338 | {
339 | "Language": "Italian",
340 | "FullLanguageCode": "it-IT",
341 | "VoiceId": "Carla",
342 | "Gender": "Female",
343 | "Neural": false,
344 | "Standard": true,
345 | "Newscaster": false
346 | },
347 | {
348 | "Language": "Italian",
349 | "FullLanguageCode": "it-IT",
350 | "VoiceId": "Bianca",
351 | "Gender": "Female",
352 | "Neural": false,
353 | "Standard": true,
354 | "Newscaster": false
355 | },
356 | {
357 | "Language": "Italian",
358 | "FullLanguageCode": "it-IT",
359 | "VoiceId": "Giorgio",
360 | "Gender": "Male",
361 | "Neural": false,
362 | "Standard": true,
363 | "Newscaster": false
364 | }
365 | ],
366 | "ja": [
367 | {
368 | "Language": "Japanese",
369 | "FullLanguageCode": "ja-JP",
370 | "VoiceId": "Mizuki",
371 | "Gender": "Female",
372 | "Neural": false,
373 | "Standard": true,
374 | "Newscaster": false
375 | },
376 | {
377 | "Language": "Japanese",
378 | "FullLanguageCode": "ja-JP",
379 | "VoiceId": "Takumi",
380 | "Gender": "Male",
381 | "Neural": false,
382 | "Standard": true,
383 | "Newscaster": false
384 | }
385 | ],
386 | "ko": [
387 | {
388 | "Language": "Korean",
389 | "FullLanguageCode": "ko-KR",
390 | "VoiceId": "Seoyeon",
391 | "Gender": "Female",
392 | "Neural": true,
393 | "Standard": true,
394 | "Newscaster": false
395 | }
396 | ],
397 | "nb": [
398 | {
399 | "Language": "Norwegian",
400 | "FullLanguageCode": "nb-NO",
401 | "VoiceId": "Liv",
402 | "Gender": "Female",
403 | "Neural": false,
404 | "Standard": true,
405 | "Newscaster": false
406 | }
407 | ],
408 | "pl": [
409 | {
410 | "Language": "Polish",
411 | "FullLanguageCode": "pl-PL",
412 | "VoiceId": "Ewa",
413 | "Gender": "Female",
414 | "Neural": false,
415 | "Standard": true,
416 | "Newscaster": false
417 | },
418 | {
419 | "Language": "Polish",
420 | "FullLanguageCode": "pl-PL",
421 | "VoiceId": "Maja",
422 | "Gender": "Female",
423 | "Neural": false,
424 | "Standard": true,
425 | "Newscaster": false
426 | },
427 | {
428 | "Language": "Polish",
429 | "FullLanguageCode": "pl-PL",
430 | "VoiceId": "Jacek",
431 | "Gender": "Male",
432 | "Neural": false,
433 | "Standard": true,
434 | "Newscaster": false
435 | },
436 | {
437 | "Language": "Polish",
438 | "FullLanguageCode": "pl-PL",
439 | "VoiceId": "Jan",
440 | "Gender": "Male",
441 | "Neural": false,
442 | "Standard": true,
443 | "Newscaster": false
444 | }
445 | ],
446 | "pt": [
447 | {
448 | "Language": "Portuguese (Brazilian)",
449 | "FullLanguageCode": "pt-BR",
450 | "VoiceId": "Camila",
451 | "Gender": "Female",
452 | "Neural": true,
453 | "Standard": true,
454 | "Newscaster": false
455 | },
456 | {
457 | "Language": "Portuguese (Brazilian)",
458 | "FullLanguageCode": "pt-BR",
459 | "VoiceId": "Vitoria",
460 | "Gender": "Female",
461 | "Neural": false,
462 | "Standard": true,
463 | "Newscaster": false
464 | },
465 | {
466 | "Language": "Portuguese (Brazilian)",
467 | "FullLanguageCode": "pt-BR",
468 | "VoiceId": "Ricardo",
469 | "Gender": "Male",
470 | "Neural": false,
471 | "Standard": true,
472 | "Newscaster": false
473 | },
474 | {
475 | "Language": "Portuguese (European)",
476 | "FullLanguageCode": "pt-PT",
477 | "VoiceId": "Ines",
478 | "Gender": "Female",
479 | "Neural": false,
480 | "Standard": true,
481 | "Newscaster": false
482 | },
483 | {
484 | "Language": "Portuguese (European)",
485 | "FullLanguageCode": "pt-PT",
486 | "VoiceId": "Cristiano",
487 | "Gender": "Male",
488 | "Neural": false,
489 | "Standard": true,
490 | "Newscaster": false
491 | }
492 | ],
493 | "ro": [
494 | {
495 | "Language": "Romanian",
496 | "FullLanguageCode": "ro-RO",
497 | "VoiceId": "Carmen",
498 | "Gender": "Female",
499 | "Neural": false,
500 | "Standard": true,
501 | "Newscaster": false
502 | }
503 | ],
504 | "ru": [
505 | {
506 | "Language": "Russian",
507 | "FullLanguageCode": "ru-RU",
508 | "VoiceId": "Tatyana",
509 | "Gender": "Female",
510 | "Neural": false,
511 | "Standard": true,
512 | "Newscaster": false
513 | },
514 | {
515 | "Language": "Russian",
516 | "FullLanguageCode": "ru-RU",
517 | "VoiceId": "Maxim",
518 | "Gender": "Male",
519 | "Neural": false,
520 | "Standard": true,
521 | "Newscaster": false
522 | }
523 | ],
524 | "es": [
525 | {
526 | "Language": "Spanish (European)",
527 | "FullLanguageCode": "es-ES",
528 | "VoiceId": "Conchita",
529 | "Gender": "Female",
530 | "Neural": false,
531 | "Standard": true,
532 | "Newscaster": false
533 | },
534 | {
535 | "Language": "Spanish (European)",
536 | "FullLanguageCode": "es-ES",
537 | "VoiceId": "Lucia",
538 | "Gender": "Female",
539 | "Neural": false,
540 | "Standard": true,
541 | "Newscaster": false
542 | },
543 | {
544 | "Language": "Spanish (European)",
545 | "FullLanguageCode": "es-ES",
546 | "VoiceId": "Enrique",
547 | "Gender": "Male",
548 | "Neural": false,
549 | "Standard": true,
550 | "Newscaster": false
551 | },
552 | {
553 | "Language": "Spanish (Mexican)",
554 | "FullLanguageCode": "es-MX",
555 | "VoiceId": "Mia",
556 | "Gender": "Female",
557 | "Neural": false,
558 | "Standard": true,
559 | "Newscaster": false
560 | },
561 | {
562 | "Language": "US Spanish",
563 | "FullLanguageCode": "es-US",
564 | "VoiceId": "Lupe",
565 | "Gender": "Female",
566 | "Neural": true,
567 | "Standard": true,
568 | "Newscaster": true
569 | },
570 | {
571 | "Language": "US Spanish",
572 | "FullLanguageCode": "es-US",
573 | "VoiceId": "Penelope",
574 | "Gender": "Female",
575 | "Neural": false,
576 | "Standard": true,
577 | "Newscaster": false
578 | },
579 | {
580 | "Language": "US Spanish",
581 | "FullLanguageCode": "es-US",
582 | "VoiceId": "Miguel",
583 | "Gender": "Male",
584 | "Neural": false,
585 | "Standard": true,
586 | "Newscaster": false
587 | }
588 | ],
589 | "sv": [
590 | {
591 | "Language": "Swedish",
592 | "FullLanguageCode": "sv-SE",
593 | "VoiceId": "Astrid",
594 | "Gender": "Female",
595 | "Neural": false,
596 | "Standard": true,
597 | "Newscaster": false
598 | }
599 | ],
600 | "tr": [
601 | {
602 | "Language": "Turkish",
603 | "FullLanguageCode": "tr-TR",
604 | "VoiceId": "Filiz",
605 | "Gender": "Female",
606 | "Neural": false,
607 | "Standard": true,
608 | "Newscaster": false
609 | }
610 | ]
611 | }}
--------------------------------------------------------------------------------
/functions/video-lambda/full_hls.json:
--------------------------------------------------------------------------------
1 | {
2 |
3 | "OutputGroups": [
4 | {
5 | "Name": "File Group",
6 | "Outputs": [
7 | {
8 | "ContainerSettings": {
9 | "Container": "M3U8",
10 | "M3u8Settings": {}
11 | },
12 | "VideoDescription": {
13 | "CodecSettings": {
14 | "Codec": "H_264",
15 | "H264Settings": {
16 | "Bitrate": 2000000
17 | }
18 | }
19 | },
20 | "AudioDescriptions": [
21 | {
22 | "CodecSettings": {
23 | "Codec": "AAC",
24 | "AacSettings": {
25 | "Bitrate": 96000,
26 | "CodingMode": "CODING_MODE_2_0",
27 | "SampleRate": 48000
28 | }
29 | }
30 | }
31 | ],
32 | "OutputSettings": {
33 | "HlsSettings": {}
34 |
35 | },
36 | "NameModifier": "narrative"
37 |
38 | }
39 | ],
40 | "OutputGroupSettings": {
41 | "Type": "HLS_GROUP_SETTINGS",
42 | "HlsGroupSettings": {
43 | "Destination": "",
44 | "SegmentLength": 10,
45 | "MinSegmentLength": 0
46 | }
47 | }
48 | }
49 | ],
50 | "AdAvailOffset": 0,
51 | "Inputs": [
52 | {
53 | "InputClippings": [
54 | {
55 | "EndTimecode": "00:00:31:00",
56 | "StartTimecode": "00:00:00:00"
57 | }
58 | ],
59 | "AudioSelectors": {
60 | "Audio Selector 1": {
61 | "DefaultSelection": "DEFAULT",
62 | "ExternalAudioFileInput": "s3://test/client.e56032ba-6893-4be8-8694-a703c5ab1c39(1).mp3"
63 | }
64 | },
65 | "VideoSelector": {},
66 | "TimecodeSource": "ZEROBASED",
67 | "CaptionSelectors": {
68 | "Captions Selector 1": {
69 | "SourceSettings": {
70 | "SourceType": "SRT",
71 | "FileSourceSettings": {
72 | "SourceFile": "s3://test/experiment.srt"
73 | }
74 | }
75 | }
76 | },
77 | "ImageInserter": {
78 | "InsertableImages": [
79 | {
80 | "Width": 1200,
81 | "Height": 900,
82 | "ImageX": 400,
83 | "ImageY": 10,
84 | "Duration": 5000,
85 | "Layer": 0,
86 | "ImageInserterInput": "s3://test/Image1.png",
87 | "StartTime": "00:00:05:00",
88 | "Opacity": 100
89 | },
90 | {
91 | "Width": 1200,
92 | "Height": 900,
93 | "ImageX": 400,
94 | "ImageY": 10,
95 | "Duration": 5000,
96 | "Layer": 1,
97 | "ImageInserterInput": "s3://test/Image2.png",
98 | "StartTime": "00:00:10:00",
99 | "Opacity": 100
100 | },
101 | {
102 | "Width": 1200,
103 | "Height": 900,
104 | "ImageX": 400,
105 | "ImageY": 10,
106 | "Duration": 5000,
107 | "Layer": 2,
108 | "ImageInserterInput": "s3://test/Image3.png",
109 | "StartTime": "00:00:15:00",
110 | "Opacity": 100
111 | },
112 | {
113 | "Width": 1200,
114 | "Height": 900,
115 | "ImageX": 400,
116 | "ImageY": 10,
117 | "Duration": 5000,
118 | "Layer": 3,
119 | "ImageInserterInput": "s3://test/Image4.png",
120 | "StartTime": "00:00:20:10",
121 | "Opacity": 100
122 | }
123 | ]
124 | },
125 | "FileInput": "s3://test/10min_static.mov"
126 | }
127 | ]
128 | }
--------------------------------------------------------------------------------
/functions/video-lambda/preview_mp4.json:
--------------------------------------------------------------------------------
1 | {
2 |
3 | "OutputGroups": [
4 | {
5 | "Name": "File Group",
6 | "Outputs": [
7 | {
8 | "ContainerSettings": {
9 | "Container": "MP4",
10 | "Mp4Settings": {}
11 | },
12 | "VideoDescription": {
13 | "CodecSettings": {
14 | "Codec": "H_264",
15 | "H264Settings": {
16 | "Bitrate": 2000000
17 | }
18 | }
19 | },
20 | "AudioDescriptions": [
21 | {
22 | "CodecSettings": {
23 | "Codec": "AAC",
24 | "AacSettings": {
25 | "Bitrate": 96000,
26 | "CodingMode": "CODING_MODE_2_0",
27 | "SampleRate": 48000
28 | }
29 | }
30 | }
31 | ],
32 | "CaptionDescriptions": [
33 | {
34 | "CaptionSelectorName": "Captions Selector 1",
35 | "DestinationSettings": {
36 | "DestinationType": "BURN_IN",
37 | "BurninDestinationSettings": {
38 | "Alignment": "CENTERED",
39 | "OutlineSize": 0,
40 | "FontOpacity": 255,
41 | "OutlineColor": "BLACK"
42 | }
43 | }
44 | }
45 | ]
46 | }
47 | ],
48 | "OutputGroupSettings": {
49 | "Type": "FILE_GROUP_SETTINGS",
50 | "FileGroupSettings": {
51 | "Destination": ""
52 | }
53 | }
54 | }
55 | ],
56 | "AdAvailOffset": 0,
57 | "Inputs": [
58 | {
59 | "InputClippings": [
60 | {
61 | "EndTimecode": "00:00:31:00",
62 | "StartTimecode": "00:00:00:00"
63 | }
64 | ],
65 | "AudioSelectors": {
66 | "Audio Selector 1": {
67 | "DefaultSelection": "DEFAULT",
68 | "ExternalAudioFileInput": "s3://test/client.e56032ba-6893-4be8-8694-a703c5ab1c39(1).mp3"
69 | }
70 | },
71 | "VideoSelector": {},
72 | "TimecodeSource": "ZEROBASED",
73 | "CaptionSelectors": {
74 | "Captions Selector 1": {
75 | "SourceSettings": {
76 | "SourceType": "SRT",
77 | "FileSourceSettings": {
78 | "SourceFile": "s3://test/experiment.srt"
79 | }
80 | }
81 | }
82 | },
83 | "ImageInserter": {
84 | "InsertableImages": [
85 | {
86 | "Width": 1200,
87 | "Height": 900,
88 | "ImageX": 400,
89 | "ImageY": 10,
90 | "Duration": 5000,
91 | "Layer": 0,
92 | "ImageInserterInput": "s3://test/Image1.png",
93 | "StartTime": "00:00:05:00",
94 | "Opacity": 100
95 | },
96 | {
97 | "Width": 1200,
98 | "Height": 900,
99 | "ImageX": 400,
100 | "ImageY": 10,
101 | "Duration": 5000,
102 | "Layer": 1,
103 | "ImageInserterInput": "s3://test/Image2.png",
104 | "StartTime": "00:00:10:00",
105 | "Opacity": 100
106 | },
107 | {
108 | "Width": 1200,
109 | "Height": 900,
110 | "ImageX": 400,
111 | "ImageY": 10,
112 | "Duration": 5000,
113 | "Layer": 2,
114 | "ImageInserterInput": "s3://test/Image3.png",
115 | "StartTime": "00:00:15:00",
116 | "Opacity": 100
117 | },
118 | {
119 | "Width": 1200,
120 | "Height": 900,
121 | "ImageX": 400,
122 | "ImageY": 10,
123 | "Duration": 5000,
124 | "Layer": 3,
125 | "ImageInserterInput": "s3://test/Image4.png",
126 | "StartTime": "00:00:20:10",
127 | "Opacity": 100
128 | }
129 | ]
130 | },
131 | "FileInput": "s3://test/10min_static.mov"
132 | }
133 | ]
134 | }
--------------------------------------------------------------------------------
/functions/video-lambda/video.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import json
3 | import os
4 | import uuid
5 | import boto3
6 | import datetime
7 | import random
8 | from urllib.parse import urlparse
9 | import logging
10 | from datetime import timedelta
11 |
12 | from botocore.client import ClientError
13 |
# Root logger (no name passed to getLogger); INFO level so the job settings
# logged by the functions below are emitted.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Shared boto3 S3 resource, created once at import time; used below to read
# the article JSON object referenced by each S3 event record.
s3 = boto3.resource('s3')
18 |
def humanize_time(secs):
    """Render a duration given in seconds as a zero-padded ``HH:MM:SS`` string.

    Hours are not wrapped at 24, so very long durations simply produce a
    wider hour field.
    """
    total_minutes, seconds = divmod(secs, 60)
    hours, minutes = divmod(total_minutes, 60)
    return ':'.join('%02d' % part for part in (hours, minutes, seconds))
23 |
def is_successful_ops(job_output):
    """Return True when the job output dict reports HTTP-style status 200."""
    status = job_output['statusCode']
    return status == 200
26 |
def is_failed_ops(job_output):
    """Return True when the job output dict reports any status other than 200."""
    status = job_output['statusCode']
    return status != 200
29 |
def handler(event, context):
    """Lambda entry point: submit MediaConvert jobs for every record in the event.

    Each entry of ``event['Records']`` is passed to ``create_media_convert_jobs``.
    The response always carries ``statusCode`` 200; the JSON ``body`` holds the
    per-record job outputs plus two parallel lists of booleans
    (``SuccessfulOps`` / ``FailedOps``) telling, per record, whether its
    output reported status 200 or not.

    ``context`` is accepted for the Lambda signature and is not used.
    """
    job_outputs = []
    for record in event['Records']:
        job_outputs.append(create_media_convert_jobs(record))

    # One boolean per job output, in the same order as job_outputs.
    successful_ops = list(map(is_successful_ops, job_outputs))
    failed_ops = list(map(is_failed_ops, job_outputs))

    summary = {
        "JobOutputs": job_outputs,
        "FailedOps": failed_ops,
        "SuccessfulOps": successful_ops,
    }
    body = json.dumps(summary, indent=4, sort_keys=True, default=str)
    return {'statusCode': 200, 'body': body}
55 |
56 |
def _set_output_destination(settings, group_type, group_key, destination):
    """Point every output group in ``settings`` at ``destination``.

    Only groups whose ``OutputGroupSettings.Type`` equals ``group_type`` are
    updated (through ``OutputGroupSettings[group_key]['Destination']``); any
    other group type is logged as an error.

    Returns True when every group was of the expected type, False otherwise.
    """
    all_known = True
    for outputGroup in settings['OutputGroups']:
        groupSettings = outputGroup['OutputGroupSettings']
        logger.info("outputGroup['OutputGroupSettings']['Type'] == %s", groupSettings['Type'])

        if groupSettings['Type'] == group_type:
            templateDestination = groupSettings[group_key]['Destination']
            templateDestinationKey = urlparse(templateDestination).path
            logger.info("templateDestinationKey == %s", templateDestinationKey)
            groupSettings[group_key]['Destination'] = destination
        else:
            logger.error("Exception: Unknown Output Group Type %s", groupSettings['Type'])
            all_known = False
    return all_known


def create_media_convert_jobs(record):
    """Submit the preview (MP4) and full (HLS) MediaConvert jobs for one S3 record.

    The record points at an article JSON object in S3. Its ``Metadata`` section
    supplies the preview/full narration audio, four post-produced images and the
    narration duration. Those values are injected into the job templates
    ``preview_mp4.json`` and ``full_hls.json`` (opened relative to the working
    directory) and both jobs are submitted to AWS Elemental MediaConvert.

    Args:
        record: one entry of an S3 event's ``Records`` list.

    Returns:
        dict with ``statusCode`` (200, or 500 when job preparation/submission
        raised or a template contained an unexpected output group type) and
        ``body``, a JSON string holding the ``previewJob`` and ``fullJob``
        responses (empty objects for jobs that were not created).

    Raises:
        KeyError / IndexError: when the record, the article metadata or the
            required environment variables (``DestinationBucket``,
            ``MediaConvertRole``, ``Application``, ``AWS_DEFAULT_REGION``) are
            missing or incomplete.
        Errors raised while reading the article object from S3 also propagate;
        only failures while preparing/submitting the jobs are turned into a
        500 response.
    """
    # Function-scope import: the module's import block only provides urlparse.
    from urllib.parse import unquote_plus

    print(record)

    # S3 event notifications deliver the object key URL-encoded (a space
    # arrives as '+', other characters as %XX); decode it before using it as
    # an S3 key or deriving names from it.
    article_name = unquote_plus(record['s3']['object']['key'])
    # Take the basename straight from the decoded key: running it through
    # urlparse would truncate keys containing '?', '#' or ';'.
    article = os.path.basename(article_name)
    filename = os.path.splitext(article)[0]

    logger.info("ARTICLE NAME")
    logger.info(article)

    # kept like this for readability
    assetID = article
    assetIDfull = article

    sourceS3Bucket = record['s3']['bucket']['name']
    templateS3URL = os.environ.get('TEMPLATE_S3_URL', 's3://gbatt-blogs/narratives/template.mov')
    templateS3URL_preview = os.environ.get('TEMPLATE_S3_URL_PREVIEW', 's3://gbatt-blogs/narratives/Template_video_right.mov')

    # reading the article json
    content_object = s3.Object(sourceS3Bucket, article_name)
    file_content = content_object.get()['Body'].read().decode('utf-8')
    json_content = json.loads(file_content)

    metadata = json_content['Metadata']
    audiopreview = metadata['AudioPreview']
    # Exactly four images are required; a shorter list raises IndexError here.
    photopreviews = [metadata['PostProducedImagesS3Paths'][i] for i in range(4)]
    audiofull = metadata['FullNarration']
    narrationlenght = metadata['FullNarrationDurationInSeconds']

    # The full video shows each of the four images for a quarter of the
    # narration (whole seconds): image i starts at i * partial seconds.
    f = int(float(narrationlenght))
    partial = f // 4
    # Timecodes are HH:MM:SS:FF; the frame field is always ":00".
    imagetimesfull = [humanize_time(i * partial) + ":00" for i in range(4)]
    fullvideolenght = humanize_time(f + 1) + ":00"

    destinationBucket = os.environ['DestinationBucket']
    mediaConvertRole = os.environ['MediaConvertRole']
    application = os.environ['Application']
    region = os.environ['AWS_DEFAULT_REGION']
    statusCode = 200

    job = {}
    jobfull = {}

    # Use MediaConvert SDK UserMetadata to tag jobs with the assetID
    # Events from MediaConvert will have the assetID in UserMetadata
    jobMetadata = {
        'assetID': assetID,
        'application': application,
        'input': templateS3URL_preview,
    }
    jobMetadatafull = {
        'assetID': assetIDfull,
        'application': application,
        'input': templateS3URL,
    }

    try:
        # Load the default job templates shipped in this folder.
        # PREVIEW
        with open('preview_mp4.json') as json_data:
            logger.info('jobInput: %s', 'Default')
            jobSettings = json.load(json_data)
            logger.info(json.dumps(jobSettings))

        # FULL VIDEO
        with open('full_hls.json') as json_datafull:
            logger.info('jobInputfull: %s', 'Default')
            jobSettingsfull = json.load(json_datafull)
            logger.info(json.dumps(jobSettingsfull))

        # MediaConvert is called through an account-specific endpoint that
        # has to be discovered first. TLS certificate verification is left at
        # the boto3 default (enabled).
        endpoints = boto3.client('mediaconvert', region_name=region).describe_endpoints()
        client = boto3.client(
            'mediaconvert',
            region_name=region,
            endpoint_url=endpoints['Endpoints'][0]['Url'],
        )

        # ---- preview job -------------------------------------------------
        # Save the name of the settings file in the job userMetadata
        jobMetadata['settings'] = 'Default'

        previewInput = jobSettings['Inputs'][0]
        previewInput['FileInput'] = templateS3URL_preview
        previewImages = previewInput['ImageInserter']['InsertableImages']
        for i, photo in enumerate(photopreviews):
            image = previewImages[i]
            image['ImageInserterInput'] = photo
            image['Width'] = 1100
            image['Height'] = 800
            image['ImageX'] = 10
            image['ImageY'] = 10
        previewInput['AudioSelectors']['Audio Selector 1']['ExternalAudioFileInput'] = audiopreview

        captionSource = previewInput['CaptionSelectors']['Captions Selector 1']['SourceSettings']['FileSourceSettings']
        captionSource['SourceFile'] = f"s3://{sourceS3Bucket}/srt/preview/{article}.srt"
        burnin = jobSettings['OutputGroups'][0]['Outputs'][0]['CaptionDescriptions'][0]['DestinationSettings']['BurninDestinationSettings']
        burnin['FontColor'] = "BLACK"
        burnin['YPosition'] = 900

        logger.info('SRT FILE PAHT')
        logger.info(captionSource['SourceFile'])

        # Replace the (empty) template destination with the preview output path.
        destinationS3 = f"s3://{destinationBucket}/output/preview/{filename}"
        if not _set_output_destination(jobSettings, 'FILE_GROUP_SETTINGS', 'FileGroupSettings', destinationS3):
            # The job is still submitted; the response just reports the problem.
            statusCode = 500

        logger.info(json.dumps(jobSettings))

        # Convert the video using AWS Elemental MediaConvert
        job = client.create_job(Role=mediaConvertRole, UserMetadata=jobMetadata, Settings=jobSettings)

        # ---- full video job ----------------------------------------------
        # Save the name of the settings file in the job userMetadata
        jobMetadatafull['settings'] = 'Default'

        fullInput = jobSettingsfull['Inputs'][0]
        fullInput['FileInput'] = templateS3URL
        fullImages = fullInput['ImageInserter']['InsertableImages']
        for i, photo in enumerate(photopreviews):
            image = fullImages[i]
            image['ImageInserterInput'] = photo
            image['StartTime'] = imagetimesfull[i]
            image['Duration'] = partial * 1000  # milliseconds
            image['Width'] = 1100
            image['Height'] = 800
        fullInput['AudioSelectors']['Audio Selector 1']['ExternalAudioFileInput'] = audiofull
        fullInput['InputClippings'][0]['EndTimecode'] = fullvideolenght
        jobSettingsfull['OutputGroups'][0]['Outputs'][0]['NameModifier'] = filename

        # Replace the (empty) template destination with the HLS output path.
        destinationS3full = f"s3://{destinationBucket}/output/full/hls/{filename}/"
        if not _set_output_destination(jobSettingsfull, 'HLS_GROUP_SETTINGS', 'HlsGroupSettings', destinationS3full):
            statusCode = 500

        logger.info(json.dumps(jobSettingsfull))

        # Convert the video using AWS Elemental MediaConvert
        jobfull = client.create_job(Role=mediaConvertRole, UserMetadata=jobMetadatafull, Settings=jobSettingsfull)

    except Exception as e:
        logger.error('Exception: %s', e)
        statusCode = 500

    # Returned after (not inside a ``finally`` of) the try block so that
    # non-Exception signals such as KeyboardInterrupt are not swallowed.
    return {
        'statusCode': statusCode,
        'body': json.dumps({"previewJob": job, "fullJob": jobfull}, indent=4, sort_keys=True, default=str)
    }
289 |
290 | # def create_media_tailor_jobs(event, context):
291 |
292 | # # expected $ORIGINAL_KEY/ads/$TYPE/$UUID.vmap.xml
293 | # KEY = urllib.unquote_plus(event["Records"][0]["s3"]["object"]["key"])
294 | # # expected [$ORIGINAL_KEY, $TYPE/$UUID.vmap.xml]
295 | # MEDIA_BUCKET, FILENAME = KEY.split("/ads/")
296 | # TYPE, UUID = FILENAME.split("/")
297 | # UUID = UUID.replace(".vmap.xml", "")
298 |
299 | # print("**********DYNAMO REQUEST**********")
300 | # try:
301 | # item = metadata_table.get_item(Key={"MediaId": UUID})["Item"]
302 | # except ClientError as e:
303 | # print(e)
304 | # return {
305 | # "statusCode": 500,
306 | # "body": f"possible problem with UUID {UUID}: {str(e)}",
307 | # }
308 |
309 | # print("**********DYNAMO RESPONSE**********")
310 | # print(json.dumps(item, indent=2, cls=DateTimeEncoder))
311 |
312 | # print("**********MEDIATAILOR REQUEST**********")
313 | # try:
314 | # mediatailor_response = mediatailor.put_playback_configuration(
315 | # AdDecisionServerUrl=item[f"VMAPUrl-{TYPE}"],
316 | # Name=f"{TYPE}-{UUID}",
317 | # Tags={"OriginTable": METADATA_TABLE, "MediaId": UUID},
318 | # VideoContentSourceUrl=item["PlaylistUrl"].replace("/playlist.m3u8", ""),
319 | # )
320 | # except ClientError as e:
321 | # print(e)
322 | # return {
323 | # "statusCode": 500,
324 | # "body": f"possible problem with UUID {UUID} and ads manifest type {TYPE}: {str(e)}",
325 | # }
326 | # except KeyError as e:
327 | # print(e)
328 | # return {
329 | # "statusCode": 500,
330 | # "body": f"ads manifest type {TYPE} not found for media {UUID}: {str(e)}",
331 | # }
332 |
333 | # print("**********MEDIATAILOR RESPONSE**********")
334 | # print(json.dumps(mediatailor_response, indent=2, cls=DateTimeEncoder))
335 |
336 | # print("**********DYNAMO UPDATE**********")
337 | # try:
338 | # update_response = metadata_table.update_item(
339 | # Key={"MediaId": UUID},
340 | # AttributeUpdates={
341 | # f"StreamUrl-{TYPE}": {
342 | # "Value": mediatailor_response["HlsConfiguration"][
343 | # "ManifestEndpointPrefix"
344 | # ]
345 | # }
346 | # },
347 | # )
348 | # except ClientError as e:
349 | # print(e)
350 | # exit(255)
351 |
352 | # print("**********DYNAMO RESPONSE**********")
353 | # print(json.dumps(update_response, indent=2, cls=DateTimeEncoder))
354 |
355 | # return {"statusCode": 200, "body": "OK"}
--------------------------------------------------------------------------------
/jest.config.js:
--------------------------------------------------------------------------------
1 | module.exports = {
2 | roots: ['/test'],
3 | testMatch: ['**/*.test.ts'],
4 | transform: {
5 | '^.+\\.tsx?$': 'ts-jest'
6 | }
7 | };
8 |
--------------------------------------------------------------------------------
/lib/polly-preview-simple-stack.ts:
--------------------------------------------------------------------------------
1 | import * as cdk from '@aws-cdk/core';
2 | import * as lambda from '@aws-cdk/aws-lambda';
3 | import * as s3 from '@aws-cdk/aws-s3';
4 | import * as lambdaEvent from '@aws-cdk/aws-lambda-event-sources';
5 | import * as iam from '@aws-cdk/aws-iam';
6 | import * as dynamo from '@aws-cdk/aws-dynamodb';
7 | import * as apigateway from '@aws-cdk/aws-apigateway';
8 | import { Duration } from '@aws-cdk/core';
9 |
10 | const FFMPEG_PREVIEW_DURATION = "30";
11 | const FFMPEG_FADEOUT_DURATION = "3";
12 |
13 | /**
14 |  * CDK stack for the Polly preview pipeline.
15 |  *
16 |  * Creates an S3 bucket and a DynamoDB table, six Lambda functions, a REST API
17 |  * whose root POST method invokes the scrape function, and an IAM role that
18 |  * MediaConvert can assume. Five of the functions are subscribed to
19 |  * object-created notifications on the bucket, filtered by key prefix and suffix.
20 |  */
21 | export class PollyPreviewSimpleStack extends cdk.Stack {
22 |   /**
23 |    * @param scope CDK app this stack is added to.
24 |    * @param id Construct id of the stack.
25 |    * @param props Optional stack properties, passed through to cdk.Stack.
26 |    */
27 |   constructor(scope: cdk.App, id: string, props?: cdk.StackProps) {
28 |     super(scope, id, props);
29 |
30 |     // --- Storage ---------------------------------------------------------
31 |     const assetBucket = new s3.Bucket(this, "PollyAssetStore", {
32 |       encryption: s3.BucketEncryption.S3_MANAGED,
33 |       versioned: true,
34 |       enforceSSL: true
35 |     });
36 |
37 |     const metadataTable = new dynamo.Table(this, "PollyMetadataStore", {
38 |       partitionKey: { name: 'AssetId', type: dynamo.AttributeType.STRING }
39 |     });
40 |
41 |     const bucketName = assetBucket.bucketName;
42 |     const tableName = metadataTable.tableName;
43 |     // The bucket itself plus every object inside it.
44 |     const bucketArns = [assetBucket.bucketArn, `${assetBucket.bucketArn}/*`];
45 |
46 |     // --- Role handed to MediaConvert --------------------------------------
47 |     const mediaConvertAccess = iam.ManagedPolicy.fromManagedPolicyArn(
48 |       this,
49 |       "MediaConvertManagedPolicy",
50 |       "arn:aws:iam::aws:policy/AWSElementalMediaConvertFullAccess"
51 |     );
52 |
53 |     const readAssets = new iam.PolicyStatement({
54 |       effect: iam.Effect.ALLOW,
55 |       actions: ["s3:Get*", "s3:List*"],
56 |       resources: [
57 |         ...bucketArns,
58 |         "arn:aws:s3:::gbatt-blogs/narratives",
59 |         "arn:aws:s3:::gbatt-blogs/narratives/*"
60 |       ]
61 |     });
62 |
63 |     const writeAssets = new iam.PolicyStatement({
64 |       effect: iam.Effect.ALLOW,
65 |       actions: ["s3:Put*", "s3:*MultipartUpload*"],
66 |       resources: [...bucketArns]
67 |     });
68 |
69 |     const mediaConvertRole = new iam.Role(this, "MediaconvertPassDownRole", {
70 |       assumedBy: new iam.ServicePrincipal('mediaconvert.amazonaws.com'),
71 |       managedPolicies: [mediaConvertAccess],
72 |       inlinePolicies: {
73 |         "S3MediaConvertInline": new iam.PolicyDocument({
74 |           statements: [readAssets, writeAssets]
75 |         })
76 |       }
77 |     });
78 |
79 |     // --- Scrape function and its REST API ---------------------------------
80 |     const scrapeFn = new lambda.Function(this, "ScrapeLambda", {
81 |       runtime: lambda.Runtime.NODEJS_14_X,
82 |       handler: "scrape.handler",
83 |       code: lambda.Code.fromAsset("functions/scrape-lambda"),
84 |       memorySize: 512,
85 |       timeout: cdk.Duration.seconds(29),
86 |       environment: {
87 |         Table: tableName,
88 |         OutputS3BucketName: bucketName,
89 |         FFMPEG_PREVIEW_DURATION: FFMPEG_PREVIEW_DURATION,
90 |         FFMPEG_FADEOUT_DURATION: FFMPEG_FADEOUT_DURATION
91 |       }
92 |     });
93 |
94 |     const scrapeApi = new apigateway.RestApi(this, "scrape-api", {
95 |       restApiName: "Scraping Service",
96 |       description: "This service starts scraping an article."
97 |     });
98 |
99 |     scrapeApi.root.addMethod(
100 |       "POST",
101 |       new apigateway.LambdaIntegration(scrapeFn, {
102 |         requestTemplates: { "application/json": '{ "statusCode": "200" }' }
103 |       })
104 |     );
105 |
106 |     // --- Functions triggered by bucket uploads ----------------------------
107 |     const pollyFn = new lambda.Function(this, "PollyLambda", {
108 |       runtime: lambda.Runtime.NODEJS_14_X,
109 |       handler: "polly.handler",
110 |       code: lambda.Code.fromAsset("functions/polly-lambda"),
111 |       memorySize: 512,
112 |       environment: {
113 |         Table: tableName,
114 |         OutputS3BucketName: bucketName
115 |       }
116 |     });
117 |
118 |     const fadeOutFn = new lambda.Function(this, "FadeOutLambda", {
119 |       runtime: lambda.Runtime.PYTHON_3_8,
120 |       handler: "fadeout.handler",
121 |       code: lambda.Code.fromAsset("functions/postprod-lambda"),
122 |       memorySize: 512,
123 |       environment: {
124 |         POLLY_METADATA_STORE: tableName,
125 |         FFMPEG_PREVIEW_DURATION: FFMPEG_PREVIEW_DURATION,
126 |         FFMPEG_FADEOUT_DURATION: FFMPEG_FADEOUT_DURATION
127 |       }
128 |     });
129 |
130 |     const imagesFn = new lambda.Function(this, "ImagesLambda", {
131 |       runtime: lambda.Runtime.PYTHON_3_8,
132 |       handler: "images.handler",
133 |       code: lambda.Code.fromAsset("functions/postprod-lambda"),
134 |       memorySize: 2048,
135 |       timeout: cdk.Duration.seconds(90),
136 |       environment: {
137 |         POLLY_METADATA_STORE: tableName
138 |       }
139 |     });
140 |
141 |     const videoFn = new lambda.Function(this, "VideoLambda", {
142 |       runtime: lambda.Runtime.PYTHON_3_8,
143 |       handler: "video.handler",
144 |       code: lambda.Code.fromAsset("functions/video-lambda"),
145 |       memorySize: 512,
146 |       environment: {
147 |         POLLY_METADATA_STORE: tableName,
148 |         DestinationBucket: bucketName,
149 |         Application: "VOD",
150 |         MediaConvertRole: mediaConvertRole.roleArn,
151 |         // To use your own templates, point these at other objects, e.g.
152 |         // "s3://your/custom/template/here.mp4".
153 |         TEMPLATE_S3_URL: `s3://${bucketName}/custom/template/template.mov`,
154 |         TEMPLATE_S3_URL_PREVIEW: `s3://${bucketName}/custom/template/Template_video_right.mov`
155 |       }
156 |     });
157 |
158 |     const finalizeFn = new lambda.Function(this, "FinalizeUpdateLambda", {
159 |       runtime: lambda.Runtime.PYTHON_3_8,
160 |       handler: "finalize.handler",
161 |       code: lambda.Code.fromAsset("functions/finalize-lambda"),
162 |       memorySize: 512,
163 |       environment: {
164 |         POLLY_METADATA_STORE: tableName
165 |       }
166 |     });
167 |
168 |     // --- Upload notifications ---------------------------------------------
169 |     // Builds an object-created event source on the asset bucket, limited to
170 |     // keys that start with `prefix` and end with `suffix`.
171 |     const onUpload = (prefix: string, suffix: string): lambdaEvent.S3EventSource =>
172 |       new lambdaEvent.S3EventSource(assetBucket, {
173 |         events: [s3.EventType.OBJECT_CREATED],
174 |         filters: [{ prefix: prefix }, { suffix: suffix }]
175 |       });
176 |
177 |     pollyFn.addEventSource(onUpload('text', 'json'));
178 |     fadeOutFn.addEventSource(onUpload('audio/full', 'mp3'));
179 |     imagesFn.addEventSource(onUpload('audio/preview', 'wav'));
180 |     videoFn.addEventSource(onUpload('video-trigger', 'json'));
181 |     finalizeFn.addEventSource(onUpload('output/preview', 'mp4'));
182 |     finalizeFn.addEventSource(onUpload('output/full/hls', 'm3u8'));
183 |
184 |     // --- Extra IAM permissions --------------------------------------------
185 |     videoFn.role?.addManagedPolicy(mediaConvertAccess);
186 |     // Lets the video function pass the MediaConvert role defined above.
187 |     videoFn.addToRolePolicy(new iam.PolicyStatement({
188 |       actions: ['iam:PassRole'],
189 |       resources: [mediaConvertRole.roleArn]
190 |     }));
191 |
192 |     pollyFn.addToRolePolicy(new iam.PolicyStatement({
193 |       actions: ["polly:startSpeechSynthesisTask"],
194 |       resources: ["*"]
195 |     }));
196 |     scrapeFn.addToRolePolicy(new iam.PolicyStatement({
197 |       actions: ["comprehend:detect*"],
198 |       resources: ["*"]
199 |     }));
200 |
201 |     // --- Bucket and table grants ------------------------------------------
202 |     // Every function except the finalize one reads from and writes to the bucket.
203 |     const bucketUsers = [pollyFn, fadeOutFn, scrapeFn, imagesFn, videoFn];
204 |     for (const fn of bucketUsers) {
205 |       assetBucket.grantRead(fn);
206 |     }
207 |     for (const fn of bucketUsers) {
208 |       assetBucket.grantPut(fn);
209 |     }
210 |
211 |     // All six functions get read/write access to the metadata table.
212 |     const tableUsers = [...bucketUsers, finalizeFn];
213 |     for (const fn of tableUsers) {
214 |       metadataTable.grantReadWriteData(fn);
215 |     }
216 |
217 |     // --- Stack outputs ----------------------------------------------------
218 |     new cdk.CfnOutput(this, 'APIEndpoint', { value: scrapeApi.url });
219 |     new cdk.CfnOutput(this, 'AssetStoreBucketName', { value: bucketName });
220 |     new cdk.CfnOutput(this, 'AssetStoreBucketArn', { value: assetBucket.bucketArn });
221 |     new cdk.CfnOutput(this, 'MetadataStoreName', { value: tableName });
222 |   }
223 | }
317 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "polly-preview-simple",
3 | "version": "0.1.0",
4 | "bin": {
5 | "polly-preview-simple": "bin/polly-preview-simple.js"
6 | },
7 | "scripts": {
8 | "build": "tsc",
9 | "watch": "tsc -w",
10 | "test": "jest",
11 | "cdk": "cdk"
12 | },
13 | "devDependencies": {
14 | "@aws-cdk/assert": "1.189.0",
15 | "@types/jest": "^26.0.10",
16 | "@types/node": "10.17.27",
17 | "aws-cdk": "^1.124.0",
18 | "jest": "^26.4.2",
19 | "ts-jest": "^26.2.0",
20 | "ts-node": "^9.0.0",
21 | "typescript": "~3.9.7"
22 | },
23 | "dependencies": {
24 | "@aws-cdk/aws-apigateway": "^1.105.0",
25 | "@aws-cdk/aws-dynamodb": "^1.105.0",
26 | "@aws-cdk/aws-iam": "^1.105.0",
27 | "@aws-cdk/aws-lambda": "^1.105.0",
28 | "@aws-cdk/aws-lambda-event-sources": "^1.105.0",
29 | "@aws-cdk/aws-s3": "^1.105.0",
30 | "@aws-cdk/aws-sns": "^1.105.0",
31 | "@aws-cdk/aws-sns-subscriptions": "^1.105.0",
32 | "@aws-cdk/core": "^1.105.0",
33 | "json-schema": ">=0.4.0"
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/template/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore
--------------------------------------------------------------------------------
/test/polly-preview-simple.test.ts:
--------------------------------------------------------------------------------
1 | import { expect as expectCDK, haveResource } from '@aws-cdk/assert';
2 | import * as cdk from '@aws-cdk/core';
3 | import * as PollyPreviewSimple from '../lib/polly-preview-simple-stack';
4 |
5 | // test('SQS Queue Created', () => {
6 | // const app = new cdk.App();
7 | // // WHEN
8 | // const stack = new PollyPreviewSimple.PollyPreviewSimpleStack(app, 'MyTestStack');
9 | // // THEN
10 | // expectCDK(stack).to(haveResource("AWS::SQS::Queue",{
11 | // VisibilityTimeout: 300
12 | // }));
13 | // });
14 |
15 | // test('SNS Topic Created', () => {
16 | // const app = new cdk.App();
17 | // // WHEN
18 | // const stack = new PollyPreviewSimple.PollyPreviewSimpleStack(app, 'MyTestStack');
19 | // // THEN
20 | // expectCDK(stack).to(haveResource("AWS::SNS::Topic"));
21 | // });
22 |
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "target":"ES2018",
4 | "module": "commonjs",
5 | "lib": ["es2018"],
6 | "declaration": true,
7 | "strict": true,
8 | "noImplicitAny": true,
9 | "strictNullChecks": true,
10 | "noImplicitThis": true,
11 | "alwaysStrict": true,
12 | "noUnusedLocals": false,
13 | "noUnusedParameters": false,
14 | "noImplicitReturns": true,
15 | "noFallthroughCasesInSwitch": false,
16 | "inlineSourceMap": true,
17 | "inlineSources": true,
18 | "experimentalDecorators": true,
19 | "strictPropertyInitialization":false,
20 | "typeRoots": ["./node_modules/@types"]
21 | },
22 | "exclude": ["cdk.out"]
23 | }
24 |
--------------------------------------------------------------------------------