├── .eslintignore ├── .eslintrc.js ├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── aggregate ├── aggregate.js ├── index.js └── podcast-feed.js ├── convert ├── convert.js ├── index.js └── text-to-speech.js ├── images └── architecture.png ├── package.json ├── serverless.yml ├── templates └── function.ejs ├── test ├── aggregate.js └── convert.js └── webpack.config.js /.eslintignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | tmp -------------------------------------------------------------------------------- /.eslintrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | "extends": "airbnb", 3 | "plugins": [], 4 | "rules": { 5 | "func-names": "off", 6 | 7 | // doesn't work in node v4 :( 8 | "strict": "off", 9 | "prefer-rest-params": "off", 10 | "react/require-extension" : "off", 11 | "import/no-extraneous-dependencies" : "off" 12 | }, 13 | "env": { 14 | "mocha": true 15 | } 16 | }; 17 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log 5 | 6 | # Runtime data 7 | pids 8 | *.pid 9 | *.seed 10 | dist 11 | 12 | # Directory for instrumented libs generated by jscoverage/JSCover 13 | lib-cov 14 | 15 | # Coverage directory used by tools like istanbul 16 | coverage 17 | 18 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 19 | .grunt 20 | 21 | # node-waf configuration 22 | .lock-wscript 23 | 24 | # Compiled binary addons (http://nodejs.org/api/addons.html) 25 | build/Release 26 | 27 | # Dependency directory 28 | # https://www.npmjs.org/doc/misc/npm-faq.html#should-i-check-my-node_modules-folder-into-git 29 | node_modules 30 | 31 | #IDE Stuff 32 | **/.idea 33 | 34 | #OS STUFF 35 | .DS_Store 36 | .tmp 37 | 38 | #SERVERLESS STUFF 39 | admin.env 
40 | .env 41 | _meta 42 | .serverless 43 | 44 | .deploy.sh 45 | 46 | .webpack 47 | 48 | test/s3-event.json -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: 3 | - '4.3.2' 4 | script: 5 | - npm test 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 SC5 Online Ltd 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Amazon Polly - Blog to Podcast
2 | 
3 | ## Architecture
4 | 
5 | ![architecture](https://raw.githubusercontent.com/SC5/serverless-blog-to-podcast/master/images/architecture.png)
6 | 
7 | ## Installation
8 | 
9 | With Serverless 1.5 or later, run:
10 | 
11 | ```
12 | sls install -u https://github.com/SC5/serverless-blog-to-podcast -n my-podcast-service
13 | cd my-podcast-service
14 | npm install
15 | ```
16 | 
17 | ## Deployment
18 | 
19 | Amazon Polly is available in the following regions: us-east-1, us-east-2, us-west-2, and eu-west-1.
20 | 
21 | ```
22 | sls deploy --region us-east-1
23 | ```
24 | 
25 | ## Structure
26 | 
27 | `aggregate/aggregate.js` defines which RSS feed the service uses. It loads the feed, saves each entry as a JSON file in the blog S3 bucket, and writes the podcast `rss.xml` used for feed subscription.
28 | 
29 | `convert/convert.js` is triggered by S3 object-created events. It sends the text saved in the blog bucket to Amazon Polly and saves the mp3 file that Polly returns to the podcast bucket.
30 | -------------------------------------------------------------------------------- /aggregate/aggregate.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | require('request'); 4 | const request = require('request-promise'); 5 | const crypto = require('crypto'); 6 | const AWS = require('aws-sdk'); 7 | const franc = require('franc'); 8 | const BbPromise = require('bluebird'); 9 | const parseString = BbPromise.promisify(require('xml2js').parseString); 10 | const podcastFeed = require('./podcast-feed'); 11 | 12 | const s3 = new AWS.S3(); 13 | 14 | /** 15 | * Writes blog item to S3 if not yet exists 16 | * @param item 17 | * @returns {*} 18 | */ 19 | const writeItem = (item) => { 20 | const Key = `${item.id}.json`; 21 | const lang = franc.all(item.title, { whitelist: ['eng', 'fin'] })[0]; 22 | 23 | if (lang[0] === 'fin') { 24 | return 0; 25 | } 26 | 27 | Object.assign(item, { lang: lang[0] }); 28 | const params = { 29 | Bucket: process.env.BLOG_BUCKET, 30 | Key, 31 | }; 32 | 33 | return s3.getObject(params).promise() 34 | .then(() => item) 35 | .catch(() => 36 | s3.putObject( 37 | Object.assign(params, { 38 | Body: JSON.stringify(item), 39 | ContentType: 'application/json', 40 | }) 41 | ).promise() 42 | .then(() => item)); 43 | }; 44 | 45 | /** 46 | * Loads rss feed, saves items and podcast xml to s3 47 | */ 48 | module.exports = () => new Promise((resolve, reject) => { 49 | request('https://sc5.io/blog/feed/') 50 | .then((data) => { 51 | parseString(data) 52 | .then((feed) => { 53 | const items = feed.rss.channel[0].item.map(item => 54 | writeItem({ 55 | title: item.title[0], 56 | creator: item['dc:creator'][0], 57 | date: (new Date(item.pubDate[0])).toJSON(), 58 | description: item.description[0], 59 | content: item['content:encoded'][0], 60 | guid: item.guid[0]._, 61 | id: crypto.createHash('md5').update(item.guid[0]._).digest('hex'), 62 | })); 63 | 64 | Promise.all(items) 65 | .then(podcastFeed) 66 | 
.then('ok') 67 | .catch(reject); 68 | }); 69 | }); 70 | }); 71 | -------------------------------------------------------------------------------- /aggregate/index.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const aggregate = require('./aggregate'); 4 | 5 | module.exports.handler = 6 | (event, context, callback) => 7 | aggregate() 8 | .then(data => callback(null, data)) 9 | .catch(callback); 10 | -------------------------------------------------------------------------------- /aggregate/podcast-feed.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const AWS = require('aws-sdk'); 4 | const Podcast = require('podcast'); 5 | 6 | const s3 = new AWS.S3(); 7 | 8 | const striptags = require('striptags'); 9 | const Entities = require('html-entities').XmlEntities; 10 | 11 | const entities = new Entities(); 12 | 13 | 14 | /** 15 | * Creates and saves podcast xml 16 | * @param items 17 | * @returns {Promise.} 18 | */ 19 | module.exports = (items) => { 20 | const feed = new Podcast({ 21 | title: 'SC5 Podcast', 22 | feed_url: `https://${process.env.PODCAST_BUCKET}.s3.amazonaws.com/rss.xml`, 23 | site_url: 'https://sc5.io/blog/', 24 | language: 'en', 25 | ttl: 1, 26 | itunesOwner: { 27 | name: 'SC5', 28 | email: 'blog@sc5.io', 29 | }, 30 | itunesCategory: [{ 31 | text: 'Technology', 32 | subcats: [{ 33 | text: 'Tech News', 34 | }], 35 | }], 36 | itunesImage: 'https://logo.sc5.io/images/sc5logo-dark-outline-3399x1440.png', 37 | }); 38 | 39 | items.forEach((item) => { 40 | if (item) { 41 | const title = striptags(item.title); 42 | const description = entities.decode(striptags(item.description)); 43 | const date = item.date; 44 | const url = `https://${process.env.PODCAST_BUCKET}.s3.amazonaws.com/${item.id}.mp3`; 45 | feed.item({ 46 | title, 47 | description, 48 | url, 49 | date, 50 | enclosure: { url }, 51 | author: item.creator, 52 | itunesSubtitle: 
title, 53 | itunesDuration: 1, 54 | itunesKeywords: ['technology'], 55 | }); 56 | } 57 | }); 58 | 59 | const xml = feed.xml(); 60 | 61 | return s3.putObject({ 62 | Bucket: process.env.PODCAST_BUCKET, 63 | Key: 'rss.xml', 64 | Body: xml, 65 | ContentType: 'application/rss+xml', 66 | }).promise() 67 | .then(() => xml); 68 | }; 69 | -------------------------------------------------------------------------------- /convert/convert.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const textToSpeech = require('./text-to-speech'); 4 | const AWS = require('aws-sdk'); 5 | const striptags = require('striptags'); 6 | const path = require('path'); 7 | const Entities = require('html-entities').XmlEntities; 8 | 9 | const entities = new Entities(); 10 | 11 | const s3 = new AWS.S3(); 12 | 13 | /** 14 | * Converts blog text to mp3 and saves to S3 bucket 15 | * @param event 16 | * @returns {Promise.} 17 | */ 18 | module.exports = (event) => { 19 | const s3data = event.Records[0].s3; 20 | const id = path.basename(s3data.object.key, '.json'); 21 | 22 | let json; 23 | 24 | return s3.getObject({ 25 | Bucket: s3data.bucket.name, 26 | Key: s3data.object.key, 27 | }).promise() 28 | .then((data) => { 29 | json = JSON.parse(data.Body); 30 | const text = entities.decode(striptags(json.description)); 31 | return textToSpeech(`${json.title}. 
${text}`); 32 | }) 33 | .then(data => s3.putObject({ 34 | Bucket: process.env.PODCAST_BUCKET, 35 | Key: `${id}.mp3`, 36 | Body: data.AudioStream, 37 | ContentType: 'audio/mpeg', 38 | }).promise()) 39 | .then(() => `${id}.mp3 created`); 40 | }; -------------------------------------------------------------------------------- /convert/index.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const convert = require('./convert'); 4 | 5 | module.exports.handler = 6 | (event, context, callback) => 7 | convert(event) 8 | .then(data => callback(null, data)) 9 | .catch(callback); 10 | -------------------------------------------------------------------------------- /convert/text-to-speech.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const AWS = require('aws-sdk'); 4 | 5 | const polly = new AWS.Polly(); 6 | 7 | /** 8 | * Sends text to Polly and returns result 9 | * @param text 10 | * @returns {Promise} 11 | */ 12 | module.exports = (text) => { 13 | const params = { 14 | OutputFormat: 'mp3', 15 | Text: text, 16 | // VoiceId: 'Geraint | Gwyneth | Mads | Naja 17 | // | Hans | Marlene | Nicole | Russell | Amy 18 | // | Brian | Emma | Raveena | Ivy | Joanna | Joey 19 | // | Justin | Kendra | Kimberly | Salli | Conchita 20 | // | Enrique | Miguel | Penelope | Chantal | Celine 21 | // | Mathieu | Dora | Karl | Carla | Giorgio | Mizuki 22 | // | Liv | Lotte | Ruben | Ewa | Jacek | Jan | Maja 23 | // | Ricardo | Vitoria | Cristiano | Ines | Carmen 24 | // | Maxim | Tatyana | Astrid | Filiz' 25 | VoiceId: 'Brian', 26 | // LexiconNames: [ 27 | // 'STRING_VALUE', 28 | // /* more items */ 29 | // ], 30 | // SampleRate: 'STRING_VALUE', 31 | TextType: 'text', 32 | }; 33 | return polly.synthesizeSpeech(params).promise(); 34 | }; 35 | -------------------------------------------------------------------------------- /images/architecture.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/SC5/serverless-blog-to-podcast/72043775003b82df0da49e878128fade43c1d3e0/images/architecture.png -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "et-blog-polly", 3 | "version": "1.2.0", 4 | "description": "A boilerplate for Serverless applications by SC5 Online", 5 | "main": "fnHello/handler.js", 6 | "keywords": [ 7 | "serverless", 8 | "aws" 9 | ], 10 | "author": "Mikael Puittinen", 11 | "contributors": [ 12 | "Eetu Tuomala" 13 | ], 14 | "license": "MIT", 15 | "repository": { 16 | "type": "git", 17 | "url": "https://github.com/SC5/blog-polly.git" 18 | }, 19 | "dependencies": { 20 | "bluebird": "^3.4.7", 21 | "franc": "^2.0.0", 22 | "html-entities": "^1.2.0", 23 | "podcast": "^0.2.2", 24 | "request": "^2.79.0", 25 | "request-promise": "^4.1.1", 26 | "striptags": "^2.1.1", 27 | "xml2js": "^0.4.17" 28 | }, 29 | "devDependencies": { 30 | "aws-sdk": "^2.7.10", 31 | "copy-webpack-plugin": "^3.0.1", 32 | "eslint": "^3.3.1", 33 | "eslint-config-airbnb": "^10.0.1", 34 | "eslint-config-airbnb-base": "^5.0.2", 35 | "eslint-plugin-import": "^1.13.0", 36 | "eslint-plugin-jsx-a11y": "^2.1.0", 37 | "eslint-plugin-react": "^6.1.1", 38 | "json-loader": "^0.5.4", 39 | "node-yaml": "^3.0.3", 40 | "serverless-mocha-plugin": "^1.3.1", 41 | "serverless-offline": "^3.2.1", 42 | "serverless-webpack": "^1.0.0-rc.3", 43 | "webpack-node-externals": "^1.5.4" 44 | }, 45 | "scripts": { 46 | "test": "SLS_DEBUG=true serverless invoke test", 47 | "lint": "eslint ." 
48 | } 49 | } 50 | -------------------------------------------------------------------------------- /serverless.yml: -------------------------------------------------------------------------------- 1 | frameworkVersion: ">=1.2.0 <2.0.0" 2 | 3 | service: blog-polly # NOTE: update this with your service name 4 | 5 | provider: 6 | name: aws 7 | runtime: nodejs4.3 8 | cfLogs: true 9 | environment: 10 | SERVERLESS_STAGE: ${opt:stage, self:provider.stage} 11 | SERVERLESS_PROJECT: ${self:service} 12 | SERVERLESS_REGION: ${opt:region, self:provider.region} 13 | BLOG_BUCKET: ${self:provider.environment.SERVERLESS_PROJECT}-blog-${self:provider.environment.SERVERLESS_STAGE} 14 | PODCAST_BUCKET: ${self:provider.environment.SERVERLESS_PROJECT}-podcast-${self:provider.environment.SERVERLESS_STAGE} 15 | TABLE_NAME: ${self:provider.environment.SERVERLESS_PROJECT}-blog-podcast-${self:provider.environment.SERVERLESS_STAGE} 16 | iamRoleStatements: 17 | - Effect: Allow 18 | Action: 19 | - dynamodb:* 20 | Resource: arn:aws:dynamodb:${self:provider.environment.SERVERLESS_REGION}:*:* 21 | - Effect: Allow 22 | Action: 23 | - s3:ListBucket 24 | Resource: 25 | - "*" 26 | - Effect: Allow 27 | Action: 28 | - s3:PutObject 29 | - s3:GetObject 30 | - s3:DeleteObject 31 | Resource: 32 | - "*" 33 | - Effect: Allow 34 | Action: 35 | - SNS:* 36 | Resource: arn:aws:sns:${self:provider.environment.SERVERLESS_REGION}:*:* 37 | - Effect: Allow 38 | Action: 39 | - polly:SynthesizeSpeech 40 | Resource: "*" 41 | 42 | package: 43 | exclude: 44 | - test/** 45 | - .git/** 46 | 47 | functions: 48 | aggregate: 49 | handler: aggregate/index.handler 50 | timeout: 30 51 | events: 52 | - schedule: rate(10 minutes) 53 | convert: 54 | handler: convert/index.handler 55 | events: 56 | - s3: 57 | bucket: ${self:provider.environment.BLOG_BUCKET} # ALSO CREATES THE BUCKET !!! 
58 | event: s3:ObjectCreated:* 59 | 60 | plugins: 61 | - serverless-mocha-plugin 62 | - serverless-webpack 63 | - serverless-offline 64 | 65 | custom: 66 | serverless-mocha-plugin: 67 | functionTemplate: templates/function.ejs 68 | webpackIncludeModules: true 69 | 70 | resources: 71 | Resources: 72 | PodcastBucket: 73 | Type: AWS::S3::Bucket 74 | Properties: 75 | BucketName: ${self:provider.environment.PODCAST_BUCKET} 76 | PodcastBucketPolicy: 77 | Type: AWS::S3::BucketPolicy 78 | Properties: 79 | Bucket: 80 | Ref: PodcastBucket 81 | PolicyDocument: 82 | Version: '2012-10-17' 83 | Statement: 84 | - Sid: PublicReadGetObject 85 | Effect: Allow 86 | Principal: "*" 87 | Action: s3:GetObject 88 | Resource: 89 | Fn::Join: 90 | - '' 91 | - - 'arn:aws:s3:::' 92 | - Ref: PodcastBucket 93 | - "/*" 94 | # BlogPodcastTable: 95 | # Type: AWS::DynamoDB::Table 96 | # DeletionPolicy: Retain 97 | # Properties: 98 | # AttributeDefinitions: 99 | # - AttributeName: id 100 | # AttributeType: S 101 | # KeySchema: 102 | # - AttributeName: id 103 | # KeyType: HASH 104 | # ProvisionedThroughput: 105 | # ReadCapacityUnits: 1 106 | # WriteCapacityUnits: 1 107 | # TableName: ${self:provider.environment.TABLE_NAME} 108 | -------------------------------------------------------------------------------- /templates/function.ejs: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | // Template from sc5-serverless-boilerplate 4 | // Set the env vars for the function 5 | 6 | module.exports.<%= handlerFunction %> = (event, context, callback) => { 7 | const response = { 8 | statusCode: 200, 9 | body: JSON.stringify({ 10 | message: 'Go Serverless v1.0! Your function executed successfully!', 11 | input: event, 12 | }), 13 | }; 14 | 15 | callback(null, response); 16 | 17 | // Use this code if you don't use the http event with the LAMBDA-PROXY integration 18 | // callback(null, { message: 'Go Serverless v1.0! 
Your function executed successfully!', event }); 19 | }; 20 | -------------------------------------------------------------------------------- /test/aggregate.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | // tests for aggregate 4 | // Generated by serverless-mocha-plugin 5 | 6 | const mod = require('../aggregate/index.js'); 7 | const mochaPlugin = require('serverless-mocha-plugin'); 8 | 9 | const lambdaWrapper = mochaPlugin.lambdaWrapper; 10 | const expect = mochaPlugin.chai.expect; 11 | const wrapped = lambdaWrapper.wrap(mod, { handler: 'handler' }); 12 | 13 | describe('aggregate', () => { 14 | before((done) => { 15 | // lambdaWrapper.init(liveFunction); // Run the deployed lambda 16 | 17 | done(); 18 | }); 19 | 20 | it('implement tests here', () => 21 | wrapped.run({}).then((response) => { 22 | // console.log(response); 23 | response.forEach((d) => { 24 | console.log(d); 25 | }); 26 | 27 | expect(response).to.not.be.empty; 28 | })); 29 | }); 30 | -------------------------------------------------------------------------------- /test/convert.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | // tests for convert 4 | // Generated by serverless-mocha-plugin 5 | 6 | const mod = require('../convert/index.js'); 7 | const mochaPlugin = require('serverless-mocha-plugin'); 8 | 9 | const fs = require('fs'); 10 | 11 | const lambdaWrapper = mochaPlugin.lambdaWrapper; 12 | const expect = mochaPlugin.chai.expect; 13 | const wrapped = lambdaWrapper.wrap(mod, { handler: 'handler' }); 14 | 15 | const event = require('./s3-event.json'); 16 | 17 | describe('convert', () => { 18 | before((done) => { 19 | // lambdaWrapper.init(liveFunction); // Run the deployed lambda 20 | done(); 21 | }); 22 | 23 | it('implement tests here', () => 24 | wrapped.run(event).then((response) => { 25 | fs.writeFileSync('temp.mp3', response.AudioStream); 26 | 
expect(response).to.not.be.empty; 27 | })); 28 | }); 29 | -------------------------------------------------------------------------------- /webpack.config.js: -------------------------------------------------------------------------------- 1 | const path = require('path'); 2 | // const CopyWebpackPlugin = require('copy-webpack-plugin'); 3 | const yaml = require('node-yaml'); 4 | 5 | const nodeExternals = require('webpack-node-externals'); 6 | 7 | function getFunctions() { 8 | const serverlessYml = yaml.readSync('serverless.yml'); 9 | const webPackFunctions = {}; 10 | const functionNames = Object.keys(serverlessYml.functions || {}); 11 | functionNames.forEach((name) => { 12 | const handlerFile = serverlessYml.functions[name].handler.replace(/.[^.]*$/, ''); 13 | webPackFunctions[handlerFile] = `./${handlerFile}.js`; 14 | }); 15 | return webPackFunctions; 16 | } 17 | 18 | module.exports = { 19 | entry: getFunctions(), 20 | target: 'node', 21 | module: { 22 | loaders: [ 23 | { test: /\.json/, loader: 'json-loader' }, 24 | ], 25 | }, 26 | // plugins: [ 27 | // new CopyWebpackPlugin([ 28 | // { from: '.env' }, 29 | // ]), 30 | // ], 31 | output: { 32 | libraryTarget: 'commonjs', 33 | path: path.join(__dirname, '.webpack'), 34 | filename: '[name].js', 35 | }, 36 | externals: [nodeExternals()], 37 | }; 38 | --------------------------------------------------------------------------------