├── scripts
├── lint
├── test
├── test-watch
├── thumbnails
│ ├── Dockerfile
│ └── run
├── index.js
└── new-config.js
├── .gitignore
├── src
├── pulse.js
├── test-helper.js
├── algolia.js
├── disk-logger.js
├── globals.js
├── algolia.settings.js
├── __tests__
│ ├── fileutils.js
│ ├── utils.js
│ ├── transformer.js
│ ├── language.js
│ └── youtube.js
├── fileutils.js
├── utils.js
├── progress.js
├── transformer.js
├── language.js
└── youtube.js
├── jest.config.js
├── configs
├── dataxday.js
├── kiwiparty.js
├── config.sample.js
├── saastr.js
├── paris-container-day.js
├── googleio.js
├── __tests__
│ ├── __snapshots__
│ │ ├── usi.js.snap
│ │ └── algolia-education.js.snap
│ ├── usi.js
│ ├── algolia-education.js
│ ├── takeoffconference.js
│ ├── hackference.js
│ ├── dotconferences.js
│ ├── algolia-meetups.js
│ ├── chatbot_summit.js
│ ├── voice_summit.js
│ ├── criticalrole.js
│ ├── laracon.js
│ ├── writethedocs.js
│ └── config-helper.js
├── odessajs.js
├── takeoffconference.js
├── hackference.js
├── usi.js
├── algolia-meetups.js
├── algolia-education.js
├── chatbot_summit.js
├── voice_summit.js
├── laracon.js
├── writethedocs.js
├── criticalrole.js
├── dotconferences.js
└── config-helper.js
├── .eslintrc.js
├── .babelrc
├── README.md
└── package.json
/scripts/lint:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env sh
2 | set -e
3 |
4 | eslint ./src/*.js ./configs/*.js
5 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | .env
3 | .envrc
4 | google.service-account-file.json
5 | cache
6 | logs
7 |
--------------------------------------------------------------------------------
/scripts/test:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env sh
2 | jest \
3 | --config ./jest.config.js \
4 | --no-cache \
5 | ./src/ ./configs
6 |
--------------------------------------------------------------------------------
/src/pulse.js:
--------------------------------------------------------------------------------
1 | import EventEmitter from 'events';
2 | const pulse = new EventEmitter();
3 |
4 | export default pulse;
5 |
--------------------------------------------------------------------------------
/jest.config.js:
--------------------------------------------------------------------------------
1 | /* eslint-disable import/no-commonjs */
2 | module.exports = {
3 | bail: true,
4 | resetMocks: true,
5 | restoreMocks: true,
6 | };
7 |
--------------------------------------------------------------------------------
/configs/dataxday.js:
--------------------------------------------------------------------------------
1 | export default {
2 | indexName: 'dataxday',
3 | playlists: [
4 | 'PL-Wbj9VN8zDRMzeWZUv0AUEfs-r_t4HyN', // DataXDay'18
5 | ],
6 | };
7 |
--------------------------------------------------------------------------------
/configs/kiwiparty.js:
--------------------------------------------------------------------------------
1 | export default {
2 | indexName: 'kiwiparty',
3 | playlists: [
4 | 'PL-U84vmvcJwUdcIOpIDXgNoVhNWLLpxRc', // KiwiParty 2018
5 | ],
6 | };
7 |
--------------------------------------------------------------------------------
/.eslintrc.js:
--------------------------------------------------------------------------------
1 | module.exports = {
2 | extends: ['algolia', 'algolia/jest'],
3 | rules: {
4 | 'no-console': 0,
5 | 'no-unused-vars': ['error', { argsIgnorePattern: '^_' }],
6 | },
7 | };
8 |
--------------------------------------------------------------------------------
/.babelrc:
--------------------------------------------------------------------------------
1 | {
2 | "presets": [["@babel/preset-env", { "targets": { "node": 6 } }]],
3 | "plugins": [
4 | "dynamic-import-node-sync",
5 | "@babel/plugin-proposal-object-rest-spread"
6 | ]
7 | }
8 |
--------------------------------------------------------------------------------
/configs/config.sample.js:
--------------------------------------------------------------------------------
1 | export default {
2 | indexName: '{{indexName}}',
3 | playlists: ['{{playlistIds}}'],
4 | // transformData(rawRecord, helper) {
5 | // let record = rawRecord;
6 |
7 | // return record;
8 | // }
9 | };
10 |
--------------------------------------------------------------------------------
/configs/saastr.js:
--------------------------------------------------------------------------------
1 | export default {
2 | indexName: 'saastr',
3 | playlists: [
4 | 'PLGlmLTbngJa87gZrq0LohHNQnG_a5t760', // 2018
5 | 'PLGlmLTbngJa9fbcOjinh4FZHVYsizzhdX', // 2017
6 | 'PLGlmLTbngJa-TjQk_B-qAhrjjNu29ydff', // 2016
7 | ],
8 | };
9 |
--------------------------------------------------------------------------------
/scripts/test-watch:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env sh
2 | # --no-watchman is needed, otherwise Jest gets confused as to which file
3 | # changed and runs the tests for the wrong files
4 | jest \
5 | --config ./jest.config.js \
6 | --watch \
7 | --no-watchman \
8 | ./src ./configs
9 |
--------------------------------------------------------------------------------
/configs/paris-container-day.js:
--------------------------------------------------------------------------------
1 | export default {
2 | indexName: 'paris-container-day',
3 | playlists: [
4 | 'PLTQhofNmqyEf6IUbcCtaUPkHk1nJXnQzL', // Paris Container Day 2017
5 | 'PLTQhofNmqyEe_hSmvYaP_EKpR7m0Bk2Je', // Paris Container Day 2018
6 | ],
7 | };
8 |
--------------------------------------------------------------------------------
/configs/googleio.js:
--------------------------------------------------------------------------------
1 | export default {
2 | indexName: 'googleio',
3 | playlists: [
4 | 'PLOU2XLYxmsIInFRc3M44HUTQc3b_YJ4-Y', // 2018
5 | 'PLOU2XLYxmsIKC8eODk_RNCWv3fBcLvMMy', // 2017
6 | 'PLOU2XLYxmsILe6_eGvDN3GyiodoV3qNSC', // 2016
7 | 'PLOU2XLYxmsIKLNUPiFCWVtcO7mZRZ9MmS', // 2015
8 | ],
9 | };
10 |
--------------------------------------------------------------------------------
/configs/__tests__/__snapshots__/usi.js.snap:
--------------------------------------------------------------------------------
1 | // Jest Snapshot v1, https://goo.gl/fbAQLP
2 |
3 | exports[`USI transformData Pour faire naître une idée - Cédric Villani, à l'USI 1`] = `
4 | Object {
5 | "speakers": Array [
6 | Object {
7 | "name": "Cédric Villani",
8 | },
9 | ],
10 | "video": Object {
11 | "title": "Pour faire naître une idée",
12 | },
13 | }
14 | `;
15 |
--------------------------------------------------------------------------------
/configs/__tests__/__snapshots__/algolia-education.js.snap:
--------------------------------------------------------------------------------
1 | // Jest Snapshot v1, https://goo.gl/fbAQLP
2 |
3 | exports[`Algolia Education transformData Algolia Build 101 - Push Data - for Javascript developers 1`] = `
4 | Object {
5 | "language": "Javascript",
6 | "speakers": Array [],
7 | "video": Object {
8 | "title": "Push Data - for Javascript developers",
9 | },
10 | }
11 | `;
12 |
--------------------------------------------------------------------------------
/configs/odessajs.js:
--------------------------------------------------------------------------------
1 | export default {
2 | indexName: 'odessajs',
3 | playlists: [
4 | 'PLUF1zRLAgrPF9ZvT-MJFNdkX4uUi0oPnC', // OdessaJS 2014
5 | 'PLUF1zRLAgrPF5-5Puh3kRpF7Tf1LMGCDv', // OdessaJS 2015
6 | 'PLUF1zRLAgrPET1qRvSeKCraJxsjHZUSjw', // OdessaJS 2016
7 | 'PLUF1zRLAgrPHwKYzXbAprzO3-Ykbq1xkY', // OdessaJS 2017 - talks
8 | 'PLUF1zRLAgrPGnLTqOXYU1Sqy4NNVwAWfd', // OdessaJS'2018 - talks
9 | ],
10 | };
11 |
--------------------------------------------------------------------------------
/src/test-helper.js:
--------------------------------------------------------------------------------
1 | const module = {
2 | /**
3 | * Returns a method to mock the specified module
4 | * @param {Object} moduleToMock The module to mock
5 | * @returns {Function} Function to call with methodName and (optional) return value
6 | **/
7 | mock(moduleToMock) {
8 | return function(methodName, value) {
9 | return jest.spyOn(moduleToMock, methodName).mockReturnValue(value);
10 | };
11 | },
12 | };
13 |
14 | export default module;
15 |
--------------------------------------------------------------------------------
/scripts/thumbnails/Dockerfile:
--------------------------------------------------------------------------------
1 | # NOTE(review): ubuntu:18.10 is a non-LTS release that is presumably past its
2 | # end of life by now; confirm its apt mirrors still resolve before rebuilding.
3 | FROM ubuntu:18.10
4 |
5 | # Install Python and ffmpeg in the same layer as `apt-get update`, so a later
6 | # install can never run against a stale, cached package index
7 | RUN apt-get update -y && \
8 |     apt-get install -y \
9 |       python \
10 |       python-dev \
11 |       python-pip \
12 |       ffmpeg && \
13 |     rm -rf /var/lib/apt/lists/*
14 |
15 | # Install youtube-dl
16 | RUN pip install youtube_dl
17 |
18 | # Install AWS cli
19 | RUN pip install awscli
20 |
21 | # Put executable script at root
22 | COPY run /root/
23 | RUN chmod +x /root/run
24 | ENTRYPOINT ["/root/run"]
25 |
--------------------------------------------------------------------------------
/configs/takeoffconference.js:
--------------------------------------------------------------------------------
1 | export default {
2 | indexName: 'takeoffconference',
3 | playlists: [
4 | 'PLuMK2S9sZg71QqVzwepG-bLBxcJWEzcW9', // 2018
5 | 'PLMz7qMiFSV91TlCtopuwEtoMaPhRx96Tg', // 2014
6 | 'PLMz7qMiFSV93QQUFSDRFWPBcdGHfkySqN', // 2013
7 | ],
8 | transformData(rawRecord, helper) {
9 | let record = rawRecord;
10 |
11 | // Videos all follow the same title pattern
12 | record = helper.enrich(
13 | record,
14 | 'video.title',
15 | '{_} - {video.title} - {_speakers_}'
16 | );
17 |
18 | return record;
19 | },
20 | };
21 |
--------------------------------------------------------------------------------
/configs/__tests__/usi.js:
--------------------------------------------------------------------------------
1 | import config from '../usi.js';
2 | import helper from '../config-helper.js';
3 |
4 | describe('USI', () => {
5 | describe('transformData', () => {
6 | let current;
7 | beforeEach(() => {
8 | current = input => config.transformData(input, helper);
9 | });
10 |
11 | it("Pour faire naître une idée - Cédric Villani, à l'USI", () => {
12 | const input = {
13 | video: {
14 | title: "Pour faire naître une idée - Cédric Villani, à l'USI",
15 | },
16 | };
17 |
18 | const actual = current(input);
19 |
20 | expect(actual).toMatchSnapshot();
21 | });
22 | });
23 | });
24 |
--------------------------------------------------------------------------------
/configs/__tests__/algolia-education.js:
--------------------------------------------------------------------------------
1 | import config from '../algolia-education.js';
2 | import helper from '../config-helper.js';
3 |
4 | describe('Algolia Education', () => {
5 | describe('transformData', () => {
6 | let current;
7 | beforeEach(() => {
8 | current = input => config.transformData(input, helper);
9 | });
10 |
11 | it('Algolia Build 101 - Push Data - for Javascript developers', () => {
12 | const input = {
13 | video: {
14 | title: 'Algolia Build 101 - Push Data - for Javascript developers',
15 | },
16 | };
17 |
18 | const actual = current(input);
19 |
20 | expect(actual).toMatchSnapshot();
21 | });
22 | });
23 | });
24 |
--------------------------------------------------------------------------------
/configs/hackference.js:
--------------------------------------------------------------------------------
1 | import _ from 'lodash';
2 | export default {
3 | indexName: 'hackference',
4 | playlists: [
5 | 'PLJK9M6xgJ-uYeAO4rGRB_yDRFTXwVNWQY', // 2017
6 | ],
7 | transformData(rawRecord, helper) {
8 | let record = rawRecord;
9 |
10 | // Finding conference and year from playlist name
11 | record = helper.enrich(
12 | record,
13 | 'playlist.title',
14 | '{conference.name} {conference.year}'
15 | );
16 | _.update(record, 'conference.year', _.parseInt);
17 |
18 | // Sample:
19 | // Lorna Mitchell - Building a Serverless Data Pipeline #hackference2017
20 | record = helper.enrich(
21 | record,
22 | 'video.title',
23 | '{author.name} - {video.title} #hackference2017'
24 | );
25 |
26 | return record;
27 | },
28 | };
29 |
--------------------------------------------------------------------------------
/configs/usi.js:
--------------------------------------------------------------------------------
1 | export default {
2 | indexName: 'usi',
3 | playlists: [
4 | 'PLyzb9DL11tdZbjRpEDyP4s1pQsxeFg6x2', // 2017
5 | 'PLyzb9DL11tdYqsgu0kQICQKpt0lMz0Nl5', // 2016
6 | 'PLyzb9DL11tdbBE9jpIm76GPcANwSG7Otf', // 2015
7 | ],
8 | transformData(rawRecord, helper) {
9 | let record = rawRecord;
10 |
11 | // Remove mentions of USI in the title
12 | helper.trimKey(
13 | record,
14 | 'video.title',
15 | ", à l'USI",
16 | ', at USI',
17 | 'USI 2015 - ',
18 | 'USI 2016 - ',
19 | 'USI 2016 : ',
20 | 'USI 2017'
21 | );
22 |
23 | // Parse title and speaker name
24 | record = helper.enrich(
25 | record,
26 | 'video.title',
27 | '{video.title} - {_speakers_}'
28 | );
29 |
30 | return record;
31 | },
32 | };
33 |
--------------------------------------------------------------------------------
/configs/__tests__/takeoffconference.js:
--------------------------------------------------------------------------------
1 | import config from '../takeoffconference.js';
2 | import helper from '../config-helper.js';
3 |
4 | describe('takeoffconference', () => {
5 | describe('transformData', () => {
6 | let current;
7 | beforeEach(() => {
8 | current = input => config.transformData(input, helper);
9 | });
10 |
11 | it('should extract author and title from the title', () => {
12 | const input = {
13 | video: {
14 | title: 'TakeOff 2013 - JSONiq - William Candillon',
15 | },
16 | };
17 |
18 | const actual = current(input);
19 |
20 | expect(actual).toHaveProperty('speakers', [
21 | { name: 'William Candillon' },
22 | ]);
23 | expect(actual).toHaveProperty('video.title', 'JSONiq');
24 | });
25 | });
26 | });
27 |
--------------------------------------------------------------------------------
/src/algolia.js:
--------------------------------------------------------------------------------
1 | import indexing from 'algolia-indexing';
2 | import _ from 'lodash';
3 | import globals from './globals';
4 | import chalk from 'chalk';
5 | import defaultIndexSettings from './algolia.settings';
6 |
7 | export default {
8 | run(records) {
9 | const credentials = {
10 | apiKey: globals.algoliaApiKey(),
11 | appId: globals.algoliaAppId(),
12 | indexName: globals.configName(),
13 | };
14 |
15 | let settings = defaultIndexSettings;
16 | const transformSettings = _.get(globals.config(), 'transformSettings');
17 | if (transformSettings) {
18 | settings = transformSettings(settings);
19 | }
20 |
21 | console.info(chalk.blue('Pushing to Algolia'));
22 | indexing.verbose();
23 | indexing.config({ batchMaxSize: 100 });
24 | indexing.fullAtomic(credentials, records, settings);
25 | },
26 | };
27 |
--------------------------------------------------------------------------------
/configs/algolia-meetups.js:
--------------------------------------------------------------------------------
1 | import _ from 'lodash';
2 | export default {
3 | indexName: 'algolia-meetups',
4 | playlists: [
5 | 'PLuHdbqhRgWHIosfqQ-9whwXzN5sgY7NAk', // TechLunch
6 | 'PLuHdbqhRgWHJg9eOFCl5dgLvVjd_DFz8O', // Search Party
7 | 'PLuHdbqhRgWHJAnKsYLIYB5MV2Srj2dEz3', // Meetups
8 | ],
9 | transformData(rawRecord, helper) {
10 | const record = rawRecord;
11 |
12 | // Get meetup name from playlist title
13 | const playlistName = _.get(record, 'playlist.title');
14 | const nameHashes = {
15 | 'TechLunch videos': 'TechLunch',
16 | 'Algolia Search Party': 'Search Party',
17 | Meetups: 'Meetups',
18 | };
19 | _.set(record, 'conference.name', nameHashes[playlistName]);
20 |
21 | // Get year from published date
22 | const publishedDate = _.get(record, 'video.publishedDate.timestamp');
23 | _.set(record, 'conference.year', helper.year(publishedDate));
24 |
25 | return record;
26 | },
27 | };
28 |
--------------------------------------------------------------------------------
/src/disk-logger.js:
--------------------------------------------------------------------------------
1 | const WRITE_RESPONSE_LOGS = process.env.WRITE_RESPONSE_LOGS;
2 | import fileutils from './fileutils';
3 | import _ from 'lodash';
4 |
5 | const module = {
6 | /**
7 | * Log the API return data to disk
8 | *
9 | * @param {String} destination File path to save the file (in the ./logs
10 | * directory)
11 | * @param {Object|String} content Content to store on disk
12 | * @returns {Promise} Write on disk promise
13 | *
14 | * Note that if the content is an object, it will be saved as pretty printed
15 | * JSON, otherwise it will be saved as raw text.
16 | **/
17 | async write(destination, content) {
18 | if (!WRITE_RESPONSE_LOGS) {
19 | return false;
20 | }
21 |
22 | const writeMethod = _.isObject(content)
23 | ? fileutils.writeJson
24 | : fileutils.write;
25 | const writing = await writeMethod(`./logs/${destination}`, content);
26 | return writing;
27 | },
28 | };
29 |
30 | export default module;
31 |
--------------------------------------------------------------------------------
/scripts/index.js:
--------------------------------------------------------------------------------
1 | import youtube from '../src/youtube';
2 | import globals from '../src/globals';
3 | import transformer from '../src/transformer';
4 | import progress from '../src/progress';
5 | import algolia from '../src/algolia';
6 | import yargs from 'yargs';
7 |
8 | /**
9 | * Parsing command line arguments
10 | **/
11 | const argv = yargs
12 | .usage('Usage: yarn index [config]')
13 | .command('$0 config', 'Index the videos of the specified config')
14 | .help(false)
15 | .version(false).argv;
16 |
17 | (async () => {
18 | try {
19 | globals.init(argv.config);
20 |
21 | // Get all video data from YouTube
22 | const videos = await youtube.getVideos();
23 | progress.displayWarnings();
24 |
25 | // Transform videos in records
26 | const records = await transformer.run(videos);
27 | progress.displayWarnings();
28 |
29 | // Push records
30 | await algolia.run(records);
31 | } catch (err) {
32 | console.info(err);
33 | }
34 | })();
35 |
--------------------------------------------------------------------------------
/src/globals.js:
--------------------------------------------------------------------------------
1 | let CONFIG = {};
2 | let CONFIG_NAME = null;
3 | const ALGOLIA_API_KEY = process.env.ALGOLIA_API_KEY;
4 | const ALGOLIA_APP_ID = process.env.ALGOLIA_APP_ID;
5 | const READ_FROM_CACHE = process.env.READ_FROM_CACHE || false;
6 | const WRITE_RESPONSE_LOGS = process.env.WRITE_RESPONSE_LOGS || false;
7 | const YOUTUBE_API_KEY = process.env.YOUTUBE_API_KEY;
8 |
9 | const globals = {
10 | init(configName) {
11 | CONFIG_NAME = configName;
12 | CONFIG = import(`../configs/${configName}.js`).default;
13 | },
14 | readFromCache() {
15 | return READ_FROM_CACHE;
16 | },
17 | writeResponseLogs() {
18 | return WRITE_RESPONSE_LOGS;
19 | },
20 | config() {
21 | return CONFIG;
22 | },
23 | configName() {
24 | return CONFIG_NAME;
25 | },
26 | youtubeApiKey() {
27 | return YOUTUBE_API_KEY;
28 | },
29 | algoliaAppId() {
30 | return ALGOLIA_APP_ID;
31 | },
32 | algoliaApiKey() {
33 | return ALGOLIA_API_KEY;
34 | },
35 | };
36 |
37 | export default globals;
38 |
--------------------------------------------------------------------------------
/scripts/thumbnails/run:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env sh
2 | # Abort on the first failing command, so a failed download or extraction never
3 | # leads to a partial upload
4 | set -e
5 |
6 | # Needed environment variables:
7 | # VIDEO_ID: The YouTube video id to target
8 | # AWS_ACCESS_KEY_ID: AWS Access Key
9 | # AWS_SECRET_ACCESS_KEY: AWS Secret Access Key
10 | : "${VIDEO_ID:?VIDEO_ID environment variable is required}"
11 |
12 | # TODO:
13 | # Stop if thumbnails already in S3
14 |
15 | # Work in /tmp/talksearch/{VIDEO_ID}
16 | cd /tmp
17 | mkdir -p "talksearch/$VIDEO_ID"
18 | cd "./talksearch/$VIDEO_ID"
19 |
20 | # Download the video
21 | # The `--` keeps an id starting with a dash from being parsed as an option
22 | youtube-dl \
23 |   --output "video.mp4" \
24 |   --format 133 \
25 |   --continue \
26 |   -- "$VIDEO_ID"
27 |
28 | # Extract one thumbnail for every minute of video
29 | ffmpeg \
30 |   -i "video.mp4" \
31 |   -vf fps=1/60 \
32 |   "%d.jpg"
33 |
34 | # Push thumbnails to S3, under the video id directory
35 | # Every file is included by default, so everything must be excluded first for
36 | # the --include filter to restrict the upload to the thumbnails
37 | aws s3 \
38 |   cp . \
39 |   "s3://talksearch/thumbnails/${VIDEO_ID}/" \
40 |   --recursive \
41 |   --exclude "*" \
42 |   --include "*.jpg"
43 |
--------------------------------------------------------------------------------
/configs/__tests__/hackference.js:
--------------------------------------------------------------------------------
1 | import config from '../hackference.js';
2 | import helper from '../config-helper.js';
3 |
4 | describe('hackference', () => {
5 | describe('transformData', () => {
6 | let current;
7 | beforeEach(() => {
8 | current = input => config.transformData(input, helper);
9 | });
10 |
11 | it('should extract author and title from the title', () => {
12 | const input = {
13 | video: {
14 | title:
15 | 'Lorna Mitchell - Building a Serverless Data Pipeline #hackference2017',
16 | },
17 | };
18 |
19 | const actual = current(input);
20 |
21 | expect(actual).toHaveProperty('author.name', 'Lorna Mitchell');
22 | expect(actual).toHaveProperty(
23 | 'video.title',
24 | 'Building a Serverless Data Pipeline'
25 | );
26 | });
27 |
28 | it('should extract the conference name and year from the playlist', () => {
29 | const input = {
30 | playlist: {
31 | title: 'Hackference 2017',
32 | },
33 | };
34 |
35 | const actual = current(input);
36 |
37 | expect(actual).toHaveProperty('conference.name', 'Hackference');
38 | expect(actual).toHaveProperty('conference.year', 2017);
39 | });
40 | });
41 | });
42 |
--------------------------------------------------------------------------------
/configs/algolia-education.js:
--------------------------------------------------------------------------------
1 | import _ from 'lodash';
2 | export default {
3 | indexName: 'algolia-education',
4 | playlists: [
5 | 'PLuHdbqhRgWHIVm1e43_7mKUJw3UIreV84', // Algolia 101
6 | 'PLuHdbqhRgWHJDATsVq_Mrj3NjIRjREiM1', // Discover Algolia
7 | ],
8 | transformData(rawRecord, helper) {
9 | let record = rawRecord;
10 |
11 | // Trim the "Algolia Build 101" from the start
12 | record = helper.trimKey(record, 'video.title', 'Algolia Build 101 - ');
13 |
14 | // Extract the language if one is defined
15 | const videoTitle = _.get(record, 'video.title');
16 | const matches = videoTitle.match(/for (.*) developers/);
17 | if (!_.isEmpty(matches)) {
18 | const [, language] = matches;
19 | _.set(record, 'language', language);
20 | }
21 |
22 | // Remove the speakers
23 | _.set(record, 'speakers', []);
24 |
25 | return record;
26 | },
27 | transformSettings(rawSettings) {
28 | const settings = rawSettings;
29 |
30 | // Adding custom faceting on the language
31 | const attributesForFaceting = _.get(rawSettings, 'attributesForFaceting');
32 | attributesForFaceting.push('language');
33 | _.set(settings, 'attributesForFaceting', attributesForFaceting);
34 |
35 | return settings;
36 | },
37 | };
38 |
--------------------------------------------------------------------------------
/configs/chatbot_summit.js:
--------------------------------------------------------------------------------
1 | import _ from 'lodash';
2 | export default {
3 | indexName: 'chatbot_summit',
4 | playlists: [
5 | 'PLTr6zBI1qE6ZJLibC66IsVpfkW9LGC6j_', // 2018
6 | 'PLTr6zBI1qE6YLYSi05CYy3O5qYy_M9oze', // 2017
7 | ],
8 | transformData(rawRecord, helper) {
9 | let record = rawRecord;
10 |
11 | // Remove conference name from video titles
12 | record = helper.trimKey(
13 | record,
14 | 'video.title',
15 | 'The 2nd International Chatbot Summit',
16 | 'Chatbot Summit Tel Aviv 2018',
17 | 'Chatbot Summit Berlin 2017'
18 | );
19 |
20 | // Remove speaker name from titles
21 | const speakerNames = _.map(_.get(record, 'speakers'), 'name');
22 | let videoTitle = _.get(record, 'video.title');
23 | if (speakerNames.length === 1) {
24 | _.each(speakerNames, speakerName => {
25 | videoTitle = _.replace(videoTitle, `${speakerName} //`, '');
26 | videoTitle = _.replace(videoTitle, `// ${speakerName}`, '');
27 | });
28 | }
29 |
30 | // remove other cruft
31 | videoTitle = _.replace(videoTitle, '| |', '|');
32 | videoTitle = _.trim(videoTitle, '/|');
33 | videoTitle = _.trim(videoTitle);
34 | _.set(record, 'video.title', videoTitle);
35 |
36 | return record;
37 | },
38 | };
39 |
--------------------------------------------------------------------------------
/src/algolia.settings.js:
--------------------------------------------------------------------------------
1 | import _ from 'lodash';
2 | // We manually disable typo on years
3 | const yearsTypoDisabled = _.times(60, year => `${1970 + year}`);
4 |
5 | const module = {
6 | searchableAttributes: [
7 | 'unordered(video.title)',
8 | 'unordered(speakers.name)',
9 | 'unordered(caption.content)',
10 | 'unordered(conference.name)',
11 | ],
12 | customRanking: [
13 | 'desc(video.hasCaptions)',
14 | 'desc(video.popularity.score)',
15 | 'desc(video.hasManualCaptions)',
16 | 'desc(video.publishedDate.day)',
17 | 'desc(video.duration.minutes)',
18 | 'asc(video.positionInPlaylist)',
19 | 'asc(caption.start)',
20 | ],
21 | attributesForFaceting: [
22 | 'speakers.name',
23 | 'conference.name',
24 | 'conference.year',
25 | 'video.hasManualCaptions',
26 | 'video.id',
27 | 'video.languageCode',
28 | 'caption.languageCode',
29 | 'playlist.id',
30 | 'playlist.title',
31 | 'channel.id',
32 | 'channel.title',
33 | ],
34 | attributesToSnippet: ['caption.content:8'],
35 | distinct: true,
36 | attributeForDistinct: 'video.id',
37 | highlightPreTag: '',
38 | highlightPostTag: '',
39 | advancedSyntax: true,
40 | disableTypoToleranceOnWords: yearsTypoDisabled,
41 | };
42 |
43 | export default module;
44 |
--------------------------------------------------------------------------------
/configs/__tests__/dotconferences.js:
--------------------------------------------------------------------------------
1 | import config from '../dotconferences.js';
2 | import helper from '../config-helper.js';
3 |
4 | describe('dotconferences', () => {
5 | describe('transformData', () => {
6 | let current;
7 | beforeEach(() => {
8 | current = input => config.transformData(input, helper);
9 | });
10 |
11 | it('should extract the conference name from the playlist', () => {
12 | const input = {
13 | playlist: {
14 | title: 'dotJS 2017',
15 | },
16 | };
17 |
18 | const actual = current(input);
19 |
20 | expect(actual).toHaveProperty('conference.name', 'dotJS');
21 | });
22 |
23 | it('should extract title and speaker information', () => {
24 | const input = {
25 | video: {
26 | title: 'dotJS 2013 - Remy Sharp - iframe abuse',
27 | },
28 | };
29 |
30 | const actual = current(input);
31 |
32 | expect(actual).toHaveProperty('speakers', [{ name: 'Remy Sharp' }]);
33 | expect(actual).toHaveProperty('video.title', 'iframe abuse');
34 | });
35 |
36 | it('should keep the title as-is if not following the pattern', () => {
37 | const input = {
38 | video: {
39 | title: 'A day at dotJS 2017',
40 | },
41 | };
42 |
43 | const actual = current(input);
44 |
45 | expect(actual).toHaveProperty('video.title', 'A day at dotJS 2017');
46 | });
47 | });
48 | });
49 |
--------------------------------------------------------------------------------
/configs/voice_summit.js:
--------------------------------------------------------------------------------
1 | import _ from 'lodash';
2 | export default {
3 | indexName: 'voice_summit',
4 | playlists: [
5 | 'PLn51IO3rbkV1E1a6WjgvFtW3VaOCRxzov', // VOICE Summit 2018: Speakers
6 | ],
7 | transformData(rawRecord, helper) {
8 | const record = rawRecord;
9 |
10 | function capitalizeName(speakerName) {
11 | return _.map(_.words(speakerName), _.capitalize).join(' ');
12 | }
13 |
14 | const isPanel = _.startsWith(record.video.title, 'panel');
15 |
16 | // Cleaning title
17 | helper.trimKey(record, 'video.title', 'keynote', 'panel', 'enterprise-');
18 | let originalTitle = _.get(record, 'video.title');
19 | originalTitle = _.trimEnd(originalTitle, '-');
20 |
21 | const split = helper.split(originalTitle, '- ');
22 | let videoTitle;
23 | let speakers;
24 |
25 | // Panels
26 | if (isPanel) {
27 | videoTitle = _.capitalize(split[0]);
28 | speakers = _.map(_.split(split[1], ','), speakerName => ({
29 | name: capitalizeName(speakerName),
30 | }));
31 | }
32 |
33 | if (!isPanel) {
34 | videoTitle = _.map(_.slice(split, 1), _.capitalize).join(' - ');
35 |
36 | speakers = _.map(helper.split(split[0], 'and', ','), speakerName => ({
37 | name: capitalizeName(speakerName),
38 | }));
39 | }
40 |
41 | _.set(record, 'video.title', videoTitle);
42 | _.set(record, 'speakers', speakers);
43 |
44 | return record;
45 | },
46 | };
47 |
--------------------------------------------------------------------------------
/configs/__tests__/algolia-meetups.js:
--------------------------------------------------------------------------------
1 | import _ from 'lodash';
2 | import config from '../algolia-meetups.js';
3 | import helper from '../config-helper.js';
4 |
5 | describe('Algolia Meetups', () => {
6 | describe('transformData', () => {
7 | let current;
8 | beforeEach(() => {
9 | current = input => config.transformData(input, helper);
10 | });
11 |
12 | it('sets the year to 2017', () => {
13 | const input = {};
14 | _.set(input, 'video.publishedDate.timestamp', 1490979292);
15 |
16 | const actual = current(input);
17 |
18 | expect(actual).toHaveProperty('conference.year', 2017);
19 | });
20 |
21 | it('sets the conference name to TechLunch', () => {
22 | const input = {};
23 | _.set(input, 'playlist.title', 'TechLunch videos');
24 |
25 | const actual = current(input);
26 |
27 | expect(actual).toHaveProperty('conference.name', 'TechLunch');
28 | });
29 |
30 | it('sets the conference name to Search Party', () => {
31 | const input = {};
32 | _.set(input, 'playlist.title', 'Algolia Search Party');
33 |
34 | const actual = current(input);
35 |
36 | expect(actual).toHaveProperty('conference.name', 'Search Party');
37 | });
38 |
39 | it('sets the conference name to Meetups', () => {
40 | const input = {};
41 | _.set(input, 'playlist.title', 'Meetups');
42 |
43 | const actual = current(input);
44 |
45 | expect(actual).toHaveProperty('conference.name', 'Meetups');
46 | });
47 | });
48 | });
49 |
--------------------------------------------------------------------------------
/configs/laracon.js:
--------------------------------------------------------------------------------
1 | import _ from 'lodash';
2 | export default {
3 | indexName: 'laracon',
4 | playlists: [
5 | 'PLMdXHJK-lGoB-CIVsiQt0WU8WcYrb5eoe', // Laracon EU 2013 - Full Playlist
6 | 'PLMdXHJK-lGoCYhxlU3OJ5bOGhcKtDMkcN', // Laracon EU 2014 - Full Playlist
7 | 'PLMdXHJK-lGoA9SIsuFy0UWL8PZD1G3YFZ', // Laracon EU 2015 - Full Playlist
8 | 'PLMdXHJK-lGoCMkOxqe82hOC8tgthqhHCN', // Laracon EU 2016 - Full Playlist
9 | 'PLMdXHJK-lGoBFZgG2juDXF6LiikpQeLx2', // Laracon EU 2017 - Full Playlist
10 | ],
11 | transformData(rawRecord, helper) {
12 | let record = rawRecord;
13 |
14 | // Get the place and year for the playlist
15 | record = helper.enrich(
16 | record,
17 | 'playlist.title',
18 | 'Laracon {conference.year} - Full Playlist'
19 | );
20 |
21 | // 2013
22 | if (_.get(record, 'conference.year') === 'EU 2013') {
23 | record = helper.enrich(
24 | record,
25 | 'video.title',
26 | '{_speakers_} - {video.title}'
27 | );
28 | }
29 |
30 | // 2014
31 | if (_.get(record, 'conference.year') === 'EU 2014') {
32 | record = helper.enrich(
33 | record,
34 | 'video.title',
35 | '{_speakers_} - {video.title} at Laracon EU 2014'
36 | );
37 | }
38 |
39 | // 2015
40 | if (_.get(record, 'conference.year') === 'EU 2015') {
41 | record = helper.enrich(
42 | record,
43 | 'video.title',
44 | '{video.title} - {_speakers_} - {_}'
45 | );
46 | }
47 |
48 | // 2016-2017
49 | if (_.includes(['EU 2016', 'EU 2017'], _.get(record, 'conference.year'))) {
50 | record = helper.enrich(
51 | record,
52 | 'video.title',
53 | '{_speakers_} - {video.title} - {_}'
54 | );
55 | }
56 |
57 | return record;
58 | },
59 | };
60 |
--------------------------------------------------------------------------------
/src/__tests__/fileutils.js:
--------------------------------------------------------------------------------
1 | import module from '../fileutils';
2 | import helper from '../test-helper';
3 | const mock = helper.mock(module);
4 |
5 | jest.mock('glob');
6 | import glob from 'glob';
7 | jest.mock('fs');
8 | import fs from 'fs';
9 | jest.mock('pify');
10 | import pify from 'pify';
11 |
12 | describe('fileutils', () => { // Unit tests for the promise-based helpers of ../fileutils
13 | describe('glob', () => {
14 | it('is a promise wrapper around glob', async () => {
15 | module._glob = null; // drop the memoized wrapper so glob() has to go through pify again
16 | const mockGlob = jest.fn().mockReturnValue('foo');
17 | pify.mockReturnValue(mockGlob); // pify is jest-mocked: the "promisified" glob it hands back is mockGlob
18 |
19 | const actual = await module.glob('pattern');
20 |
21 | expect(actual).toEqual('foo');
22 | expect(pify).toHaveBeenCalledWith(glob); // the wrapper must be built from the glob package
23 | expect(mockGlob).toHaveBeenCalledWith('pattern');
24 | });
25 | });
26 |
27 | describe('read', () => {
28 | it('is a promise wrapper around fs.readFile', async () => {
29 | module._readFile = null; // drop the memoized wrapper so read() has to go through pify again
30 | const mockReadFile = jest.fn().mockReturnValue('foo');
31 | pify.mockReturnValue(mockReadFile); // must be set before the call: pify is a mock shared with the glob test
32 |
33 | const actual = await module.read('filepath');
34 |
35 | expect(actual).toEqual('foo');
36 | expect(pify).toHaveBeenCalledWith(fs.readFile); // the wrapper must be built from fs.readFile
37 | expect(mockReadFile).toHaveBeenCalledWith('filepath');
38 | });
39 | });
40 |
41 | describe('readJson', () => {
42 | it('should return null if no such file', async () => {
43 | mock('read').mockImplementation(() => { // NOTE(review): mock() comes from test-helper; presumably stubs module.read - confirm
44 | throw new Error(); // stands in for a failed read
45 | });
46 |
47 | const actual = await module.readJson();
48 |
49 | expect(actual).toEqual(null);
50 | });
51 |
52 | it('should return null if not a Json file', async () => {
53 | mock('read', 'foo'); // 'foo' is not valid JSON; presumably becomes the return value of module.read - confirm
54 |
55 | const actual = await module.readJson();
56 |
57 | expect(actual).toEqual(null);
58 | });
59 |
60 | it('should parse the JSON content as an object', async () => {
61 | mock('read', '{"foo": "bar"}'); // valid JSON this time
62 |
63 | const actual = await module.readJson();
64 |
65 | expect(actual).toHaveProperty('foo', 'bar');
66 | });
67 | });
68 | });
69 |
--------------------------------------------------------------------------------
/configs/writethedocs.js:
--------------------------------------------------------------------------------
1 | import _ from 'lodash';
2 |
3 | export default {
4 | indexName: 'writethedocs',
5 | playlists: [
6 | 'PLZAeFn6dfHpnHBLE4qEUwg1LjhDZEvC2A', // Write the Docs EU 2014
7 | 'PLZAeFn6dfHplFNTsVdBuHk6vPZbsvHtDw', // Write the Docs Europe 2015
8 | 'PLZAeFn6dfHpnN8fXXHwPtPY33aLGGhYLJ', // Write the Docs Europe 2016
9 | 'PLZAeFn6dfHpkBld-70TsOoYToM3CaTxRC', // Write the Docs Portland 2017
10 | 'PLZAeFn6dfHplBYPCwJt6ItkMDt7JSgUiL', // Write the Docs Prague 2017
11 | 'PLZAeFn6dfHplUgfLOLEuHHAm1HdrIyaZ7', // Write the Docs Portland 2018
12 | ],
13 | transformData(rawRecord, helper) {
14 | let record = rawRecord;
15 | const videoTitle = _.get(record, 'video.title');
16 |
17 | // Get the place and year for the year
18 | record = helper.enrich(
19 | record,
20 | 'playlist.title',
21 | 'Write the Docs {conference.year}'
22 | );
23 |
24 | // Keep lightning talks
25 | if (videoTitle && videoTitle.match(/lightning talks/i)) {
26 | return record;
27 | }
28 |
29 | // Portland 2018
30 | if (_.get(record, 'conference.year') === 'Portland 2018') {
31 | record = helper.enrich(
32 | record,
33 | 'video.title',
34 | '{video.title} - {_speakers_} - Write the Docs Portland 2018'
35 | );
36 | return record;
37 | }
38 |
39 | // Prague 2017
40 | if (_.get(record, 'conference.year') === 'Prague 2017') {
41 | record = helper.enrich(
42 | record,
43 | 'video.title',
44 | 'Write the Docs Prague 2017: {video.title} by {_speakers_}'
45 | );
46 | return record;
47 | }
48 |
49 | // Portland 2017
50 | if (_.get(record, 'conference.year') === 'Portland 2017') {
51 | record = helper.enrich(
52 | record,
53 | 'video.title',
54 | 'Write the Docs Portland 2017: {video.title} by {_speakers_}'
55 | );
56 | return record;
57 | }
58 |
59 | // Older conferences
60 | record = helper.enrich(
61 | record,
62 | 'video.title',
63 | '{_speakers_} - {video.title}'
64 | );
65 | return record;
66 | },
67 | };
68 |
--------------------------------------------------------------------------------
/configs/criticalrole.js:
--------------------------------------------------------------------------------
1 | import _ from 'lodash';
2 |
3 | export default {
4 | indexName: 'criticalrole',
5 | playlists: [
6 | 'PL1tiwbzkOjQz7D0l_eLJGAISVtcL7oRu_', // Campaign 1: Vox Machina
7 | 'PL1tiwbzkOjQxD0jjAE7PsWoaCrs0EkBH2', // Campaign 2: The Mighty Nein
8 | ],
9 | transformSettings(rawSettings) {
10 | return {
11 | ...rawSettings,
12 | customRanking: [
13 | 'desc(video.hasCaptions)',
14 | 'asc(video.campaignNumber)',
15 | 'asc(video.episodeNumber)',
16 | 'asc(caption.start)',
17 | ],
18 | attributesForFaceting: [
19 | 'video.id',
20 | 'caption.languageCode',
21 | 'caption.playerName',
22 | 'playlist.id',
23 | 'playlist.title',
24 | 'channel.id',
25 | 'channel.title',
26 | ],
27 | };
28 | },
29 | transformData(rawRecord, helper) {
30 | let record = rawRecord;
31 |
32 | const initialTitle = record.video.title;
33 |
34 | // Campaign 2
35 | if (_.includes(initialTitle, 'Campaign 2')) {
36 | record = helper.enrich(
37 | record,
38 | 'video.title',
39 | '{video.title} | Critical Role | Campaign 2, Episode {video.episodeNumber}'
40 | );
41 | record.video.campaignNumber = 2;
42 | record.video.episodeNumber = _.parseInt(record.video.episodeNumber);
43 | return record;
44 | }
45 |
46 | // Campaign 1
47 | record.video.campaignNumber = 1;
48 | record = helper.enrich(
49 | record,
50 | 'video.title',
51 | '{_} Episode {video.episodeNumber}'
52 | );
53 | const episodeNumber = _.get(record, 'video.episodeNumber');
54 |
55 | const parts = helper.split(initialTitle, '-', '|');
56 | let videoTitle = parts[0];
57 |
58 | const episodePartRegexp = new RegExp('[0-9]* pt. (.*)');
59 | const severalPartsMatch = episodeNumber.match(episodePartRegexp);
60 | if (severalPartsMatch) {
61 | videoTitle = `${videoTitle}, part ${severalPartsMatch[1]}`;
62 | }
63 |
64 | _.set(record, 'video.episodeNumber', _.parseInt(episodeNumber));
65 | _.set(record, 'video.title', videoTitle);
66 |
67 | return record;
68 | },
69 | };
70 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # TalkSearch scraper
2 |
3 | This scraper is a command-line tool that extracts information from YouTube
4 | playlists and pushes it to Algolia.
5 |
6 | ## Usage
7 |
8 | ```shell
9 | yarn index {config_name}
10 | ```
11 |
12 | ## How it works
13 |
14 | The `./configs/` folder contains custom configs, each containing a list of
15 | playlists to index.
16 |
17 | The command will use the YouTube API to fetch data about the defined playlists
18 | and push them to Algolia.
19 |
20 | Captions will be extracted from the videos if they are available. Each record in
21 | Algolia will represent one caption, also containing a `.video`, `.playlist` and
22 | `.channel` key. The `distinct` feature of Algolia is used to group records of
23 | the same video together, to display the most relevant caption each time.
24 |
25 | Each channel will have its own index called `{channel_name}_{channel_id}`. All
26 | videos of all playlists will be saved in this index, but can be filtered based
27 | on the `channel.id` and `playlist.id` keys of the records.
28 |
29 | ## Development
30 |
31 | Start with `yarn install` to load all the dependencies.
32 |
33 | The project will need `ENV` variables to connect to the services.
34 |
35 | * `ALGOLIA_APP_ID` and `ALGOLIA_API_KEY` for pushing records to Algolia
36 | * `YOUTUBE_API_KEY` to connect to the YouTube API
37 | * `GOOGLE_APPLICATION_CREDENTIALS` that points to the path to your
38 | `google.service-account-file.json` ([create one here][2])
39 |
40 | We suggest using a tool like [direnv][1] to load those variables through the use
41 | of a `.envrc` file.
42 |
43 | Once everything is installed, you can run `yarn index {config_name}`
44 |
45 | ## Debug calls
46 |
47 | ### `yarn run index:cache`
48 |
49 | This will read data from a disk cache of previous requests instead of making
50 | actual HTTP calls. If there is no cache hit for the request, it will do it for
51 | real.
52 |
53 | This should be the preferred way of running the command for debugging purposes.
54 |
55 | ### `yarn run index:logs`
56 |
57 | This will log the raw responses of all HTTP calls to disk. This is useful when
58 | debugging, as it allows you to dig into the responses of the APIs called.
59 |
60 | [1]: https://direnv.net/
61 | [2]: https://console.cloud.google.com/apis/credentials/serviceaccountkey
62 |
--------------------------------------------------------------------------------
/configs/dotconferences.js:
--------------------------------------------------------------------------------
1 | export default {
2 | indexName: 'dotconferences',
3 | playlists: [
4 | 'PLMW8Xq7bXrG7LL-bLSweRFmFw7y2HhypC', // dotJS 2018
5 | 'PLMW8Xq7bXrG702XVNfv_zqfFdt-498iV_', // dotCSS 2018
6 | 'PLMW8Xq7bXrG4zEMLdfZTpS9VCKjXeD--h', // dotScale 2018
7 | 'PLMW8Xq7bXrG6M2Nabwt3LuBxZyHVHRZhf', // dotAI 2018
8 | 'PLMW8Xq7bXrG4OC1CZW7m-davg4p4ZCBmZ', // dotSwift 2018
9 |
10 | 'PLMW8Xq7bXrG4gs_BDyI7q009IVDUMQRXB', // dotJS 2017
11 | 'PLMW8Xq7bXrG7acNjsU5YMGl5MMK5gl2vn', // dotGo 2017
12 | 'PLMW8Xq7bXrG7xzLo4j6bDznWzH7ZDc3wx', // dotSecurity 2017
13 | 'PLMW8Xq7bXrG4AcSG9ZcqvMQSp6f0C7mi5', // dotSwift 2017
14 | 'PLMW8Xq7bXrG78Xxnlxov8N_M9mNUN-1Ny', // dotCSS 2017
15 | 'PLMW8Xq7bXrG7fNNYHvpeagKHw4DaUkgud', // dotScale 2017
16 | 'PLMW8Xq7bXrG6-vlD0QFfFf0oi5vtTDcmQ', // dotAI 2017
17 |
18 | 'PLMW8Xq7bXrG6tcAXDsAVATUbrflLOsIG_', // dotGo 2016
19 | 'PLMW8Xq7bXrG7AAvnkys8joKEq8uMGykx7', // dotScale 2016
20 | 'PLMW8Xq7bXrG7XSuKb3M3bSJ4d1XM0Z-gI', // dotCSS 2016
21 | 'PLMW8Xq7bXrG7rZnRaYCel_RJY5yAXLQ2H', // dotJS 2016
22 | 'PLMW8Xq7bXrG4jymjKULrw5_yEvK3uzATe', // dotSecurity 2016
23 |
24 | 'PLMW8Xq7bXrG70G62mxQR0OC4GkUcNLRnC', // dotJS 2015
25 | 'PLMW8Xq7bXrG5kujoYQdw94ip3cnV4WR59', // dotCSS 2015
26 | 'PLMW8Xq7bXrG4Vw-JAnBmqA2IqzM2sf2Na', // dotGo 2015
27 | 'PLMW8Xq7bXrG64KRc6PC0JLWFX2ygzFJDG', // dotScale 2015
28 |
29 | 'PLMW8Xq7bXrG5B_oW-EX8AuLDG0BCwouis', // dotCSS 2014
30 | 'PLMW8Xq7bXrG4bTkovexbhgrcD8BVyHmiS', // dotJS 2014
31 | 'PLMW8Xq7bXrG58Qk-9QSy2HRh2WVeIrs7e', // dotGo 2014
32 | 'PLMW8Xq7bXrG4pl13YVsKkaAUDeLdnrEQZ', // dotScale 2014
33 |
34 | 'PLMW8Xq7bXrG6ZItH9Oq2tceeTS0fjXyii', // dotRB 2013
35 | 'PLMW8Xq7bXrG77SV1VAAiAciRyq3VSC2Gq', // dotJS 2012
36 | 'PLMW8Xq7bXrG486Mh95hKjiXRdci60zUlL', // dotJS 2013
37 | 'PLMW8Xq7bXrG7XGG29sXso2hYYNW_14s_A', // dotScale 2013
38 | ],
39 | transformData(rawRecord, helper) {
40 | let record = rawRecord;
41 |
42 | // Get conference name from the playlist title
43 | record = helper.enrich(record, 'playlist.title', '{conference.name} {_}');
44 |
45 | // Extract speaker name and video title from title
46 | record = helper.enrich(
47 | record,
48 | 'video.title',
49 | '{_} - {_speakers_} - {video.title}'
50 | );
51 |
52 | return record;
53 | },
54 | };
55 |
--------------------------------------------------------------------------------
/configs/__tests__/chatbot_summit.js:
--------------------------------------------------------------------------------
1 | import config from '../chatbot_summit.js';
2 | import helper from '../config-helper.js';
3 |
4 | describe('Chatbot Summit', () => {
5 | describe('transformData', () => {
6 | let current;
7 | beforeEach(() => {
8 | current = input => config.transformData(input, helper);
9 | });
10 |
11 | it('2017, Chatbots ready for enterprise', () => {
12 | const input = {
13 | speakers: [{ name: 'Piyush Chandra' }],
14 | video: {
15 | title:
16 | 'Piyush Chandra // Are Chatbots ready for Enterprise? // Chatbot Summit Berlin 2017',
17 | },
18 | };
19 |
20 | const actual = current(input);
21 |
22 | expect(actual).toHaveProperty(
23 | 'video.title',
24 | 'Are Chatbots ready for Enterprise?'
25 | );
26 | });
27 |
28 | it('2017, The Ethical Beliefs of Machines', () => {
29 | const input = {
30 | speakers: [{ name: 'Nicolai Andersen' }],
31 | video: {
32 | title:
33 | 'Chatbot Summit Berlin 2017 // Nicolai Andersen // The Ethical Beliefs of Machines',
34 | },
35 | };
36 |
37 | const actual = current(input);
38 |
39 | expect(actual).toHaveProperty(
40 | 'video.title',
41 | 'The Ethical Beliefs of Machines'
42 | );
43 | });
44 |
45 | it('2017, Opening Keynote', () => {
46 | const input = {
47 | video: {
48 | title:
49 | 'Yoav Barel, Founder & CEO Chatbot Summit | The 2nd International Chatbot Summit | Opening Keynote',
50 | },
51 | };
52 |
53 | const actual = current(input);
54 |
55 | expect(actual).toHaveProperty(
56 | 'video.title',
57 | 'Yoav Barel, Founder & CEO Chatbot Summit | Opening Keynote'
58 | );
59 | });
60 |
61 | it('2017, The Secrets of Bots at Scale', () => {
62 | const input = {
63 | speakers: [{ name: 'Eran Vanounou' }, { name: 'Adam Orentlicher' }],
64 | video: {
65 | title:
66 | 'Eran Vanounou and Adam Orentlicher // The Secrets of Bots at Scale',
67 | },
68 | };
69 |
70 | const actual = current(input);
71 |
72 | expect(actual).toHaveProperty(
73 | 'video.title',
74 | 'Eran Vanounou and Adam Orentlicher // The Secrets of Bots at Scale'
75 | );
76 | });
77 | });
78 | });
79 |
--------------------------------------------------------------------------------
/src/fileutils.js:
--------------------------------------------------------------------------------
1 | import stringify from 'json-stable-stringify';
2 | import _ from 'lodash';
3 | import fs from 'fs';
4 | import path from 'path';
5 | import mkdirpCallback from 'mkdirp';
6 | import glob from 'glob';
7 | import pify from 'pify';
8 | const writeFile = pify(fs.writeFile);
9 | const mkdirp = pify(mkdirpCallback);
10 |
11 | const module = {
12 | /**
13 | * Wrapper around glob() to work as a promise
14 | * @param {String} pattern Glob pattern to match
15 | * @returns {Array} Array of files matching
16 | **/
17 | async glob(pattern) {
18 | if (!this._glob) {
19 | this._glob = pify(glob);
20 | }
21 | return await this._glob(pattern);
22 | },
23 |
24 | /**
25 | * Read anyfile on disk
26 | * @param {String} filepath Filepath of the file to read
27 | * @returns {String} Content of the file read
28 | **/
29 | async read(filepath) {
30 | if (!this._readFile) {
31 | this._readFile = pify(fs.readFile);
32 | }
33 | return await this._readFile(filepath);
34 | },
35 |
36 | /**
37 | * Read a JSON file on disk and return its parsed content.
38 | * @param {String} source Path to the Json file
39 | * @return {Promise.