├── .nvmrc
├── .nojekyll
├── docs
    ├── .keep
    ├── QA
    │   ├── .keep
    │   ├── apple-script-testing.md
    │   └── README.md
    ├── guides
    │   ├── .keep
    │   ├── README.md
    │   ├── npm-tags.md
    │   ├── dpe-transcript-format.md
    │   └── features-list.md
    ├── notes
    │   ├── .keep
    │   ├── README.md
    │   ├── pause-while-typing.md
    │   ├── insert-text-at-selection.md
    │   ├── web-workers.md
    │   ├── insert-slate-functions.md
    │   ├── pause-while-typing-timer.md
    │   ├── OHMS.md
    │   ├── debounce.md
    │   ├── css-injection-karaoke.md
    │   ├── set-selection.md
    │   ├── deconstructing word timing computation.md
    │   ├── verbose-generate-previous-timings-up-to-current-func.md
    │   ├── draftjs-vs-slatejs.md
    │   ├── notes.md
    │   └── alternative-alignment-approaches.md
    ├── adr
    │   ├── README.md
    │   └── adr-template.md
    ├── README.md
    └── SUMMARY.md
├── .gitbook.yaml
├── .prettierignore
├── .prettierrc
├── src
    ├── components
    │   ├── slate-helpers
    │   │   ├── handle-split-paragraph
    │   │   │   ├── is-same-block.js
    │   │   │   ├── is-selection-collapsed.js
    │   │   │   ├── is-beginning-of-the-block.js
    │   │   │   ├── split-text-at-offset.js
    │   │   │   ├── split-words-list-at-offset.js
    │   │   │   ├── is-end-of-the-block.js
    │   │   │   ├── is-text-same-as-words-list.js
    │   │   │   └── index.js
    │   │   ├── break-paragraph
    │   │   │   └── index.js
    │   │   ├── insert-text
    │   │   │   └── index.js
    │   │   ├── README.md
    │   │   ├── set-node
    │   │   │   └── index.js
    │   │   ├── set-selection
    │   │   │   └── index.js
    │   │   ├── collapse-selection-to-a-single-point
    │   │   │   └── index.js
    │   │   ├── get-node-by-path
    │   │   │   └── index.js
    │   │   ├── merge-nodes
    │   │   │   └── index.js
    │   │   ├── remove-nodes
    │   │   │   └── index.js
    │   │   ├── get-closest-block
    │   │   │   └── index.js
    │   │   ├── split-nodes
    │   │   │   └── index.js
    │   │   ├── create-new-paragraph-block
    │   │   │   └── index.js
    │   │   ├── index.js
    │   │   ├── insert-nodes-at-selection
    │   │   │   └── index.js
    │   │   ├── get-selection-nodes
    │   │   │   └── index.js
    │   │   └── handle-delete-in-paragraph
    │   │   │   └── index.js
    │   ├── 3-SlateSimpleEditor.stories.js
    │   ├── 2-Longer.stories.js
    │   ├── 5-Saving.stories.js
    │   ├── 4-Live.stories.js
    │   ├── 6-CustomTheme.stories.js
    │   └── 1-SlateTranscriptEditor.stories.js
    ├── util
    │   ├── export-adapters
    │   │   ├── subtitles-generator
    │   │   │   ├── compose-subtitles
    │   │   │   │   ├── util
    │   │   │   │   │   ├── format-seconds.js
    │   │   │   │   │   ├── escape-text.js
    │   │   │   │   │   └── tc-format.js
    │   │   │   │   ├── srt.js
    │   │   │   │   ├── vtt.js
    │   │   │   │   ├── ttml.js
    │   │   │   │   ├── premiere.js
    │   │   │   │   ├── csv
    │   │   │   │   │   ├── index.test.js
    │   │   │   │   │   └── index.js
    │   │   │   │   └── itt.js
    │   │   │   ├── presegment-text
    │   │   │   │   ├── line-break-between-sentences
    │   │   │   │   │   ├── index.js
    │   │   │   │   │   ├── index.test.js
    │   │   │   │   │   └── README.md
    │   │   │   │   ├── util
    │   │   │   │   │   ├── remove-space-after-carriage-return.js
    │   │   │   │   │   └── remove-space-at-beginning-of-line.js
    │   │   │   │   ├── text-segmentation
    │   │   │   │   │   ├── HONORIFICS.txt
    │   │   │   │   │   ├── index.js
    │   │   │   │   │   ├── index.test.js
    │   │   │   │   │   └── README.md
    │   │   │   │   ├── index.test.js
    │   │   │   │   ├── divide-into-two-lines
    │   │   │   │   │   ├── index.js
    │   │   │   │   │   ├── index.test.js
    │   │   │   │   │   └── README.md
    │   │   │   │   ├── fold
    │   │   │   │   │   ├── index.test.js
    │   │   │   │   │   ├── README.md
    │   │   │   │   │   └── index.js
    │   │   │   │   ├── index.js
    │   │   │   │   ├── README.md
    │   │   │   │   └── steps.md
    │   │   │   ├── list.js
    │   │   │   ├── example-usage.js
    │   │   │   ├── index.js
    │   │   │   └── sample
    │   │   │   │   └── test.sample.txt
    │   │   ├── txt
    │   │   │   └── index.js
    │   │   ├── index.js
    │   │   ├── slate-to-dpe
    │   │   │   ├── update-timestamps
    │   │   │   │   ├── plain-text-align-to-slate.js
    │   │   │   │   └── update-bloocks-timestamps.js
    │   │   │   └── index.js
    │   │   └── docx
    │   │   │   └── index.js
    │   ├── is-empty
    │   │   └── index.js
    │   ├── pluk
    │   │   ├── index.js
    │   │   └── README.md
    │   ├── convert-words-to-text
    │   │   └── index.js
    │   ├── get-media-type
    │   │   └── index.js
    │   ├── get-words-for-paragraph
    │   │   └── index.js
    │   ├── timecode-converter
    │   │   ├── src
    │   │   │   ├── timecodeToSeconds.test.js
    │   │   │   ├── secondsToTimecode.test.js
    │   │   │   ├── timecodeToSeconds.js
    │   │   │   ├── padTimeToTimecode.js
    │   │   │   ├── padTimeToTimecode.test.js
    │   │   │   └── secondsToTimecode.js
    │   │   ├── index.js
    │   │   └── index.test.js
    │   ├── downlaod
    │   │   └── index.js
    │   ├── count-words
    │   │   └── index.js
    │   ├── dpe-to-slate
    │   │   ├── generate-previous-timings-up-to-current
    │   │   │   └── index.js
    │   │   └── index.js
    │   └── insert-timecodes-in-line-in-words-list
    │   │   └── index.js
    ├── index.js
    └── sample-data
    │   └── segmented-transcript.js
├── .babelrc
├── .storybook
    └── main.js
├── .github
    ├── ISSUE_TEMPLATE
    │   ├── question.md
    │   ├── feature_request.md
    │   ├── bug_report.md
    │   ├── qa_individual_issue_report.md
    │   └── qa_report.md
    └── PULL_REQUEST_TEMPLATE.md
├── .npmignore
├── LICENCE.md
├── .gitignore
├── package.json
├── CONTRIBUTING.md
└── CODE_OF_CONDUCT.md


/.nvmrc:
--------------------------------------------------------------------------------
1 | 12


--------------------------------------------------------------------------------
/.nojekyll:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/docs/.keep:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/docs/QA/.keep:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/docs/guides/.keep:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/docs/notes/.keep:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.gitbook.yaml:
--------------------------------------------------------------------------------
1 | root: ./docs/


--------------------------------------------------------------------------------
/docs/adr/README.md:
--------------------------------------------------------------------------------
1 | # ADR
2 | 
3 | 


--------------------------------------------------------------------------------
/docs/notes/README.md:
--------------------------------------------------------------------------------
1 | # notes
2 | 
3 | 


--------------------------------------------------------------------------------
/docs/guides/README.md:
--------------------------------------------------------------------------------
1 | # Guides
2 | 
3 | 


--------------------------------------------------------------------------------
/.prettierignore:
--------------------------------------------------------------------------------
1 | **/node_modules
2 | *.json
3 | .out/
4 | dist/


--------------------------------------------------------------------------------
/.prettierrc:
--------------------------------------------------------------------------------
1 | {
2 |   "tabWidth": 2,
3 |   "singleQuote": true,
4 |   "trailingComma": "es5",
5 |   "printWidth": 150,
6 |   "bracketSpacing": true,
7 |   "jsxBracketSameLine": false
8 | }


--------------------------------------------------------------------------------
/docs/notes/pause-while-typing.md:
--------------------------------------------------------------------------------
1 | # pause-while-typing
2 | 
3 | [Wait for User to Stop Typing, in JavaScript](https://schier.co/blog/wait-for-user-to-stop-typing-using-javascript)
4 | 
5 | 


--------------------------------------------------------------------------------
/src/components/slate-helpers/handle-split-paragraph/is-same-block.js:
--------------------------------------------------------------------------------
1 | function isSameBlock(anchorPath, focusPath) {
2 |   return anchorPath[0] === focusPath[0];
3 | }
4 | export default isSameBlock;
5 | 


--------------------------------------------------------------------------------
/.babelrc:
--------------------------------------------------------------------------------
1 | {
2 |     "plugins": ["transform-react-jsx"],
3 |     "ignore": [
4 |       "src/components/*.stories.js",
5 |       "src/components/sample-data/**"
6 |     ],
7 |     "presets":["@babel/preset-react"]
8 |   }


--------------------------------------------------------------------------------
/src/components/slate-helpers/break-paragraph/index.js:
--------------------------------------------------------------------------------
1 | import { Editor } from 'slate';
2 | 
3 | const breakParagraph = (editor) => {
4 |   Editor.insertBreak(editor);
5 | };
6 | 
7 | export default breakParagraph;
8 | 


--------------------------------------------------------------------------------
/src/util/export-adapters/subtitles-generator/compose-subtitles/util/format-seconds.js:
--------------------------------------------------------------------------------
1 | const formatSeconds = seconds => new Date(seconds.toFixed(3) * 1000).toISOString().substr(11, 12);
2 | 
3 | export default formatSeconds;
4 | 


--------------------------------------------------------------------------------
/src/components/slate-helpers/handle-split-paragraph/is-selection-collapsed.js:
--------------------------------------------------------------------------------
1 | function isSelectionCollapsed(anchorOffset, focusOffset) {
2 |   return anchorOffset === focusOffset;
3 | }
4 | export default isSelectionCollapsed;
5 | 


--------------------------------------------------------------------------------
/src/util/is-empty/index.js:
--------------------------------------------------------------------------------
1 | // https://stackoverflow.com/questions/679915/how-do-i-test-for-an-empty-javascript-object
2 | function isEmpty(obj) {
3 |   return Object.keys(obj).length === 0;
4 | }
5 | 
6 | export default isEmpty;
7 | 


--------------------------------------------------------------------------------
/src/components/slate-helpers/insert-text/index.js:
--------------------------------------------------------------------------------
1 | import { Transforms } from 'slate';
2 | const insertText = ({ editor, text = '[INAUDIBLE]' }) => {
3 |   Transforms.insertText(editor, text);
4 | };
5 | 
6 | export default insertText;
7 | 


--------------------------------------------------------------------------------
/.storybook/main.js:
--------------------------------------------------------------------------------
1 | module.exports = {
2 |   stories: ['../src/components/**/*.stories.js'],
3 |   addons: ['@storybook/addon-actions', '@storybook/addon-links', '@storybook/addon-knobs/register', '@storybook/addon-storysource'],
4 | };
5 | 


--------------------------------------------------------------------------------
/docs/notes/insert-text-at-selection.md:
--------------------------------------------------------------------------------
1 | # insert-text-at-selection
2 | 
3 | ```javascript
4 | Transforms.insertText(editor, 'res');
5 | ```
6 | 
7 | [https://github.com/pietrop/slate-snippets](https://github.com/pietrop/slate-snippets)
8 | 
9 | 


--------------------------------------------------------------------------------
/src/components/slate-helpers/handle-split-paragraph/is-beginning-of-the-block.js:
--------------------------------------------------------------------------------
1 | function isBeginningOftheBlock(anchorOffset, focusOffset) {
2 |   return anchorOffset === 0 && focusOffset === 0;
3 | }
4 | 
5 | export default isBeginningOftheBlock;
6 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/question.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Question
 3 | about: Ask a question about this project
 4 | title: ''
 5 | labels: question
 6 | assignees: 
 7 | 
 8 | ---
 9 | 
10 | <!-- _where applicable, please provide context and use case around your question._ -->


--------------------------------------------------------------------------------
/src/util/export-adapters/subtitles-generator/presegment-text/line-break-between-sentences/index.js:
--------------------------------------------------------------------------------
1 | 'use strict';
2 | 
3 | function addLineBreakBetweenSentences(text) {
4 |   return text.replace(/\n/g, '\n\n');
5 | }
6 | 
7 | export default addLineBreakBetweenSentences;
8 | 


--------------------------------------------------------------------------------
/src/util/pluk/index.js:
--------------------------------------------------------------------------------
1 | /*
2 |  * Pluck Unique Values from Array of Javascript Objects
3 |  * https://gist.github.com/JamieMason/bed71c73576ba8d70a4671ea91b6178e
4 |  */
5 | const pluck = key => array => Array.from(new Set(array.map(obj => obj[key])));
6 | 
7 | export default pluck;
8 | 


--------------------------------------------------------------------------------
/docs/notes/web-workers.md:
--------------------------------------------------------------------------------
1 | # web workers
2 | 
3 | * [Parallel programming in JavaScript using Web Workers](https://itnext.io/achieving-parallelism-in-javascript-using-web-workers-8f921f2d26db)
4 | * [Electron Documentation  - Multithreading - Web Workers](https://www.electronjs.org/docs/tutorial/multithreading)
5 | 
6 | 


--------------------------------------------------------------------------------
/docs/notes/insert-slate-functions.md:
--------------------------------------------------------------------------------
 1 | ```js
 2 | const breakParagraph = () => {
 3 |   Editor.insertBreak(editor);
 4 | };
 5 | const insertTextInaudible = () => {
 6 |   Transforms.insertText(editor, '[INAUDIBLE]');
 7 | };
 8 | 
 9 | const handleInsertMusicNote = () => {
10 |   Transforms.insertText(editor, '♫'); // or ♪
11 | };
12 | ```
13 | 


--------------------------------------------------------------------------------
/docs/notes/pause-while-typing-timer.md:
--------------------------------------------------------------------------------
 1 | ```js
 2 | if (saveTimer !== null) {
 3 |   clearTimeout(saveTimer);
 4 | }
 5 | const tmpSaveTimer = setTimeout(() => {
 6 |   if (mediaRef && mediaRef.current) {
 7 |     mediaRef.current.play();
 8 |   }
 9 | }, PAUSE_WHILTE_TYPING_TIMEOUT_MILLISECONDS);
10 | setSaveTimer(tmpSaveTimer);
11 | ```
12 | 


--------------------------------------------------------------------------------
/src/util/export-adapters/subtitles-generator/compose-subtitles/util/escape-text.js:
--------------------------------------------------------------------------------
 1 | const AMP_REGEX = /&/g;
 2 | const LT_REGEX = /</g;
 3 | const GT_REGEX = />/g;
 4 | const escapeText = str =>
 5 |   str
 6 |     .replace(AMP_REGEX, '&amp;')
 7 |     .replace(LT_REGEX, '&lt;')
 8 |     .replace(GT_REGEX, '&gt;');
 9 | 
10 | export default escapeText;
11 | 


--------------------------------------------------------------------------------
/src/util/export-adapters/subtitles-generator/compose-subtitles/util/tc-format.js:
--------------------------------------------------------------------------------
 1 | // for itt
 2 | import TC from 'smpte-timecode';
 3 | 
 4 | const tcFormat = (frames, FPS) => {
 5 |   const tc = TC(Math.round(frames), FPS, false);
 6 | 
 7 |   return tc.toString().replace(/^00/, '01'); // FIXME this breaks on videos longer than 1h!
 8 | };
 9 | 
10 | export default tcFormat;
11 | 


--------------------------------------------------------------------------------
/src/components/slate-helpers/README.md:
--------------------------------------------------------------------------------
1 | Helpers are modules specifically for slateJs.
2 | 
3 | Some are simple wrappers around slateJs utilities, to keep some flexibility if the slateJs interface were to change in future versions.
4 | 
5 | Others are slightly more complex operations. Specific of the timed text domain.
6 | 
7 | More generic modules live under the `src/util` folder.
8 | 


--------------------------------------------------------------------------------
/src/util/export-adapters/subtitles-generator/presegment-text/util/remove-space-after-carriage-return.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Helper function to remove space after carriage return \n in lines
 3 |  * @param {string} text
 4 |  */
 5 | function removeSpaceAfterCarriageReturn(text) {
 6 |   return text.replace(/\n /g, '\n');
 7 | }
 8 | 
 9 | export default removeSpaceAfterCarriageReturn;
10 | 


--------------------------------------------------------------------------------
/src/components/slate-helpers/set-node/index.js:
--------------------------------------------------------------------------------
 1 | /*
 2 | https://github.com/dylans/slate-snippets#set-node
 3 | 
 4 | Transforms.setNodes(editor, { type: 'paragraph' }, { at: path });
 5 | */
 6 | 
 7 | import { Transforms } from 'slate';
 8 | 
 9 | function setNode({ editor, block, path }) {
10 |   Transforms.setNodes(editor, block, { at: path });
11 | }
12 | 
13 | export default setNode;
14 | 


--------------------------------------------------------------------------------
/.npmignore:
--------------------------------------------------------------------------------
 1 | src
 2 | demo
 3 | lib
 4 | packages
 5 | build
 6 | .babelrc
 7 | .babel.config.js
 8 | webpack.config.js
 9 | 
10 | CONTRIBUTING.md  
11 | CODE_OF_CONDUCT.md      
12 | .github/
13 | docs/
14 | src/sample-data/
15 | .out/
16 | .gitbook.yaml 
17 | .nvmrc  
18 | .nojekyll  
19 | .storybook/main.js  
20 | *.sample.json
21 | *.sample.xml
22 | *.sample.txt     
23 | *.stories.js
24 | *.test.js 


--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
 1 | # Slate Transcript Editor - Docs
 2 | 
 3 | _work in progress_
 4 | 
 5 | Docs for [pietrop/slate-transcript-editor](https://github.com/pietrop/slate-transcript-editor)
 6 | 
 7 | - [github repo](https://github.com/pietrop/slate-transcript-editor)
 8 | - [storybook](https://pietropassarelli.com/slate-transcript-editor)
 9 | - [gitbook](https://autoedit.gitbook.io/slate-transcript-editor-docs/)
10 | 


--------------------------------------------------------------------------------
/src/util/convert-words-to-text/index.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Helper function
 3 |  * @param {array} words - dpe word objeect, with at list text attribute to be able to convert to string of text
 4 |  */
 5 | function convertWordsToText(words) {
 6 |   return words
 7 |     .map((word) => {
 8 |       return word.text ? word.text.trim() : '';
 9 |     })
10 |     .join(' ');
11 | }
12 | 
13 | export default convertWordsToText;
14 | 


--------------------------------------------------------------------------------
/src/components/slate-helpers/set-selection/index.js:
--------------------------------------------------------------------------------
1 | // https://docs.slatejs.org/api/transforms#transforms-setselection-editor-editor-props-partial-less-than-range-greater-than
2 | // Set new properties on the selection.
3 | import { Transforms } from 'slate';
4 | function setSelection({ editor, nextPoint }) {
5 |   Transforms.setSelection(editor, { anchor: nextPoint, focus: nextPoint });
6 | }
7 | export default setSelection;
8 | 


--------------------------------------------------------------------------------
/src/util/get-media-type/index.js:
--------------------------------------------------------------------------------
 1 | import path from 'path';
 2 | 
 3 | const getMediaType = (mediaUrl) => {
 4 |   const clipExt = path.extname(mediaUrl);
 5 |   let tmpMediaType = 'video';
 6 |   if (clipExt === '.wav' || clipExt === '.mp3' || clipExt === '.m4a' || clipExt === '.flac' || clipExt === '.aiff') {
 7 |     tmpMediaType = 'audio';
 8 |   }
 9 |   return tmpMediaType;
10 | };
11 | 
12 | export default getMediaType;
13 | 


--------------------------------------------------------------------------------
/src/components/slate-helpers/handle-split-paragraph/split-text-at-offset.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  *
 3 |  * @param {string} text -text string
 4 |  * @param {number} offset - offset char number position/index
 5 |  */
 6 | function splitTextAtOffset(text, offset) {
 7 |   const textBefore = text.slice(0, offset);
 8 |   const textAfter = text.slice(offset);
 9 |   return [textBefore, textAfter];
10 | }
11 | 
12 | export default splitTextAtOffset;
13 | 


--------------------------------------------------------------------------------
/src/components/slate-helpers/collapse-selection-to-a-single-point/index.js:
--------------------------------------------------------------------------------
 1 | // https://docs.slatejs.org/api/transforms#transforms-collapse-editor-editor-options
 2 | // Collapse the selection to a single point.
 3 | // Options: {edge?: 'anchor' | 'focus' | 'start' | 'end'}
 4 | 
 5 | import { Transforms } from 'slate';
 6 | function collapseSelectionToAsinglePoint(editor) {
 7 |   Transforms.collapse(editor, { edge: 'start' });
 8 | }
 9 | 
10 | export default collapseSelectionToAsinglePoint;
11 | 


--------------------------------------------------------------------------------
/src/components/slate-helpers/handle-split-paragraph/split-words-list-at-offset.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  *
 3 |  * @param {string} text -text string
 4 |  * @param {number} offset - offset char number position/index
 5 |  */
 6 | function splitWordsListAtOffset(words, offset) {
 7 |   const tmpWords = JSON.parse(JSON.stringify(words));
 8 |   const wordsAfter = tmpWords.splice(offset);
 9 |   const wordsBefore = tmpWords;
10 |   return [wordsBefore, wordsAfter];
11 | }
12 | 
13 | export default splitWordsListAtOffset;
14 | 


--------------------------------------------------------------------------------
/src/util/export-adapters/subtitles-generator/compose-subtitles/srt.js:
--------------------------------------------------------------------------------
 1 | import formatSeconds from './util/format-seconds.js';
 2 | const srtGenerator = vttJSON => {
 3 |   let srtOut = '';
 4 |   vttJSON.forEach((v, i) => {
 5 |     srtOut += `${i + 1}\n${formatSeconds(parseFloat(v.start)).replace('.', ',')} --> ${formatSeconds(parseFloat(v.end)).replace(
 6 |       '.',
 7 |       ','
 8 |     )}\n${v.text.trim()}\n\n`;
 9 |   });
10 | 
11 |   return srtOut;
12 | };
13 | 
14 | export default srtGenerator;
15 | 


--------------------------------------------------------------------------------
/src/components/slate-helpers/get-node-by-path/index.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Get node by path
 3 |  * https://github.com/dylans/slate-snippets#get-node-by-path
 4 |  * Get the descendant node referred to by a specific path. If the path is an empty array, get the root node itself.
 5 |  * https://docs.slatejs.org/api/nodes
 6 |  */
 7 | import { Node } from 'slate';
 8 | function getNodebyPath({ editor, path }) {
 9 |   const node = Node.get(editor, path);
10 |   return node;
11 | }
12 | 
13 | export default getNodebyPath;
14 | 


--------------------------------------------------------------------------------
/src/util/export-adapters/subtitles-generator/presegment-text/util/remove-space-at-beginning-of-line.js:
--------------------------------------------------------------------------------
 1 | // Remove preceding empty space a beginning of line
 2 | // without removing carriage returns
 3 | // https://stackoverflow.com/questions/24282158/javascript-how-to-remove-the-white-space-at-the-start-of-the-string
 4 | 
 5 | function removeSpaceAtBeginningOfLine(text) {
 6 |   return text.map(r => {
 7 |     return r.replace(/^\s+/g, '');
 8 |   });
 9 | }
10 | 
11 | export default removeSpaceAtBeginningOfLine;
12 | 


--------------------------------------------------------------------------------
/src/util/export-adapters/subtitles-generator/compose-subtitles/vtt.js:
--------------------------------------------------------------------------------
 1 | import formatSeconds from './util/format-seconds.js';
 2 | 
 3 | const vttGenerator = (vttJSON, speakers = false) => {
 4 |   let vttOut = 'WEBVTT\n\n';
 5 |   vttJSON.forEach((v, i) => {
 6 |     vttOut += `${i + 1}\n${formatSeconds(parseFloat(v.start))} --> ${formatSeconds(parseFloat(v.end))}\n${speakers ? `<v ${v.speaker}>` : ``}${
 7 |       v.text
 8 |     }\n\n`;
 9 |   });
10 | 
11 |   return vttOut;
12 | };
13 | 
14 | export default vttGenerator;
15 | 


--------------------------------------------------------------------------------
/src/components/slate-helpers/merge-nodes/index.js:
--------------------------------------------------------------------------------
 1 | /*
 2 | https://docs.slatejs.org/api/transforms#transforms-mergenodes-editor-editor-options
 3 | Merge a node at the specified location with the previous node at the same depth. If no location is specified, use the selection. Resulting empty container nodes are removed.
 4 | Options supported: NodeOptions & {hanging?: boolean}
 5 | */
 6 | import { Transforms } from 'slate';
 7 | 
 8 | function mergeNodes({ editor, options = {} }) {
 9 |   Transforms.mergeNodes(editor, options);
10 | }
11 | export default mergeNodes;
12 | 


--------------------------------------------------------------------------------
/src/index.js:
--------------------------------------------------------------------------------
 1 | import SlateTranscriptEditor from './components/index.js';
 2 | import { secondsToTimecode, timecodeToSeconds, shortTimecode } from './util/timecode-converter/index.js';
 3 | import convertDpeToSlate from './util/dpe-to-slate/index.js';
 4 | import converSlateToDpe from './util/export-adapters/slate-to-dpe/index.js';
 5 | import slateToText from './util/export-adapters/txt';
 6 | 
 7 | export default SlateTranscriptEditor;
 8 | 
 9 | export { SlateTranscriptEditor, secondsToTimecode, timecodeToSeconds, shortTimecode, convertDpeToSlate, converSlateToDpe, slateToText };
10 | 


--------------------------------------------------------------------------------
/src/components/slate-helpers/remove-nodes/index.js:
--------------------------------------------------------------------------------
 1 | /*
 2 | https://docs.slatejs.org/api/transforms#transforms-removenodes-editor-editor-options
 3 | 
 4 | Transforms.removeNodes(editor: Editor, options?)
 5 | Remove nodes at the specified location in the document. If no location is specified, remove the nodes in the selection.
 6 | Options supported: NodeOptions & {hanging?: boolean}
 7 | 
 8 | */
 9 | import { Transforms } from 'slate';
10 | 
11 | function removeNodes({ editor, options = {} }) {
12 |   Transforms.removeNodes(editor, options);
13 | }
14 | export default removeNodes;
15 | 


--------------------------------------------------------------------------------
/src/util/get-words-for-paragraph/index.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  *
 3 |  * @param {*} currentParagraph a dpe paragraph object, with start, and end attribute eg in seconds
 4 |  * @param {*} words a list of word objects with start and end attributes
 5 |  * @returns a lsit of words obejcts that are included in the given paragraphs
 6 |  */
 7 | const getWordsForParagraph = (currentParagraph, words) => {
 8 |   const { start, end } = currentParagraph;
 9 |   return words.filter((word) => {
10 |     return word.start >= start && word.end <= end;
11 |   });
12 | };
13 | 
14 | export default getWordsForParagraph;
15 | 


--------------------------------------------------------------------------------
/docs/notes/OHMS.md:
--------------------------------------------------------------------------------
 1 | OHMS
 2 | 
 3 | OHMS is an open source indexing tool created by the University of Kentucky, which is used by a number of cultural heritage institutions
 4 | 
 5 | https://www.oralhistoryonline.org/
 6 | 
 7 | Example
 8 | 
 9 | https://kentuckyoralhistory.org/ark:/16417/xt71d837kj8dp
10 | you have to toggle to “Play Interview”
11 | 
12 | It uses XML for the index and a Word doc for the transcript (if a transcript exists), with timecodes at 30-second or 60-second intervals written in-line in the format of [hh:mm:ss]
13 | 
14 | The `slate-transcript-editor` OHMS export option exports the Word doc (transcript) part.
15 | 


--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
 1 | **Is your Pull Request related to another issue in this repository?**      
 2 | <!-- _If so please link to other issues and PRs as appropriate_ -->
 3 | 
 4 | **Describe what the PR does**    
 5 | <!-- _A clear and concise description of what the PR does. Feel free to use bulletpoints and checkboxes if needed [...]_ -->
 6 | 
 7 | 
 8 | **State whether the PR is ready for review or whether it needs extra work**    
 9 | <!-- _If you are still working on it and just setting it up for later review, or if it's ready to be reviewed for merging_ -->
10 | 
11 | **Additional context**    
12 | <!-- Add any other context or screenshots about the PR. -->


--------------------------------------------------------------------------------
/src/util/timecode-converter/src/timecodeToSeconds.test.js:
--------------------------------------------------------------------------------
 1 | import timecodeToSecondsHelper from './timecodeToSeconds';
 2 | 
 3 | describe('Timecode conversion TC- convertToSeconds', () => {
 4 |   it('Should be defined', () => {
 5 |     const demoTcValue = '00:10:00:00';
 6 |     const result = timecodeToSecondsHelper(demoTcValue);
 7 |     expect(result).toBeDefined();
 8 |   });
 9 | 
10 |   it('Should be able to convert from: hh:mm:ss:ff ', () => {
11 |     const demoTcValue = '00:10:00:00';
12 |     const demoExpectedResultInSeconds = 600;
13 |     const result = timecodeToSecondsHelper(demoTcValue);
14 |     expect(result).toEqual(demoExpectedResultInSeconds);
15 |   });
16 | });
17 | 


--------------------------------------------------------------------------------
/src/util/timecode-converter/src/secondsToTimecode.test.js:
--------------------------------------------------------------------------------
 1 | import secondsToTimecode from './secondsToTimecode';
 2 | 
 3 | describe('Timecode conversion TC- convertToSeconds', () => {
 4 |   it('Should be defined', () => {
 5 |     const dmoSecondsValue = 600;
 6 |     // const demoExpectedTc = '00:10:00:00';
 7 |     const result = secondsToTimecode(dmoSecondsValue);
 8 |     expect(result).toBeDefined();
 9 |   });
10 | 
11 |   it('Should be able to convert to: hh:mm:ss:ff ', () => {
12 |     const dmoSecondsValue = 600;
13 |     const demoExpectedTc = '00:10:00:00';
14 |     const result = secondsToTimecode(dmoSecondsValue);
15 |     expect(result).toEqual(demoExpectedTc);
16 |   });
17 | });
18 | 


--------------------------------------------------------------------------------
/src/components/slate-helpers/get-closest-block/index.js:
--------------------------------------------------------------------------------
 1 | /*
 2 |  from ~https://github.com/dylans/slate-snippets#get-closest-block~
 3 | from https://github.com/ianstormtaylor/slate/blob/228f4fa94f61f42ca41feae2b3029ebb570e0480/packages/slate/src/transforms/text.ts#L108-L112
 4 |  const startBlock = Editor.above(editor, {
 5 |    match: (n) => Editor.isBlock(editor, n),
 6 |      at: start,
 7 |      voids,
 8 |  });
 9 |  return startBlock;
10 | */
11 | import { Editor } from 'slate';
12 | 
/**
 * Find the nearest block-level ancestor using `Editor.above` with an
 * `Editor.isBlock` matcher and no explicit `at` location.
 *
 * NOTE(review): `Editor.above` looks able to return `undefined` when nothing
 * matches, in which case the destructuring below throws a TypeError — confirm
 * callers only invoke this while the editor has a selection.
 *
 * @param {Editor} editor - Slate editor instance
 * @returns {Array} `[blockNode, path]` of the closest block
 */
function getClosestBlock(editor) {
  const isBlockNode = (node) => Editor.isBlock(editor, node);
  const closestEntry = Editor.above(editor, { match: isBlockNode });
  const [blockNode, path] = closestEntry;
  return [blockNode, path];
}
17 | export default getClosestBlock;
18 | 


--------------------------------------------------------------------------------
/src/components/slate-helpers/handle-split-paragraph/is-end-of-the-block.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * This helper function checks if the cursor/caret is at the end of a line
 3 |  * by comparing the anchros offset with the focus offset and seeing if they are equal to the total number
 4 |  * of chars in that block
 5 |  *
 6 |  * There seems to be an alternative way of doing this that could also be exploreed
 7 |  * https://github.com/udecode/slate-plugins/blob/master/packages/slate-plugins/src/common/queries/isSelectionAtBlockEnd.ts
 8 |  */
 9 | function isEndOftheBlock({ anchorOffset, focusOffset, totlaChar }) {
10 |   return anchorOffset === focusOffset && anchorOffset === totlaChar;
11 | }
12 | 
13 | export default isEndOftheBlock;
14 | 


--------------------------------------------------------------------------------
/docs/QA/apple-script-testing.md:
--------------------------------------------------------------------------------
 1 | # Apple script testing
 2 | 
 3 | Script to use with [apple script](https://en.wikipedia.org/wiki/AppleScript) to test and simulate correcting the text in the editor over extended period of time.
 4 | 
 5 | ```applescript
 6 | delay 2
 7 | repeat 3000 times
 8 |     repeat 30 times
 9 |         tell application "System Events" to keystroke "SOME TEXT "
10 |         delay 3
11 |     end repeat
12 |     delay 6
13 |     tell application "System Events" to keystroke (ASCII character 31) --down arrow
14 |     tell application "System Events" to keystroke (ASCII character 31) --down arrow
15 |     tell application "System Events" to keystroke (ASCII character 31) --down arrow
16 | end repeat
17 | ```
18 | 
19 | 


--------------------------------------------------------------------------------
/src/util/export-adapters/subtitles-generator/compose-subtitles/ttml.js:
--------------------------------------------------------------------------------
 1 | import escapeText from './util/escape-text.js';
 2 | import formatSeconds from './util/format-seconds.js';
 3 | 
 4 | const ttmlGenerator = vttJSON => {
 5 |   let ttmlOut = `<?xml version="1.0" encoding="UTF-8"?>
 6 |     <tt xmlns="http://www.w3.org/ns/ttml">
 7 |     <head></head>
 8 |     <body>
 9 |     <div>`;
10 |   vttJSON.forEach(v => {
11 |     ttmlOut += `<p begin="${formatSeconds(parseFloat(v.start))}" end="${formatSeconds(parseFloat(v.end))}">${escapeText(v.text).replace(
12 |       /\n/g,
13 |       '<br />'
14 |     )}</p>\n`;
15 |   });
16 |   ttmlOut += '</div>\n</body>\n</tt>';
17 | 
18 |   return ttmlOut;
19 | };
20 | 
21 | export default ttmlGenerator;
22 | 


--------------------------------------------------------------------------------
/docs/QA/README.md:
--------------------------------------------------------------------------------
1 | # QA Report
2 | 
3 | To run QA, raise a QA issue to document the process as a [QA Report issue](https://github.com/pietrop/slate-transcript-editor/issues/new?assignees=&labels=QA%20Report&template=qa_report.md&title=[QA]%20Main%20check%20list), it will have the checklist below and you can run through it and check each item as you follow the steps.
4 | 
5 | If you run into issues with any of the individual items, raise a separate issue for each as a [QA Report - individual issue](https://github.com/pietrop/slate-transcript-editor/issues/new?assignees=&labels=QA%20Issue&template=qa_individual_issue_report.md&title=[QA]%20Issue%20%231.1%20Can%20edit%20the%20text). Note the item number and its "title" in the issue title and description.
6 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Feature request
 3 | about: Suggest an idea for this project
 4 | title: ''
 5 | labels: Enhancement
 6 | assignees: 
 7 | 
 8 | ---
 9 | 
10 | **Is your feature request related to a problem? Please describe.**
11 | <!-- A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] -->
12 | 
13 | **Describe the solution you'd like**
14 | <!-- A clear and concise description of what you want to happen. -->
15 | 
16 | **Describe alternatives you've considered**
17 | <!-- A clear and concise description of any alternative solutions or features you've considered. -->
18 | 
19 | **Additional context**
20 | <!-- Add any other context or screenshots about the feature request here. -->


--------------------------------------------------------------------------------
/docs/guides/npm-tags.md:
--------------------------------------------------------------------------------
 1 | # Npm tags
 2 | 
 3 | First make sure you have done a commit of latest changes then
 4 | 
 5 | > You can run `npm version 0.1.2-alpha.1` to update `package.json` and create a git tag in one go (see https://docs.npmjs.com/cli/version).
 6 | 
 7 | - [Publishing a beta or alpha version to NPM](https://medium.com/@kevinkreuzer/publishing-a-beta-or-alpha-version-to-npm-46035b630dd7)
 8 | 
 9 | this changes `package.json` version to be
10 | 
11 | ```json
12 |  "version": "1.0.4-alpha.0",
13 | ```
14 | 
15 | then you can run `npm run publish:public` which under the hood preps the files and folder and runs `npm publish dist --access public`.
16 | 
17 | To install in another repo
18 | 
19 | ```
20 | npm install slate-transcript-editor@alpha
21 | ```
22 | 


--------------------------------------------------------------------------------
/docs/notes/debounce.md:
--------------------------------------------------------------------------------
 1 | # notes on debounce
 2 | 
 3 | This worked, to do auto/align when the user stops typing. It only calls it once.
 4 | 
 5 | Outside of the component
 6 | 
 7 | ```js
 8 | import pDebounce from 'p-debounce';
 9 | ...
10 | const debouncedSave = pDebounce(updateBloocksTimestamps, 3000);
11 | ```
12 | 
13 | inside the component keydown
14 | 
15 | ```js
16 | const handleOnKeyDown = async (event) => {
17 |       ...
18 |   // value is the content of slateJS
19 |   const alignedSlateData = await debouncedSave(value);
20 |   setValue(alignedSlateData);
21 |   setIsContentIsModified(false);
22 | ```
23 | 
24 | It seems that having it inside the component was being affected by the component's re-renders.
25 | 
26 | This could be used for pause while typing as well.
27 | 


--------------------------------------------------------------------------------
/src/util/downlaod/index.js:
--------------------------------------------------------------------------------
 1 | // https://stackoverflow.com/questions/2897619/using-html5-javascript-to-generate-and-save-a-file
 2 | const download = (content, filename, contentType) => {
 3 |   const type = contentType || 'application/octet-stream';
 4 |   const link = document.createElement('a');
 5 |   const blob = new Blob([content], { type: type });
 6 | 
 7 |   link.href = window.URL.createObjectURL(blob);
 8 |   link.download = filename;
 9 |   // Firefox fix - cannot do link.click() if it's not attached to DOM in firefox
10 |   // https://stackoverflow.com/questions/32225904/programmatical-click-on-a-tag-not-working-in-firefox
11 |   document.body.appendChild(link);
12 |   link.click();
13 |   document.body.removeChild(link);
14 | };
15 | 
16 | export default download;
17 | 


--------------------------------------------------------------------------------
/src/util/export-adapters/subtitles-generator/list.js:
--------------------------------------------------------------------------------
 1 | const subtitlesExportOptionsList = [
 2 |   { type: 'srt', label: 'Srt', ext: 'srt' },
 3 |   { type: 'vtt', label: 'VTT', ext: 'vtt' },
 4 |   { type: 'vtt_speakers', label: 'VTT with speakers', ext: 'vtt' },
 5 |   { type: 'vtt_speakers_paragraphs', label: 'VTT with speakers and paragraphs', ext: 'vtt' },
 6 |   { type: 'itt', label: 'iTT', ext: 'itt' },
 7 |   { type: 'ttml', label: 'TTML', ext: 'ttml' },
 8 |   { type: 'premiereTTML', label: 'TTML for Adobe Premiere', ext: 'ttml' },
 9 |   { type: 'csv', label: 'CSV', ext: 'csv' },
10 |   { type: 'pre-segment-txt', label: 'Pre segmented txt', ext: 'txt' },
11 |   { type: 'json', label: 'Json', ext: 'json' },
12 | ];
13 | 
14 | export default subtitlesExportOptionsList;
15 | 


--------------------------------------------------------------------------------
/src/util/count-words/index.js:
--------------------------------------------------------------------------------
 1 | export const removeExtraWhiteSpaces = (text) => {
 2 |   return text.trim().replace(/\s\s+/g, ' ');
 3 | };
 4 | 
 5 | export const splitOnWhiteSpaces = (text) => {
 6 |   return removeExtraWhiteSpaces(text).split(' ');
 7 | };
 8 | 
 9 | export const countChar = (text) => {
10 |   // remove white spaces and count chat
11 |   return splitOnWhiteSpaces(text).join('').length;
12 | };
13 | 
14 | const countWords = (text) => {
15 |   // return text.trim().replace(/\n /g, '').replace(/\n/g, ' ').split(' ').length;
16 |   // Don't count multiple spaces as multiple words
17 |   // https://www.w3schools.com/jsref/jsref_regexp_whitespace.asp
18 |   // Do a global search for whitespace characters in a string
19 |   return splitOnWhiteSpaces(text).length;
20 | };
21 | 
22 | export default countWords;
23 | 


--------------------------------------------------------------------------------
/docs/SUMMARY.md:
--------------------------------------------------------------------------------
 1 | # Table of contents
 2 | 
 3 | * [Slate Transcript Editor - Docs](README.md)
 4 | * [notes](notes/README.md)
 5 |   * [Apple script testing](QA/apple-script-testing.md)
 6 |   * [css-injection-karaoke](notes/css-injection-karaoke.md)
 7 |   * [insert-text-at-selection](notes/insert-text-at-selection.md)
 8 |   * [set-selection](notes/set-selection.md)
 9 |   * [notes](notes/notes.md)
10 |   * [web workers](notes/web-workers.md)
11 |   * [Takeaways from draftJs vs Slate](notes/draftjs-vs-slatejs.md)
12 |   * [pause-while-typing](notes/pause-while-typing.md)
13 |   * [verbose-generate-previous-timings-up-to-current-func](notes/verbose-generate-previous-timings-up-to-current-func.md)
14 | * [ADR](adr/README.md)
15 |   * [\[short title of solved problem and solution\] - ADR Template](adr/adr-template.md)
16 | 
17 | 


--------------------------------------------------------------------------------
/docs/notes/css-injection-karaoke.md:
--------------------------------------------------------------------------------
 1 | # css-injection-karaoke
 2 | 
 3 | ```jsx
 4 | <style scoped>
 5 | {`
 6 |     /* Next words */
 7 |     .timecode[data-previous-timings~="${parseInt(currentTime)}"]{
 8 |         color:  #6c757d; /*Bootstrap grey for secondary*/
 9 |     }
10 | 
11 |     /* Previous words */
12 |     .timecode:not([data-previous-timings~="${parseInt(currentTime)}"]){
13 |         color: #343a40!important /* Bootstrap black, for dark */
14 |     }
15 | 
16 |     .timecode:not([data-start^="${parseInt(currentTime)}"]){
17 |         color: orange!important// #6c757d; /*Bootstrap grey for secondary*/
18 |     }  
19 | 
20 |     .timecode[data-start^="${parseInt(currentTime)}"]{
21 |         color: red!important //#343a40!important /* Bootstrap black, for dark */
22 |     }
23 | `}
24 | </style>
25 | ```
26 | 
27 | 


--------------------------------------------------------------------------------
/src/util/pluk/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # Pluck Unique Values from Array of Javascript Objects
 3 | from [JamieMason/pluck-unique-values-from-array-of-javascript-objects.md](https://gist.github.com/JamieMason/bed71c73576ba8d70a4671ea91b6178e)
 4 | ## Implementation
 5 | 
 6 | ```js
 7 | const pluck = key => array => Array.from(new Set(array.map(obj => obj[key])));
 8 | ```
 9 | 
10 | ## Usage
11 | 
12 | ```js
13 | const cars = [
14 |   { brand: 'Audi', color: 'black' },
15 |   { brand: 'Audi', color: 'white' },
16 |   { brand: 'Ferarri', color: 'red' },
17 |   { brand: 'Ford', color: 'white' },
18 |   { brand: 'Peugot', color: 'white' }
19 | ];
20 | 
21 | const getBrands = pluck('brand');
22 | 
23 | console.log(getBrands(cars));
24 | ```
25 | 
26 | ### Output
27 | 
28 | ```json
29 | [
30 |   "Audi",
31 |   "Ferarri",
32 |   "Ford",
33 |   "Peugot"
34 | ]
35 | ```


--------------------------------------------------------------------------------
/src/util/export-adapters/subtitles-generator/presegment-text/text-segmentation/HONORIFICS.txt:
--------------------------------------------------------------------------------
 1 | A.
 2 | Adj.
 3 | Adm.
 4 | Adv.
 5 | Asst.
 6 | B.
 7 | Bart.
 8 | Bldg.
 9 | Brig.
10 | Bros.
11 | C.
12 | Capt.
13 | Cmdr.
14 | Col.
15 | Comdr.
16 | Con.
17 | Cpl.
18 | D.
19 | DR.
20 | Dr.
21 | E.
22 | Ens.
23 | F.
24 | Fr.
25 | G.
26 | Gen.
27 | Gov.
28 | H.
29 | Hon.
30 | Hosp.
31 | I.
32 | Insp.
33 | J.
34 | K.
35 | L.
36 | Lt.
37 | M.
38 | M.
39 | MM.
40 | MR.
41 | MRS.
42 | MS.
43 | Maj.
44 | Messrs.
45 | Mlle.
46 | Mme.
47 | Mr.
48 | Mrs.
49 | Ms.
50 | Msgr.
51 | N.
52 | O.
53 | Op.
54 | Ord.
55 | P.
56 | Pfc.
57 | Ph.
58 | Prof.
59 | Pvt.
60 | Q.
61 | R.
62 | Rep.
63 | Reps.
64 | Res.
65 | Rev.
66 | Rt.
67 | S.
68 | Sen.
69 | Sens.
70 | Sfc.
71 | Sgt.
72 | Sr.
73 | St.
74 | Supt.
75 | Surg.
76 | T.
77 | U.
78 | V.
79 | W.
80 | X.
81 | Y.
82 | Z.
83 | v.
84 | vs.


--------------------------------------------------------------------------------
/docs/notes/set-selection.md:
--------------------------------------------------------------------------------
 1 | # set-selection
 2 | 
 3 | [https://docs.slatejs.org/concepts/05-operations](https://docs.slatejs.org/concepts/05-operations)
 4 | 
 5 | ```javascript
 6 | editor.apply({
 7 |   type: 'set_selection',
 8 |   properties: {
 9 |     anchor: { path: [0, 0], offset: 0 },
10 |   },
11 |   newProperties: {
12 |     anchor: { path: [0, 0], offset: 15 },
13 |   },
14 | })
15 | ```
16 | 
17 | break on selection
18 | 
19 | ```javascript
20 | Editor.insertBreak(editor)
21 | ```
22 | 
23 | select whole editor for range, from [Slate slack](https://slate-js.slack.com/archives/C1RH7AXSS/p1581298796206700?thread_ts=1581290922.206500&cid=C1RH7AXSS)
24 | 
25 | ```javascript
26 | Editor.range(editor, [])
27 | ```
28 | 
29 | ```js
30 | const wholeTranscriptSelection =  Editor.range(editor, []);
31 | Transforms.setSelection(editor, wholeTranscriptSelection)
32 | ```
33 | 


--------------------------------------------------------------------------------
/src/util/dpe-to-slate/generate-previous-timings-up-to-current/index.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * See explanation in `src/util/dpe-to-slate/index.js` for how this function works with css injection
 3 |  * to provide the current paragraph's highlight.
 4 |  */
 5 | 
 6 | /**
 7 |  * Generate a list of times, each rounded up to int.
 8 |  * from zero to the provided `time`.
 9 |  * eg if `time` is 6, the list would be [0, 1, 2, 3, 4, 5]
10 |  * @param {Number} time - float or int, time in seconds
11 |  */
12 | 
13 | function generatePreviousTimingsUpToCurrent(start) {
14 |   const startTimeInt = parseInt(start);
15 |   if (start === 0) {
16 |     return '';
17 |   }
18 |   if (start === 1) {
19 |     return '0 1';
20 |   }
21 |   return new Array(startTimeInt)
22 |     .fill(1)
23 |     .map((_, i) => i + 1)
24 |     .join(' ');
25 | }
26 | 
27 | export default generatePreviousTimingsUpToCurrent;
28 | 


--------------------------------------------------------------------------------
/src/components/slate-helpers/split-nodes/index.js:
--------------------------------------------------------------------------------
 1 | /*
 2 | 
 3 | https://github.com/ianstormtaylor/slate/blob/b5859b7e2ef97cdc5d5aaa675b807c4783b2e83c/packages/slate/src/transforms/node.ts#L584-L595
 4 | 
 5 | const splitMode = mode === 'lowest' ? 'lowest' : 'highest'
 6 | 
 7 | Transforms.splitNodes(editor, {
 8 |     at: end,
 9 |     match,
10 |     mode: splitMode,
11 |     voids,
12 | })
13 | 
14 | https://docs.slatejs.org/api/transforms#transforms-splitnodes-editor-editor-options
15 | 
16 | Split nodes at the specified location. If no location is specified, split the selection.
17 | Options supported: NodeOptions & {height?: number, always?: boolean}
18 | 
19 | Transforms.splitNodes(editor: Editor, options?)
20 |  */
21 | 
22 | import { Transforms } from 'slate';
/**
 * Thin wrapper that forwards to `Transforms.splitNodes`.
 * @param {Editor} editor - Slate editor instance
 * @param {Object} [options={}] - passed through unchanged to `Transforms.splitNodes`
 */
function splitNotdes(editor, options = {}) {
  const splitOptions = options;
  Transforms.splitNodes(editor, splitOptions);
}
26 | 
27 | export default splitNotdes;
28 | 


--------------------------------------------------------------------------------
/src/util/export-adapters/subtitles-generator/presegment-text/text-segmentation/index.js:
--------------------------------------------------------------------------------
 1 | 'use strict';
 2 | import tokenizer from 'sbd';
 3 | 
 4 | function textSegmentation(text, honorifics) {
 5 |   var optionalHonorifics = null;
 6 | 
 7 |   if (honorifics !== undefined) {
 8 |     optionalHonorifics = honorifics;
 9 |   }
10 | 
11 |   var options = {
12 |     newline_boundaries: true,
13 |     html_boundaries: false,
14 |     sanitize: false,
15 |     allowed_tags: false,
16 |     //TODO: Here could open HONORIFICS file and pass them in here I think
17 |     //abbreviations: list of abbreviations to override the original ones for use with other languages. Don't put dots in abbreviations.
18 |     abbreviations: optionalHonorifics,
19 |   };
20 | 
21 |   var sentences = tokenizer.sentences(text, options);
22 |   var sentencesWithLineSpaces = sentences.join('\n');
23 | 
24 |   return sentencesWithLineSpaces;
25 | }
26 | 
27 | export default textSegmentation;
28 | 


--------------------------------------------------------------------------------
/src/sample-data/segmented-transcript.js:
--------------------------------------------------------------------------------
 1 | const fs = require('fs');
 2 | const DEMO_SOLEIO = require('../sample-data/soleio-dpe.json');
 3 | /**
 4 |  *
 5 |  * helper function to simulate data structure for live
 6 |  */
 7 | function findWordsRangeForQuoteInTranscript({ paragraph, words }) {
 8 |   const paragraphStart = paragraph.start;
 9 |   const paragraphEnd = paragraph.end;
10 |   const wordResults = words.filter(word => {
11 |     return word.start >= paragraphStart && word.end <= paragraphEnd;
12 |   });
13 |   return wordResults;
14 | }
15 | 
/**
 * Turn a transcript into one segment per paragraph, each holding that
 * paragraph and the words within its time range.
 *
 * @param {Object} transcript - read for `paragraphs` and `words`
 * @returns {Array} list of `{ words, paragraphs: [paragraph] }`
 */
function segmentedTranscript(transcript) {
  const toSegment = paragraph => ({
    words: findWordsRangeForQuoteInTranscript({ paragraph, words: transcript.words }),
    paragraphs: [paragraph],
  });
  return transcript.paragraphs.map(paragraph => toSegment(paragraph));
}

// Script entry: segment the demo transcript and write it out as pretty-printed JSON.
const segments = segmentedTranscript(DEMO_SOLEIO);
fs.writeFileSync('./src/sample-data/segmented-transcript-soleio-dpe.json', JSON.stringify(segments, null, 2));
25 | 


--------------------------------------------------------------------------------
/src/util/timecode-converter/src/timecodeToSeconds.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Helperf unction
 3 |  * @param {*} tc
 4 |  * @param {*} fps
 5 |  */
 6 | const timecodeToFrames = function(tc, fps) {
 7 |   // TODO make 29.97 fps drop-frame aware - works for 25 only.
 8 | 
 9 |   const s = tc.split(':');
10 |   let frames = parseInt(s[3]);
11 |   frames += parseInt(s[2]) * fps;
12 |   frames += parseInt(s[1]) * (fps * 60);
13 |   frames += parseInt(s[0]) * (fps * 60 * 60);
14 | 
15 |   return frames;
16 | };
17 | 
/**
 * Convert a broadcast timecode to seconds, rounded to two decimal places.
 * @param {string} tc - `hh:mm:ss:ff`
 * @param {number} [framePerSeconds] - defaults to 25 when `undefined`
 * @returns {number} time in seconds
 */
const timecodeToSecondsHelper = function(tc, framePerSeconds) {
  const fps = framePerSeconds !== undefined ? framePerSeconds : 25;
  const totalFrames = timecodeToFrames(tc, fps);
  const seconds = Number(totalFrames / fps);

  return Number(seconds.toFixed(2));
};
32 | 
33 | export default timecodeToSecondsHelper;
34 | 


--------------------------------------------------------------------------------
/src/components/3-SlateSimpleEditor.stories.js:
--------------------------------------------------------------------------------
 1 | import React, { useState, useEffect, useMemo } from 'react';
 2 | import { createEditor } from 'slate';
 3 | // https://docs.slatejs.org/walkthroughs/01-installing-slate
 4 | // Import the Slate components and React plugin.
 5 | import { Slate, Editable, withReact } from 'slate-react';
 6 | 
 7 | export default {
 8 |   title: 'SlateSimpleEditor',
 9 |   component: SlateSimpleEditor,
10 | };
11 | 
12 | const SlateSimpleEditor = () => {
13 |   const editor = useMemo(() => withReact(createEditor()), []);
14 |   // Add the initial value when setting up our state.
15 |   const [value, setValue] = useState([
16 |     {
17 |       type: 'paragraph',
18 |       children: [{ text: 'A line of text in a paragraph.' }],
19 |     },
20 |   ]);
21 | 
22 |   return (
23 |     <Slate editor={editor} value={value} onChange={value => setValue(value)}>
24 |       <Editable />
25 |     </Slate>
26 |   );
27 | };
28 | 
29 | export const SlateSimpleDemo = () => <SlateSimpleEditor />;
30 | 


--------------------------------------------------------------------------------
/src/util/export-adapters/subtitles-generator/presegment-text/index.test.js:
--------------------------------------------------------------------------------
 1 | 'use strict';
 2 | import fs from 'fs';
 3 | import preSegmentText from './index.js';
 4 | // require of js and json files is relative to the current file path
 5 | import { words as sampleWords } from '../sample/words-list.sample.json';
 6 | // fs path is relative to where the node process start
 7 | const sampleSegmentedOutput = fs.readFileSync('./packages/export-adapters/subtitles-generator/sample/test-presegment.sample.txt').toString();
 8 | 
 9 | const numberOfCharPerLine35 = 35;
10 | // TODO: not sure why Jest is having issues running this test
11 | describe.skip('presegment text', () => {
12 |   test('presegment text ', () => {
13 |     const result = preSegmentText(sampleWords);
14 |     expect(result).toEqual(sampleSegmentedOutput);
15 |   });
16 | 
17 |   test('presegment text - 35', () => {
18 |     const result = preSegmentText(sampleWords, numberOfCharPerLine35);
19 |     expect(result).toEqual(sampleSegmentedOutput);
20 |   });
21 | });
22 | 


--------------------------------------------------------------------------------
/src/components/slate-helpers/create-new-paragraph-block/index.js:
--------------------------------------------------------------------------------
 1 | import { shortTimecode } from '../../../util/timecode-converter'; //'../../../timecode-converter';
 2 | import generatePreviousTimingsUpToCurrent from '../../../util/dpe-to-slate/generate-previous-timings-up-to-current';
 3 | 
 4 | function createNewParagraphBlock({ speaker, start, text = '', words = [], previousTimings, startTimecode }) {
 5 |   let newPreviousTimings = previousTimings;
 6 |   if (!newPreviousTimings) {
 7 |     newPreviousTimings = generatePreviousTimingsUpToCurrent(start);
 8 |   }
 9 |   let newStartTimecode = startTimecode;
10 |   if (!newStartTimecode) {
11 |     newStartTimecode = shortTimecode(start);
12 |   }
13 |   return {
14 |     speaker,
15 |     start,
16 |     previousTimings: newPreviousTimings,
17 |     startTimecode: newStartTimecode,
18 |     type: 'timedText',
19 |     children: [
20 |       {
21 |         text,
22 |         words,
23 |       },
24 |     ],
25 |   };
26 | }
27 | 
28 | export default createNewParagraphBlock;
29 | 


--------------------------------------------------------------------------------
/src/util/export-adapters/subtitles-generator/presegment-text/divide-into-two-lines/index.js:
--------------------------------------------------------------------------------
 1 | 'use strict';
 2 | import removeSpaceAtBeginningOfLine from '../util/remove-space-at-beginning-of-line.js';
 3 | 
 4 | function divideIntoTwoLines(text) {
 5 |   var lines = text.split('\n');
 6 | 
 7 |   var counter = 0;
 8 | 
 9 |   var result = lines.map(l => {
10 |     if (l === '') {
11 |       return l;
12 |     } else {
13 |       if (counter === 0) {
14 |         counter += 1;
15 |         if (l[l.length - 1][0] === '.') {
16 |           return l + '\n\n';
17 |         }
18 | 
19 |         return l + '\n';
20 |       } else if (counter === 1) {
21 |         counter = 0;
22 | 
23 |         return l + '\n\n';
24 |       }
25 |     }
26 |   });
27 | 
28 |   result = removeSpaceAtBeginningOfLine(result);
29 |   // remove empty lines from list to avoid unwanted space a beginning of line
30 |   result = result.filter(line => line.length !== 0);
31 | 
32 |   result = result.join('').trim();
33 | 
34 |   return result;
35 | }
36 | 
37 | export default divideIntoTwoLines;
38 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Bug report
 3 | about: Create a report to help us improve
 4 | title: ''
 5 | labels: bug
 6 | assignees: 
 7 | 
 8 | ---
 9 | 
10 | **Describe the bug**
11 | <!-- A clear and concise description of what the bug is. -->
12 | 
13 | **To Reproduce**
14 | Steps to reproduce the behavior:
15 | 1. Go to '...'
16 | 2. Click on '....'
17 | 3. Scroll down to '....'
18 | 4. See error
19 | 
20 | **Expected behavior**
21 | <!-- A clear and concise description of what you expected to happen. -->
22 | 
23 | **Screenshots**
24 | <!-- If applicable, add screenshots to help explain your problem. -->
25 | 
26 | **Desktop (please complete the following information):**
27 |  - OS: [e.g. iOS]
28 |  - Browser [e.g. chrome, safari]
29 |  - Version [e.g. 22]
30 | 
31 | **Smartphone (please complete the following information):**
32 |  - Device: [e.g. iPhone6]
33 |  - OS: [e.g. iOS8.1]
34 |  - Browser [e.g. stock browser, safari]
35 |  - Version [e.g. 22]
36 | 
37 | **Additional context**
38 | <!-- Add any other context about the problem here. -->


--------------------------------------------------------------------------------
/LICENCE.md:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Pietro Passarelli 2020
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/docs/notes/deconstructing word timing computation.md:
--------------------------------------------------------------------------------
 1 | ```js
 2 | (startTime * (nodeWords.length - idx) + endTime * idx) / nodeWords.length,
 3 | ```
 4 | 
 5 | ## first word
 6 | 
 7 | ```
 8 | start = 1.2
 9 | endTime = 3.5
10 | nodeWords.length = 10
11 | idx = 0
12 | ```
13 | 
14 | ```js
15 |  (   1.2    *  (   10          -  0 ) +     3.5 *  0 ) /    10
16 | (startTime * (nodeWords.length - idx) + endTime * idx) / nodeWords.length,
17 | ```
18 | 
19 | ```
20 | (1.2*(10-0)+3.5*0)/10 = 1.2
21 | ```
22 | 
23 | ## second word
24 | 
25 | ```
26 | start = 1.2
27 | endTime = 3.5
28 | nodeWords.length = 10
29 | idx = 1
30 | ```
31 | 
32 | ```js
33 |  (   1.2    *  (   10          -  1 ) +     3.5 *  1 ) /    10
34 | (startTime * (nodeWords.length - idx) + endTime * idx) / nodeWords.length,
35 | ```
36 | 
37 | ## third word
38 | 
39 | ```
40 | start = 1.2
41 | endTime = 3.5
42 | nodeWords.length = 10
43 | idx = 2
44 | ```
45 | 
46 | ```js
47 |  (   1.2    *  (   10          -  2 ) +     3.5 *  2 ) /    10
48 | (startTime * (nodeWords.length - idx) + endTime * idx) / nodeWords.length,
49 | ```
50 | 
51 | ```
52 | (1.2*(10-2)+3.5*2)/10
53 | (1.2*(8)+3.5*2)/10
54 | (1.2*(8)+7)/10
55 | (9.6+7)/10
56 | 16.6/10
57 | 1.6600000000000001
58 | ```
59 | 


--------------------------------------------------------------------------------
/src/util/export-adapters/subtitles-generator/compose-subtitles/premiere.js:
--------------------------------------------------------------------------------
 1 | import escapeText from './util/escape-text.js';
 2 | import formatSeconds from './util/format-seconds.js';
 3 | 
 4 | const ttmlGeneratorPremiere = (vttJSON) => {
 5 |   let ttmlOut = `<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
 6 |     <tt xmlns="http://www.w3.org/ns/ttml"
 7 |     xmlns:ttp="http://www.w3.org/ns/ttml#parameter"
 8 |     ttp:timeBase="media"
 9 |     xmlns:m608="http://www.smpte-ra.org/schemas/2052-1/2010/smpte-tt#cea608"
10 |     xmlns:smpte="http://www.smpte-ra.org/schemas/2052-1/2010/smpte-tt"
11 |     xmlns:ttm="http://www.w3.org/ns/ttml#metadata">
12 |     <head>
13 |     <metadata>
14 |     <smpte:information m608:captionService="F1C1CC" m608:channel="cc1"/>
15 |     </metadata>
16 |     <styling></styling>
17 |     <layout></layout>
18 |     </head>
19 |     <body><div>`;
20 | 
21 |   vttJSON.forEach((v) => {
22 |     ttmlOut += `<p begin="${formatSeconds(parseFloat(v.start))}" end="${formatSeconds(parseFloat(v.end))}">${escapeText(v.text).replace(
23 |       /\n/g,
24 |       '<br />'
25 |     )}</p>\n`;
26 |   });
27 |   ttmlOut += '</div>\n</body>\n</tt>';
28 | 
29 |   return `${ttmlOut}`;
30 | };
31 | 
32 | export default ttmlGeneratorPremiere;
33 | 


--------------------------------------------------------------------------------
/src/components/slate-helpers/handle-split-paragraph/is-text-same-as-words-list.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Helper function to tell if caret/cursor is in the middle of a word
 3 |  * helper function for handle split paragraph
 4 |  * @param {string} textBefore - string text
 5 |  * @param {array} wordsBefore  - list of words object
 6 |  */
 7 | // import SlateTextEditor from '../index';
 8 | 
 9 | function isTextSameAsWordsList(textBefore, wordsBefore) {
10 |   // convert them to the same format, for comparison
11 |   // convert the text list string to an array strings (words text)
12 |   const textBeforeList = textBefore.trim().replace(/\s\s+/g, ' ').split(' ');
13 |   // convert the array of words object, to an array of strings (words text)
14 |   const wordsBeforeList = wordsBefore.map((w) => {
15 |     return w.text;
16 |   });
17 |   // get last word from text list
18 |   const lastTextWord = textBeforeList[textBeforeList.length - 1];
19 |   // get last word from word list
20 |   const lastWord = wordsBeforeList[wordsBeforeList.length - 1];
21 |   // if they are not the same then the cursor is in the middle of a word
22 |   // because `lastTextWord` would be chopped
23 | 
24 |   const result = !(lastTextWord.trim() === lastWord.trim());
25 | 
26 |   return result;
27 | }
28 | 
29 | export default isTextSameAsWordsList;
30 | 


--------------------------------------------------------------------------------
/src/components/slate-helpers/index.js:
--------------------------------------------------------------------------------
 1 | import getClosestBlock from './get-closest-block';
 2 | import getSelectionNodes from './get-selection-nodes';
 3 | import insertNodesAtSelection from './insert-nodes-at-selection';
 4 | import insertText from './insert-text';
 5 | import mergeNodes from './merge-nodes';
 6 | import removeNodes from './remove-nodes';
 7 | import setNode from './set-node';
 8 | import splitNodes from './split-nodes';
 9 | import breakParagraph from './break-paragraph';
10 | import collapseSelectionToAsinglePoint from './collapse-selection-to-a-single-point';
11 | import handleSplitParagraph from './handle-split-paragraph';
12 | import createNewParagraphBlock from './create-new-paragraph-block';
13 | import handleDeleteInParagraph from './handle-delete-in-paragraph';
14 | import setSelection from './set-selection';
15 | import getNodebyPath from './get-node-by-path';
16 | const SlateHelpers = { // namespace object: every helper imported above, exposed under its own name
17 |   getClosestBlock,
18 |   getSelectionNodes,
19 |   insertNodesAtSelection,
20 |   mergeNodes,
21 |   removeNodes,
22 |   setNode,
23 |   splitNodes,
24 |   breakParagraph,
25 |   insertText,
26 |   collapseSelectionToAsinglePoint,
27 |   handleSplitParagraph,
28 |   createNewParagraphBlock,
29 |   handleDeleteInParagraph,
30 |   setSelection,
31 |   getNodebyPath,
32 | };
33 | // the bundle is this module's only (default) export
34 | export default SlateHelpers;
35 | 


--------------------------------------------------------------------------------
/src/util/timecode-converter/index.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Wrapping around "time stamps" and timecode conversion modules
 3 |  * To provide more support for variety of formats.
 4 |  */
 5 | import secondsToTimecode from './src/secondsToTimecode';
 6 | import timecodeToSecondsHelper from './src/timecodeToSeconds';
 7 | import padTimeToTimecode from './src/padTimeToTimecode';
 8 | 
 9 | /**
10 |  * @param {*} time
11 |  * Can take as input timecodes in the following formats
12 |  * - hh:mm:ss:ff
13 |  * - mm:ss
14 |  * - m:ss
15 |  * - ss - seconds --> if it's already in seconds then it just returns seconds
16 |  * - hh:mm:ff
17 |  * @todo could be refactored with some helper functions for clarity
18 |  */
19 | const timecodeToSeconds = time => {
20 |   if (typeof time === 'string') {
21 |     const resultPadded = padTimeToTimecode(time);
22 |     const resultConverted = timecodeToSecondsHelper(resultPadded);
23 | 
24 |     return resultConverted;
25 |   }
26 | 
27 |   // assuming it receive timecode as seconds as string '600'
28 |   return parseFloat(time);
29 | };
30 | 
31 | const shortTimecode = time => {
32 |   // handle edge case if it's zero, then just return shorter timecode
33 |   if (time === 0) {
34 |     return '00:00:00';
35 |   } else {
36 |     const timecode = secondsToTimecode(time);
37 |     return timecode.slice(0, -3);
38 |   }
39 | };
40 | 
41 | export { secondsToTimecode, timecodeToSeconds, shortTimecode };
42 | 


--------------------------------------------------------------------------------
/src/util/export-adapters/subtitles-generator/presegment-text/line-break-between-sentences/index.test.js:
--------------------------------------------------------------------------------
 1 | 'use strict';
 2 | import addLineBreakBetweenSentences from './index.js';
 3 | 
 4 | var sampleText = `Hi there, my name is Ian police - are recording this video to talk about mercury for the folks at a tech daily conference in New York.
 5 | Sorry, I can't be there in person, so we are building a prototype funded in part by Google DNI of a web-based computer, assisted transcription and translation tool with some video editing features.
 6 | It does speech to text and then automated consistent translation and then text to speech generate synthetic voices at time codes that line up with the original original audio.`;
 7 | 
 8 | var expectedOutput = `Hi there, my name is Ian police - are recording this video to talk about mercury for the folks at a tech daily conference in New York.
 9 | 
10 | Sorry, I can't be there in person, so we are building a prototype funded in part by Google DNI of a web-based computer, assisted transcription and translation tool with some video editing features.
11 | 
12 | It does speech to text and then automated consistent translation and then text to speech generate synthetic voices at time codes that line up with the original original audio.`;
13 | 
14 | test('add line break between sentences', () => {
15 |   var result = addLineBreakBetweenSentences(sampleText);
16 |   expect(result).toBe(expectedOutput);
17 | });
18 | 


--------------------------------------------------------------------------------
/src/util/export-adapters/subtitles-generator/presegment-text/fold/index.test.js:
--------------------------------------------------------------------------------
 1 | 'use strict';
 2 | import foldWords from './index.js';
 3 | 
 4 | const sampleText = `Hi there, my name is Ian police - are recording this video to talk about mercury for the folks at a tech daily conference in New York.
 5 | 
 6 | Sorry, I can't be there in person, so we are building a prototype funded in part by Google DNI of a web-based computer, assisted transcription and translation tool with some video editing features.
 7 | 
 8 | It does speech to text and then automated consistent translation and then text to speech generate synthetic voices at time codes that line up with the original original audio.`;
 9 | 
10 | const expectedOutput = `Hi there, my name is Ian police -
11 | are recording this video to talk
12 | about mercury for the folks at a
13 | tech daily conference in New York.
14 | 
15 | Sorry, I can't be there in person,
16 | so we are building a prototype
17 | funded in part by Google DNI of a
18 | web-based computer, assisted
19 | transcription and translation tool
20 | with some video editing features.
21 | 
22 | It does speech to text and then
23 | automated consistent translation
24 | and then text to speech generate
25 | synthetic voices at time codes
26 | that line up with the original
27 | original audio.`;
28 | 
29 | test('fold words at 35 char', () => {
30 |   const result = foldWords(sampleText, 35);
31 |   expect(result).toBe(expectedOutput);
32 | });
33 | 


--------------------------------------------------------------------------------
/docs/notes/verbose-generate-previous-timings-up-to-current-func.md:
--------------------------------------------------------------------------------
 1 | # verbose-generate-previous-timings-up-to-current-func
 2 | 
 3 | ```javascript
 4 | /**
 5 |  * See explanation in `src/utils/dpe-to-slate/index.js` for how this function works with css injection
 6 |  * to provide the current paragraph's highlight.
 7 |  * @param {Number} currentTime - float in seconds
 8 |  */
 9 | const generatePreviousTimingsUpToCurrent = (currentTime) => {
10 |   const lastWordStartTime = props.transcriptData.words[props.transcriptData.words.length - 1].start;
11 |   const lastWordStartTimeInt = parseInt(lastWordStartTime);
12 |   const emptyListOfTimes = Array(lastWordStartTimeInt);
13 |   const listOfTimesInt = [...emptyListOfTimes.keys()];
14 |   const listOfTimesUpToCurrentTimeInt = listOfTimesInt.splice(0, currentTime, 0);
15 |   const stringlistOfTimesUpToCurrentTimeInt = listOfTimesUpToCurrentTimeInt.join(' ');
16 |   return stringlistOfTimesUpToCurrentTimeInt;
17 | };
18 | ```
19 | 
20 | One line
21 | 
22 | ```javascript
23 | const generatePreviousTimingsUpToCurrent = (currentTime) => {
24 |   return [...Array(parseInt(props.transcriptData.words[props.transcriptData.words.length - 1].start)).keys()].splice(0, currentTime, 0).join(' ');
25 | };
26 | ```
27 | 
28 | simplified without using words
29 | 
30 | ```js
31 | function generatePreviousTimingsUpToCurrent(start) {
32 |   return new Array(parseInt(start))
33 |     .fill(1)
34 |     .map((_, i) => i + 1)
35 |     .join(' ');
36 | }
37 | ```
38 | 


--------------------------------------------------------------------------------
/src/util/timecode-converter/src/padTimeToTimecode.js:
--------------------------------------------------------------------------------
 1 | const countColon = timecode => timecode.split(':').length;
 2 | 
 3 | const includesFullStop = timecode => timecode.includes('.');
 4 | 
 5 | const isOneDigit = str => str.length === 1;
 6 | 
 7 | const padTimeToTimecode = time => {
 8 |   if (typeof time === 'string') {
 9 |     switch (countColon(time)) {
10 |       case 4:
11 |         // is already in timecode format
12 |         // hh:mm:ss:ff
13 |         return time;
14 |       case 2:
15 |         // m:ss
16 |         if (isOneDigit(time.split(':')[0])) {
17 |           return `00:0${time}:00`;
18 |         }
19 | 
20 |         return `00:${time}:00`;
21 |       case 3:
22 |         // hh:mm:ss
23 |         return `${time}:00`;
24 |       default:
25 |         // mm.ss
26 |         if (includesFullStop(time)) {
27 |           // m.ss
28 |           if (isOneDigit(time.split('.')[0])) {
29 |             return `00:0${time.split('.')[0]}:${time.split('.')[1]}:00`;
30 |           }
31 | 
32 |           return `00:${time.replace('.', ':')}:00`;
33 |         }
34 | 
35 |         // if just int, then it's seconds
36 |         // s
37 |         if (isOneDigit(time)) {
38 |           return `00:00:0${time}:00`;
39 |         }
40 | 
41 |         return `00:00:${time}:00`;
42 |     }
43 |     // edge case if it's number return a number coz cannot refactor
44 |     // TODO: might need to refactor and move this elsewhere
45 |   } else {
46 |     return time;
47 |   }
48 | };
49 | 
50 | export default padTimeToTimecode;
51 | 


--------------------------------------------------------------------------------
/src/components/slate-helpers/insert-nodes-at-selection/index.js:
--------------------------------------------------------------------------------
 1 | /*
 2 | https://github.com/dylans/slate-snippets#insert-node-at-beginning-of-document
 3 | // Insert nodes at selection
 4 | 
 5 | // https://docs.slatejs.org/api/transforms#transforms-insertnodes-editor-editor-nodes-node-or-node-options
 6 | 
 7 | Transforms.insertNodes(editor, [
 8 |   { type: 'inline_type', children: [{ text: 'some text', marks: [] }] },
 9 |   { text: ' and some text after the inline', marks: [] },
10 | ]);
11 | 
12 | https://github.com/dylans/slate-snippets#insert-inline--text--navigate-to-text
13 | // Insert inline + text & navigate to text
14 | 
15 | Transforms.insertNodes(editor, [
16 |     { type: 'link', url:'x', children: [{ text:'mja', marks:[] }] },
17 |     { text: '', marks:[] },
18 | ]);
19 | const nextPoint = Editor.after(editor, editor.selection.anchor);
20 | Editor.setSelection(editor, {anchor:nextPoint, focus:nextPoint})
21 | */
22 | import { Transforms, Editor } from 'slate';
23 | 
24 | /**
25 |  *
26 |  * @param {*} editor
27 |  * @param {array} - list of slateJS blocks objects
28 |  */
29 | function insertNodesAtSelection({ editor, blocks, moveSelection = false, options = {} }) {
30 |   Transforms.insertNodes(editor, [...blocks], options);
31 |   // move selection to that point
32 |   if (moveSelection) {
33 |     const nextPoint = Editor.after(editor, editor.selection.anchor);
34 |     Transforms.setSelection(editor, { anchor: nextPoint, focus: nextPoint });
35 |   }
36 | }
37 | 
38 | export default insertNodesAtSelection;
39 | 


--------------------------------------------------------------------------------
/src/util/export-adapters/subtitles-generator/presegment-text/index.js:
--------------------------------------------------------------------------------
 1 | import textSegmentation from './text-segmentation/index.js';
 2 | import addLineBreakBetweenSentences from './line-break-between-sentences/index.js';
 3 | import foldWords from './fold/index.js';
 4 | import divideIntoTwoLines from './divide-into-two-lines/index.js';
 5 | 
 6 | /**
 7 |  * Takes in array of word object,
 8 |  *  and returns string containing all the text
 9 |  * @param {array} words - Words
10 |  */
11 | function getTextFromWordsList(words) {
12 |   return words
13 |     .map((word) => {
14 |       return word.text;
15 |     })
16 |     .join(' ');
17 | }
18 | 
19 | /**
20 |  *
21 |  * @param {*} textInput - can be either plain text string or an array of word objects
22 |  */
23 | function preSegmentText(textInput, tmpNumberOfCharPerLine = 35) {
24 |   let text = textInput;
25 |   if (typeof textInput === 'object') {
26 |     text = getTextFromWordsList(textInput);
27 |   }
28 |   const segmentedText = textSegmentation(text);
29 |   // - 2.Line brek between stentences
30 |   const textWithLineBreakBetweenSentences = addLineBreakBetweenSentences(segmentedText);
31 |   // - 3.Fold char limit per line
32 |   const foldedText = foldWords(textWithLineBreakBetweenSentences, tmpNumberOfCharPerLine);
33 |   // - 4.Divide into two lines
34 |   const textDividedIntoTwoLines = divideIntoTwoLines(foldedText);
35 | 
36 |   return textDividedIntoTwoLines;
37 | }
38 | 
39 | export { preSegmentText, getTextFromWordsList };
40 | 
41 | export default preSegmentText;
42 | 


--------------------------------------------------------------------------------
/src/util/export-adapters/subtitles-generator/presegment-text/divide-into-two-lines/index.test.js:
--------------------------------------------------------------------------------
 1 | 'use strict';
 2 | import divideIntoTwoLines from './index.js';
 3 | 
 4 | var sampleText = `Hi there, my name is Ian police -
 5 | are recording this video to talk
 6 | about mercury for the folks at a
 7 | tech daily conference in New York.
 8 | 
 9 | Sorry, I can’t be there in person,
10 | so we are building a prototype
11 | funded in part by Google DNI of a
12 | web-based computer, assisted
13 | transcription and translation tool
14 | with some video editing features.
15 | 
16 | It does speech to text and then
17 | automated consistent translation
18 | and then text to speech generate
19 | synthetic voices at time codes that
20 | line up with the original original
21 | audio.`;
22 | 
23 | var expectedOutput = `Hi there, my name is Ian police -
24 | are recording this video to talk
25 | 
26 | about mercury for the folks at a
27 | tech daily conference in New York.
28 | 
29 | Sorry, I can’t be there in person,
30 | so we are building a prototype
31 | 
32 | funded in part by Google DNI of a
33 | web-based computer, assisted
34 | 
35 | transcription and translation tool
36 | with some video editing features.
37 | 
38 | It does speech to text and then
39 | automated consistent translation
40 | 
41 | and then text to speech generate
42 | synthetic voices at time codes that
43 | 
44 | line up with the original original
45 | audio.`;
46 | 
47 | test('divide into two lines', () => {
48 |   var result = divideIntoTwoLines(sampleText);
49 |   expect(result).toBe(expectedOutput);
50 | });
51 | 


--------------------------------------------------------------------------------
/src/util/timecode-converter/src/padTimeToTimecode.test.js:
--------------------------------------------------------------------------------
 1 | import padTimeToTimecode from './padTimeToTimecode';
 2 | 
 3 | describe('Timecode conversion TC- convertToSeconds', () => {
 4 |   it('Should be defined', () => {
 5 |     const demoTimecode = '12:34:56:78';
 6 |     const result = padTimeToTimecode(demoTimecode);
 7 |     expect(result).toBeDefined();
 8 |   });
 9 | 
10 |   it('hh:mm:ss:ff --> hh:mm:ss:ff ', () => {
11 |     const demoTimecode = '12:34:56:78';
12 |     const result = padTimeToTimecode(demoTimecode);
13 |     expect(result).toEqual(demoTimecode);
14 |   });
15 | 
16 |   it('mm:ss --> convert to hh:mm:ss:ms', () => {
17 |     const demoTime = '34:56';
18 |     const expectedTimecode = '00:34:56:00';
19 |     const result = padTimeToTimecode(demoTime);
20 |     expect(result).toEqual(expectedTimecode);
21 |   });
22 | 
23 |   xit('hh:mm:ss --> convert to hh:mm:ss:ms', () => {
24 |     const demoTime = '34:56:78';
25 |     const expectedTimecode = '00:34:56:78';
26 |     const result = padTimeToTimecode(demoTime);
27 |     expect(result).toEqual(expectedTimecode);
28 |   });
29 | 
30 |   it('mm.ss--> convert to hh:mm:ss:ms', () => {
31 |     const demoTime = '34.56';
32 |     const expectedTimecode = '00:34:56:00';
33 |     const result = padTimeToTimecode(demoTime);
34 |     expect(result).toEqual(expectedTimecode);
35 |   });
36 | 
37 |   it('120 sec --> 120', () => {
38 |     const demoTime = 120;
39 |     const expectedTimecode = 120;
40 |     const result = padTimeToTimecode(demoTime);
41 |     expect(result).toEqual(expectedTimecode);
42 |   });
43 | });
44 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/qa_individual_issue_report.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: QA Report - individual issue
 3 | about: Raise an individual QA issue
 4 | title: '[QA] Issue #2.1 - Create a project'
 5 | labels: QA Issue
 6 | assignees:
 7 | ---
 8 | 
 9 | <!-- Modify the above title as appropriate with the issue number # and step title for the QA issue you are logging. If it's an issue not part of the main QA steps leave the number blank, and create a descriptive title for it. -->
10 | 
11 | **QA Item and step's title**
12 | 
13 | <!-- eg #1.1 Can edit the text -->
14 | <!-- If it's an issue not part of the main QA steps, leave the number blank and just uncomment the line below -->
15 | <!-- Not part of the main QA steps -->
16 | 
17 | **Describe the bug**
18 | 
19 | <!-- A clear and concise description of what the bug is. -->
20 | 
21 | **To Reproduce**
22 | Steps to reproduce the behavior:
23 | 
24 | 1. Go to '...'
25 | 2. Click on '....'
26 | 3. Scroll down to '....'
27 | 4. See error
28 | 
29 | **Expected behavior**
30 | 
31 | <!-- A clear and concise description of what you expected to happen. -->
32 | 
33 | **Screenshots**
34 | 
35 | <!-- If applicable, add screenshots to help explain your problem. -->
36 | 
37 | **Desktop (please complete the following information):**
38 | 
39 | - OS: [e.g. iOS]
40 | - Browser [e.g. chrome, safari]
41 | - Version [e.g. 22]
42 | 
43 | **Smartphone (please complete the following information):**
44 | 
45 | - Device: [e.g. iPhone6]
46 | - OS: [e.g. iOS8.1]
47 | - Browser [e.g. stock browser, safari]
48 | - Version [e.g. 22]
49 | 
50 | **Additional context**
51 | 
52 | <!-- Add any other context about the problem here. -->
53 | 


--------------------------------------------------------------------------------
/src/util/export-adapters/subtitles-generator/presegment-text/text-segmentation/index.test.js:
--------------------------------------------------------------------------------
 1 | 'use strict';
 2 | import textSegmentation from './index.js';
 3 | 
 4 | var sampleText =
 5 |   "Hi there, my name is Mr. Ian police - are recording this video to talk about mercury for the folks at a tech daily conference in New York. Sorry, I can't be there in person, so we are building a prototype funded in part by Google DNI of a web-based computer, assisted transcription and translation tool with some video editing features. It does speech to text and then automated consistent translation and then text to speech generate synthetic voices at time codes that line up with the original original audio.";
 6 | 
 7 | var expectedOutput = `Hi there, my name is Mr. Ian police - are recording this video to talk about mercury for the folks at a tech daily conference in New York.
 8 | Sorry, I can't be there in person, so we are building a prototype funded in part by Google DNI of a web-based computer, assisted transcription and translation tool with some video editing features.
 9 | It does speech to text and then automated consistent translation and then text to speech generate synthetic voices at time codes that line up with the original original audio.`;
10 | 
11 | var optionalHonorificsSample = 'Mr';
12 | 
13 | test('add line break between sentences', () => {
14 |   var result = textSegmentation(sampleText);
15 |   expect(result).toBe(expectedOutput);
16 | });
17 | 
18 | test('add line break between sentences,with optional honorifics', () => {
19 |   var result = textSegmentation(sampleText, optionalHonorificsSample);
20 |   expect(result).toBe(expectedOutput);
21 | });
22 | 


--------------------------------------------------------------------------------
/src/util/export-adapters/subtitles-generator/compose-subtitles/csv/index.test.js:
--------------------------------------------------------------------------------
 1 | import csvGenerator from './index.js';
 2 | const SAMPLE_SRT_JSON_CONTENT = [
 3 |   {
 4 |     text: "=cmd|' /C calc'!'A1' So tell me, let’s start at the beginning.",
 5 |     start: 1.41,
 6 |     end: 3.28,
 7 |     speaker: 'James Jacoby',
 8 |   },
 9 |   {
10 |     text: 'How’d you get to Facebook in the beginning?',
11 |     start: 3.28,
12 |     end: 6.1,
13 |     speaker: 'James Jacoby',
14 |   },
15 |   {
16 |     text: 'So I joined the company in the late summer of 2005.',
17 |     start: 6.1,
18 |     end: 9.49,
19 |     speaker: 'James Jacoby',
20 |   },
21 |   {
22 |     text: 'At the time, I was an independent designer and developer working in',
23 |     start: 9.49,
24 |     end: 12.67,
25 |     speaker: 'Soleio Cuervo',
26 |   },
27 |   {
28 |     text: 'San Francisco.',
29 |     start: 12.67,
30 |     end: 13.29,
31 |     speaker: 'Soleio Cuervo',
32 |   },
33 | ];
34 | 
35 | const CSV_SAMPLE_OUTPUT = `N,In,Out,Speaker,Text
36 | 1,"00:00:01,410","00:00:03,280","'James Jacoby","'=cmd|' /C calc'!'A1' So tell me, let’s start at the beginning."
37 | 2,"00:00:03,280","00:00:06,100","'James Jacoby","'How’d you get to Facebook in the beginning?"
38 | 3,"00:00:06,100","00:00:09,490","'James Jacoby","'So I joined the company in the late summer of 2005."
39 | 4,"00:00:09,490","00:00:12,670","'Soleio Cuervo","'At the time, I was an independent designer and developer working in"
40 | 5,"00:00:12,670","00:00:13,290","'Soleio Cuervo","'San Francisco."`;
41 | 
42 | test('CSV generator', () => {
43 |   const result = csvGenerator(SAMPLE_SRT_JSON_CONTENT);
44 |   expect(result).toEqual(CSV_SAMPLE_OUTPUT);
45 | });
46 | 


--------------------------------------------------------------------------------
/src/util/export-adapters/txt/index.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Convert Slate editor content to plain text, optionally prefixed with
 3 |  * timecodes and/or speaker names (text + speaker + timecode).
 4 |  * TODO: have a separate one or some logic to get text without timecodes?
 5 |  *
 6 |  * Export looks like
 7 |  ```
 8 | 00:00:13		F_S12
 9 | There is a day. About ten years ago when I asked a friend to hold a baby dinosaur called plea. All
10 | 
11 | 00:00:24		F_S1
12 | that
13 | 
14 | 00:00:24		F_S12
15 | he'd ordered and I was really excited about it because I've always loved about this one has really caught technical features. It had more orders and touch sensors. It had an infra red camera and one of the things that had was a tilt sensor so it. Knew what direction. It was facing. If and when you held it upside down.
16 | 
17 | 00:00:46		U_UKN
18 | I thought.
19 | ```
20 |  */
21 | 
22 | import { shortTimecode } from '../../timecode-converter/index.js';
23 | import { Node } from 'slate';
24 | const slateToText = ({ value, speakers, timecodes, atlasFormat }) => {
25 |   return (
26 |     value
27 |       // Return the string content of each paragraph in the value's children.
28 |       .map((n) => {
29 |         if (atlasFormat) {
30 |           return `${timecodes ? `${speakers ? n.speaker : ''}\t[${shortTimecode(n.start)}]\t` : ''}\t${Node.string(n)}`;
31 |         } else {
32 |           return `${timecodes ? `${shortTimecode(n.start)}\t` : ''}${speakers ? n.speaker.toUpperCase() : ''}${
33 |             speakers || timecodes ? '\n' : ''
34 |           }${Node.string(n)}`;
35 |         }
36 |       })
37 |       // Join them all with line breaks denoting paragraphs.
38 |       .join('\n\n')
39 |   );
40 | };
41 | 
42 | export default slateToText;
43 | 


--------------------------------------------------------------------------------
/src/util/export-adapters/subtitles-generator/presegment-text/line-break-between-sentences/README.md:
--------------------------------------------------------------------------------
 1 | # Line break between sentences
 2 | 
 3 | <!-- See module readme for more details -->
 4 | Separates each line (a sentence) from the next with an empty line.
 5 | <!-- Adds a line break `\n\n` in between each sentence.  -->
 6 | 
 7 | #### Input
 8 | 
 9 | Text in which each sentence (ending with a full stop) is on its own line, i.e. sentences are separated by `\n`.
10 | 
11 | ```
12 | Hi there, my name is Ian police - are recording this video to talk about mercury for the folks at a tech daily conference in New York.
13 | Sorry, I can't be there in person, so we are building a prototype funded in part by Google DNI of a web-based computer, assisted transcription and translation tool with some video editing features.
14 | It does speech to text and then automated consistent translation and then text to speech generate synthetic voices at time codes that line up with the original original audio.
15 | ```
16 | 
17 | #### Output
18 | 
19 | ```
20 | Hi there, my name is Ian police - are recording this video to talk about mercury for the folks at a tech daily conference in New York.
21 | 
22 | Sorry, I can't be there in person, so we are building a prototype funded in part by Google DNI of a web-based computer, assisted transcription and translation tool with some video editing features.
23 | 
24 | It does speech to text and then automated consistent translation and then text to speech generate synthetic voices at time codes that line up with the original original audio.
25 | ```
26 | 
27 | #### algo 
28 | 
29 | ```bash
30 | # Add blank line after every new line
31 | sed -e 'G' test.txt > test2.txt
32 | ```
33 | 
34 | Equivalent to 
35 | 
36 | ```js
37 | test.replace(/\n/g,"\n\n")
38 | ```
39 | 


--------------------------------------------------------------------------------
/src/util/timecode-converter/src/secondsToTimecode.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Raised in this comment https://github.com/bbc/react-transcript-editor/pull/9
 3 |  * abstracted from https://github.com/bbc/newslabs-cdn/blob/master/js/20-bbcnpf.utils.js
 4 |  * In broadcast VIDEO, timecode is NOT hh:mm:ss:ms, it's hh:mm:ss:ff where ff is frames,
 5 |  * dependent on the framerate of the media concerned.
 6 |  * `hh:mm:ss:ff`
 7 |  */
 8 | 
 9 | /**
10 |  * Helper function
11 |  * Rounds to the 14milliseconds boundaries
12 |  * Time in video can only "exist in" 14milliseconds boundaries.
13 |  * This makes it possible for the HTML5 player to be frame accurate.
14 |  * @param {*} seconds
15 |  * @param {*} fps
16 |  */
17 | const normalisePlayerTime = function(seconds, fps) {
18 |   return Number(((1.0 / fps) * Math.floor(Number((fps * seconds).toPrecision(12)))).toFixed(2));
19 | };
20 | 
21 | /*
22 |  * @param {*} seconds
23 |  * @param {*} fps
24 |  */
25 | const secondsToTimecode = function(seconds, framePerSeconds) {
26 |   // handle edge case, trying to convert zero seconds
27 |   if (seconds === 0) {
28 |     return '00:00:00:00';
29 |   }
30 |   // written for PAL non-drop timecode
31 |   let fps = 25;
32 |   if (framePerSeconds !== undefined) {
33 |     fps = framePerSeconds;
34 |   }
35 | 
36 |   const normalisedSeconds = normalisePlayerTime(seconds, fps);
37 | 
38 |   const wholeSeconds = Math.floor(normalisedSeconds);
39 |   const frames = ((normalisedSeconds - wholeSeconds) * fps).toFixed(2);
40 | 
41 |   // prepends zero - example pads 3 to 03
42 |   function _padZero(n) {
43 |     if (n < 10) return `0${parseInt(n)}`;
44 | 
45 |     return parseInt(n);
46 |   }
47 | 
48 |   return `${_padZero((wholeSeconds / 60 / 60) % 60)}:${_padZero((wholeSeconds / 60) % 60)}:${_padZero(wholeSeconds % 60)}:${_padZero(frames)}`;
49 | };
50 | 
51 | export default secondsToTimecode;
52 | 


--------------------------------------------------------------------------------
/src/util/export-adapters/subtitles-generator/compose-subtitles/csv/index.js:
--------------------------------------------------------------------------------
 1 | import formatSeconds from '../util/format-seconds.js';
 2 | 
 3 | /**
 4 |  * from issue https://github.com/newscorp-ghfb/dj-tools-transcribe/issues/70
 5 |  * > prepend each cell field with a single quote, so that their content will be read as text by the spreadsheet editor.
 6 |  * @param {string} text
 7 |  * @returns {string}
 8 |  */
 9 | function escapeStringForCSV(text) {
10 |   return `'${text}`;
11 | }
12 | 
13 | // seconds to HH:MM:SS,000
14 | function formatTimecodesInSrtFormat(seconds) {
15 |   return formatSeconds(parseFloat(seconds)).replace('.', ',');
16 | }
17 | 
18 | function csvGenerator(srtJsonContent) {
19 |   const csvHeader = 'N,In,Out,Speaker,Text';
20 | 
21 |   const csvBody = srtJsonContent
22 |     .map(({ start, end, speaker, text }, index) => {
23 |       const lineIndex = `${index + 1}`;
24 |       const startTimecode = `\"${formatTimecodesInSrtFormat(start)}\"`;
25 |       const endTimecode = `\"${formatTimecodesInSrtFormat(end)}\"`;
26 |       // removing line breaks and and removing " as they break the csv.
27 |       // wrapping text in escaped " to  escape any , for the csv.
28 |       // adding carriage return \n to signal end of line in csv
29 |       // Preserving line break within srt lines to allow round trip from csv back to srt file in same format.
30 |       // by replacing \n with \r\n.
31 |       const speakerLabel = `\"${escapeStringForCSV(speaker.replace(/\n/g, ' '))}\"`;
32 |       const textField = `\"${escapeStringForCSV(text.replace(/\n/g, ' '))}\"`;
33 |       const csvLine = `${lineIndex},${startTimecode},${endTimecode},${speakerLabel},${textField}`;
34 |       return csvLine;
35 |     })
36 |     .join('\n');
37 | 
38 |   const csvContent = `${csvHeader}\n${csvBody}`;
39 |   return csvContent;
40 | }
41 | 
42 | export default csvGenerator;
43 | 


--------------------------------------------------------------------------------
/src/components/slate-helpers/get-selection-nodes/index.js:
--------------------------------------------------------------------------------
 1 | const getSelectionNodes = (editor, selection) => {
 2 |   try {
 3 |     const orderedSelection = [selection.anchor, selection.focus].sort((a, b) => {
 4 |       return a.path[0] - b.path[0];
 5 |     });
 6 |     const selectionStart = orderedSelection[0];
 7 |     const selectionEnd = orderedSelection[1];
 8 |     let counterAnchor = 0;
 9 |     let goalAnchor = selectionStart.offset;
10 |     let targetWordIndexAnchor = null;
11 |     let selectedLeafWordsAnchor = editor.children[selectionStart.path[0]].children[0].words;
12 |     // let pathValue = selectionStart.path;
13 |     // let selectedLeafWordsAnchor2 = editor.children[selectionStart.path].children[0].words;
14 | 
15 |     selectedLeafWordsAnchor.forEach((word, wordIndex) => {
16 |       const wordLength = (word.text + ' ').length;
17 | 
18 |       counterAnchor = counterAnchor + wordLength;
19 |       if (counterAnchor <= goalAnchor) {
20 |         targetWordIndexAnchor = wordIndex;
21 |       }
22 |     });
23 | 
24 |     const startWord = selectedLeafWordsAnchor[targetWordIndexAnchor + 1];
25 | 
26 |     let counter = 0;
27 |     let goal = selectionEnd.offset;
28 |     let targetWordIndex = null;
29 |     let selectedLeafWords = editor.children[selectionEnd.path[0]].children[0].words;
30 |     selectedLeafWords.forEach((word, wordIndex) => {
31 |       const wordLength = (word.text + ' ').length;
32 | 
33 |       counter = counter + wordLength;
34 |       if (counter <= goal) {
35 |         targetWordIndex = wordIndex;
36 |       }
37 |     });
38 | 
39 |     const endWord = selectedLeafWords[targetWordIndex + 1];
40 |     // return { startSec: startWord.start, endSec: endWord.end };
41 |     return { startWord, endWord };
42 |   } catch (error) {
43 |     console.error('error finding times from selection:: ', error);
44 |   }
45 | };
46 | 
47 | export default getSelectionNodes;
48 | 


--------------------------------------------------------------------------------
/src/util/export-adapters/subtitles-generator/presegment-text/README.md:
--------------------------------------------------------------------------------
 1 | # Pre segmentation 
 2 | 
 3 | ## Input 
 4 | - either an array of word objects    
 5 | example
 6 | ```json
 7 |  [ 
 8 |       {
 9 |         "id": 0,
10 |         "start": 13.02,
11 |         "end": 13.17,
12 |         "text": "There"
13 |       },
14 |       {
15 |         "id": 1,
16 |         "start": 13.17,
17 |         "end": 13.38,
18 |         "text": "is"
19 |       },
20 |       {
21 |         "id": 2,
22 |         "start": 13.38,
23 |         "end": 13.44,
24 |         "text": "a"
25 |       },
26 |       {
27 |         "id": 3,
28 |         "start": 13.44,
29 |         "end": 13.86,
30 |         "text": "day."
31 |       },
32 |       {
33 |         "id": 4,
34 |         "start": 13.86,
35 |         "end": 14.13,
36 |         "text": "About"
37 |       },
38 |       {
39 |         "id": 5,
40 |         "start": 14.13,
41 |         "end": 14.38,
42 |         "text": "ten"
43 |       },
44 |       {
45 |         "id": 6,
46 |         "start": 14.38,
47 |         "end": 14.61,
48 |         "text": "years"
49 |       },
50 |       {
51 |         "id": 7,
52 |         "start": 14.61,
53 |         "end": 15.15,
54 |         "text": "ago"
55 |       },
56 | ```
57 | - or a string of text     
58 | Example
59 | ```
60 | There is a day. About ten years ago
61 | ```
62 | 
63 | ## Output: 
64 | - segmented plain text
65 | 
66 | example
67 | 
68 | ```
69 | There is a day.
70 | 
71 | About ten years ago when I asked a
72 | 
73 | friend to hold a baby dinosaur
74 | robot upside down.
75 | 
76 | It was a toy called plea.
77 | 
78 | All It's a super courts are
79 | 
80 | showing off to my friend and I
81 | said to hold it, but he'll see
82 | 
83 | ...
84 | ```
85 | 
86 | 
87 | This allows for flexibility in giving the input either to aeneas forced aligner to produce subtitles or to another algorithm to restore timecodes from STT word timings output if available.


--------------------------------------------------------------------------------
/src/util/export-adapters/subtitles-generator/compose-subtitles/itt.js:
--------------------------------------------------------------------------------
 1 | import tcFormat from './util/tc-format.js';
 2 | import escapeText from './util/escape-text.js';
 3 | 
 4 | const ittGenerator = (vttJSON, lang = 'en-GB', FPS = 25) => {
 5 |   let ittOut = `<?xml version="1.0" encoding="UTF-8"?>
 6 |         <tt
 7 |           xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 8 |           xmlns="http://www.w3.org/ns/ttml"
 9 |           xmlns:tt="http://www.w3.org/ns/ttml"
10 |           xmlns:tts="http://www.w3.org/ns/ttml#styling"
11 |           xmlns:ttp="http://www.w3.org/ns/ttml#parameter"
12 |           xml:lang="${lang}"
13 |           ttp:timeBase="smpte"
14 |           ttp:frameRate="${FPS}"
15 |           ttp:frameRateMultiplier="${FPS === 25 ? '1 1' : '999 1000'}"
16 |           ttp:dropMode="nonDrop"
17 |         >
18 |         <head>
19 |           <styling>
20 |             <style
21 |               xml:id="normal"
22 |               tts:fontFamily="sansSerif"
23 |               tts:fontWeight="normal"
24 |               tts:fontStyle="normal"
25 |               tts:color="white"
26 |               tts:fontSize="100%"
27 |             />
28 |           </styling>
29 |           <layout>
30 |             <region
31 |               xml:id="bottom"
32 |               tts:origin="0% 85%"
33 |               tts:extent="100% 15%"
34 |               tts:textAlign="center"
35 |               tts:displayAlign="after"
36 |             />
37 |           </layout>
38 |         </head>
39 |         <body style="normal" region="bottom">
40 |           <div begin="-01:00:00:00">`;
41 |   vttJSON.forEach(v => {
42 |     ittOut += `<p begin="${tcFormat(parseFloat(v.start) * FPS, FPS)}" end="${tcFormat(parseFloat(v.end) * FPS, FPS)}">${escapeText(v.text).replace(
43 |       /\n/g,
44 |       '<br />'
45 |     )}</p>\n`;
46 |   });
47 |   ittOut += '</div>\n</body>\n</tt>';
48 | 
49 |   return ittOut;
50 | };
51 | 
52 | export default ittGenerator;
53 | 


--------------------------------------------------------------------------------
/src/util/export-adapters/subtitles-generator/presegment-text/divide-into-two-lines/README.md:
--------------------------------------------------------------------------------
 1 | # Divide into two lines
 2 | 
 3 | Take these new chunks and separate them further so that there are no more than two consecutive lines before an empty line.
 4 | 
 5 | Creating block of text, with one or two consecutive lines.
 6 | 
 7 | Groups “paragraphs” by `\n`.
 8 | 
 9 | For “paragraphs” that are longer than one line,
10 | add an extra line break `\n` after every two lines.
11 | 
12 | 
13 | #### Input
14 | is output of previous section 
15 | 
16 | ```
17 | Hi there, my name is Ian police -
18 | are recording this video to talk
19 | about mercury for the folks at a
20 | tech daily conference in New York.
21 | 
22 | Sorry, I can’t be there in person,
23 | so we are building a prototype
24 | funded in part by Google DNI of a
25 | web-based computer, assisted
26 | transcription and translation tool
27 | with some video editing features.
28 | 
29 | It does speech to text and then
30 | automated consistent translation
31 | and then text to speech generate
32 | synthetic voices at time codes that
33 | line up with the original original
34 | audio.
35 | ```
36 | 
37 | #### output
38 | 
39 | ```
40 | Hi there, my name is Ian police -
41 | are recording this video to talk
42 | 
43 | about mercury for the folks at a
44 | tech daily conference in New York.
45 | 
46 | Sorry, I can’t be there in person,
47 | so we are building a prototype
48 | 
49 | funded in part by Google DNI of a
50 | web-based computer, assisted
51 | 
52 | transcription and translation tool
53 | with some video editing features.
54 | 
55 | It does speech to text and then
56 | automated consistent translation
57 | 
58 | and then text to speech generate
59 | synthetic voices at time codes that
60 | 
61 | line up with the original original
62 | audio.
63 | ```
64 | 
65 | #### algo
66 | ```perl
67 | # Insert new line for every two lines, preserving paragraphs
68 | perl -00 -ple 's/.*\n.*\n/$&\n/mg' test3.txt > "$f"
69 | ```
70 | 


--------------------------------------------------------------------------------
/src/util/export-adapters/subtitles-generator/example-usage.js:
--------------------------------------------------------------------------------
 1 | import fs from 'fs';
 2 | import subtitlesGenerator from './index.js';
 3 | // import transcript from './sample/words-list.sample.json';
 4 | import transcript from './sample/words-list-2.sample.json';
 5 | const sampleWords = transcript.words;
 6 | 
 7 | function getTextFromWordsList(words) {
 8 |   return words
 9 |     .map((word) => {
10 |       return word.text;
11 |     })
12 |     .join(' ');
13 | }
14 | 
15 | const plainText = getTextFromWordsList(sampleWords);
16 | // Only the `json` export runs by default; the commented-out calls below show other `type` options.
17 | const subtitlesJson = subtitlesGenerator({ words: sampleWords, type: 'json' });
18 | // const ttmlPremiere = subtitlesGenerator({ words: sampleWords, type: 'premiere' });
19 | // const ittData = subtitlesGenerator({ words: sampleWords, type: 'itt' });
20 | // const ttmlData = subtitlesGenerator({ words: sampleWords, type: 'ttml' });
21 | // const srtData = subtitlesGenerator({ words: sampleWords, type: 'srt', numberOfCharPerLine: 35 });
22 | // const vttData = subtitlesGenerator({ words: sampleWords, type: 'vtt' });
23 | // const csvData = subtitlesGenerator({ words: sampleWords, type: 'csv' });
24 | // const preSegmentTextData = subtitlesGenerator({ words: sampleWords, type: 'pre-segment-txt' });
25 | // const testTet = subtitlesGenerator({ words: plainText, type: 'txt' });
26 | 
27 | console.log(subtitlesJson);
28 | 
29 | // fs.writeFileSync('./example-output/test.json', JSON.stringify(subtitlesJson, null, 2));
30 | // fs.writeFileSync('./example-output/test-premiere.xml', ttmlPremiere);
31 | // fs.writeFileSync('./example-output/test.itt', ittData);
32 | // fs.writeFileSync('./example-output/test.ttml', ttmlData);
33 | // fs.writeFileSync('./example-output/test.srt', srtData);
34 | // fs.writeFileSync('./example-output/test.vtt', vttData);
35 | // fs.writeFileSync('./example-output/test.csv', csvData);
36 | // fs.writeFileSync('./example-output/test-presegment.txt', preSegmentTextData);
37 | // fs.writeFileSync('./example-output/test.txt', testTet);
38 | 


--------------------------------------------------------------------------------
/src/util/export-adapters/subtitles-generator/presegment-text/fold/README.md:
--------------------------------------------------------------------------------
 1 | <!-- or use https://github.com/substack/node-wordwrap -->
 2 | # Fold char limit per line
 3 | 
 4 | folds each line at char limit. eg 35 char. 
 5 | 
 6 | The 2nd line (pictured) takes each of the sentences (now separated by an empty line) and places a new line mark at the end of the word that exceeds 35 characters (if the sentence exceeds that number).
 7 | 
 8 | 
 9 | <!-- // https://jsfiddle.net/cL3kuxum/9/
10 | // https://jsfiddle.net/36nmceu1/16/
11 | // https://jsfiddle.net/6nsyqzen/12/
12 | // TODO: refactor into module var text = `Hi there, my name is Ian police - are recording this video to talk about mercury for the folks at a tech daily conference in New York.
13 |  -->
14 | 
15 | #### Input
16 | 
17 | ```
18 | Hi there, my name is Ian police - are recording this video to talk about mercury for the folks at a tech daily conference in New York.
19 | 
20 | Sorry, I can't be there in person, so we are building a prototype funded in part by Google DNI of a web-based computer, assisted transcription and translation tool with some video editing features.
21 | 
22 | It does speech to text and then automated consistent translation and then text to speech generate synthetic voices at time codes that line up with the original original audio.
23 | ```
24 | 
25 | #### Output
26 | 
27 | ```
28 | 
29 | Hi there, my name is Ian police -
30 | are recording this video to talk
31 | about mercury for the folks at a
32 | tech daily conference in New York.
33 | 
34 | Sorry, I can’t be there in person,
35 | so we are building a prototype
36 | funded in part by Google DNI of a
37 | web-based computer, assisted
38 | transcription and translation tool
39 | with some video editing features.
40 | 
41 | It does speech to text and then
42 | automated consistent translation
43 | and then text to speech generate
44 | synthetic voices at time codes that
45 | line up with the original original
46 | audio.
47 | ```
48 | 
49 | #### algo
50 | 
51 | ```bash
52 | # Break each line at 35 characters
53 | fold -w 35 -s test2.txt > test3.txt
54 | ```
55 | 


--------------------------------------------------------------------------------
/src/util/export-adapters/subtitles-generator/presegment-text/text-segmentation/README.md:
--------------------------------------------------------------------------------
 1 | # Text segmentation
 2 | 
 3 | #### Input
 4 | 
 5 | Plain text, **with punctuation** all on one line 
 6 | 
 7 | ```
 8 | Hi there, my name is Ian police - are recording this video to talk about mercury for the folks at a tech daily conference in New York. Sorry, I can't be there in person, so we are building a prototype funded in part by Google DNI of a web-based computer, assisted transcription and translation tool with some video editing features. It does speech to text and then automated consistent translation and then text to speech generate synthetic voices at time codes that line up with the original original audio.
 9 | ```
10 | 
11 | #### Out 
12 | 
13 | Puts each sentence that ends with a full stop on a new line (`\n`), without getting fooled by `HONORIFICS`.
14 | 
15 | ```
16 | Hi there, my name is Ian police - are recording this video to talk about mercury for the folks at a tech daily conference in New York.
17 | Sorry, I can't be there in person, so we are building a prototype funded in part by Google DNI of a web-based computer, assisted transcription and translation tool with some video editing features.
18 | It does speech to text and then automated consistent translation and then text to speech generate synthetic voices at time codes that line up with the original original audio.
19 | ```
20 | 
21 | #### algo 
22 | 
23 | [Joseph Polizzotto's perl script identify sentence boundaries sentence-boundary.pl ](https://github.com/polizoto/segment_transcript/blob/master/sentence-boundary.pl)
24 | 
25 | ```perl
26 | # segment transcript into sentences
27 | perl sentence-boundary.pl -d HONORIFICS -i "$f" -o test.txt
28 | ```
29 | 
30 | list of [`HONORIFICS` here](https://github.com/polizoto/align_transcript/blob/master/HONORIFICS)
31 | 
32 | 
33 | ## Dependency 
34 | 
35 | - [sbd](https://www.npmjs.com/package/sbd)
36 | 
37 | 
38 | ## TODO: 
39 | - [ ] Do further tests with honorifics, see [`HONORIFICS` here](./HONORIFICS.txt)
40 | - [ ] if packaging `text_segmentation` as a separate module, add `package.json` with `sbd` dependency.


--------------------------------------------------------------------------------
/src/util/export-adapters/index.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Adapters for exporting the Slate editor content
 3 |  * @param {json} slateValue - Slate value (list of blocks)
 4 |  * @param {string} type - the type of file supported by the available adapters
 5 |  */
 6 | 
 7 | // Note: export adapter does not do any alignment
 8 | // just converts between formats
 9 | import slateToText from './txt';
10 | import converSlateToDpe from './slate-to-dpe';
11 | import slateToDocx from '../export-adapters/docx';
12 | import subtitlesExportOptionsList from './subtitles-generator/list';
13 | import subtitlesGenerator from './subtitles-generator/index';
14 | 
15 | const captionTypeList = subtitlesExportOptionsList.map((list) => {
16 |   return list.type;
17 | });
18 | 
19 | export const isCaptionType = (type) => {
20 |   const res = captionTypeList.includes(type);
21 |   return res;
22 | };
23 | const exportAdapter = ({ slateValue, type, ext, transcriptTitle, speakers, timecodes, inlineTimecodes, hideTitle, atlasFormat }) => {
24 |   switch (type) {
25 |     case 'text':
26 |       return slateToText({ value: slateValue, speakers, timecodes, atlasFormat });
27 |     case 'json-slate':
28 |       return slateValue;
29 |     case 'json-digitalpaperedit':
30 |       return converSlateToDpe(slateValue);
31 |     case 'word':
32 |       //   return { data: draftToDocx(slateValue, transcriptTitle), ext: 'docx' };
33 |       return slateToDocx({
34 |         title: transcriptTitle,
35 |         value: slateValue,
36 |         speakers,
37 |         timecodes,
38 |         inlineTimecodes,
39 |         hideTitle,
40 |       });
41 |     default:
42 |       if (isCaptionType(type)) {
43 |         const editorContent = converSlateToDpe(slateValue);
44 |         let subtitlesJson = subtitlesGenerator({
45 |           words: editorContent.words,
46 |           paragraphs: editorContent.paragraphs,
47 |           type,
48 |           slateValue,
49 |         });
50 |         return subtitlesJson;
51 |       }
52 |       // some default, unlikely to be called
53 |       console.error('Did not recognise the export format ', type);
54 |       return 'Did not recognise the export format';
55 |   }
56 | };
57 | 
58 | export default exportAdapter;
59 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Logs
  2 | logs
  3 | *.log
  4 | npm-debug.log*
  5 | yarn-debug.log*
  6 | yarn-error.log*
  7 | lerna-debug.log*
  8 | 
  9 | # Diagnostic reports (https://nodejs.org/api/report.html)
 10 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
 11 | 
 12 | # Runtime data
 13 | pids
 14 | *.pid
 15 | *.seed
 16 | *.pid.lock
 17 | 
 18 | # Directory for instrumented libs generated by jscoverage/JSCover
 19 | lib-cov
 20 | 
 21 | # Coverage directory used by tools like istanbul
 22 | coverage
 23 | *.lcov
 24 | 
 25 | # nyc test coverage
 26 | .nyc_output
 27 | 
 28 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
 29 | .grunt
 30 | 
 31 | # Bower dependency directory (https://bower.io/)
 32 | bower_components
 33 | 
 34 | # node-waf configuration
 35 | .lock-wscript
 36 | 
 37 | # Compiled binary addons (https://nodejs.org/api/addons.html)
 38 | build/Release
 39 | 
 40 | dist/
 41 | 
 42 | # Dependency directories
 43 | node_modules/
 44 | jspm_packages/
 45 | 
 46 | # TypeScript v1 declaration files
 47 | typings/
 48 | 
 49 | # TypeScript cache
 50 | *.tsbuildinfo
 51 | 
 52 | # Optional npm cache directory
 53 | .npm
 54 | 
 55 | # Optional eslint cache
 56 | .eslintcache
 57 | 
 58 | # Optional REPL history
 59 | .node_repl_history
 60 | 
 61 | # Output of 'npm pack'
 62 | *.tgz
 63 | 
 64 | # Yarn Integrity file
 65 | .yarn-integrity
 66 | 
 67 | # dotenv environment variables file
 68 | .env
 69 | .env.test
 70 | 
 71 | # parcel-bundler cache (https://parceljs.org/)
 72 | .cache
 73 | 
 74 | # next.js build output
 75 | .next
 76 | 
 77 | # nuxt.js build output
 78 | .nuxt
 79 | 
 80 | # vuepress build output
 81 | .vuepress/dist
 82 | 
 83 | # Serverless directories
 84 | .serverless/
 85 | 
 86 | # FuseBox cache
 87 | .fusebox/
 88 | 
 89 | # DynamoDB Local files
 90 | .dynamodb/
 91 | 
 92 | .DS_Store
 93 | 
 94 | 
 95 | .DS_Store
 96 | 
 97 | #ignore all video files
 98 | *.wmv
 99 | *.mpg
100 | *.mpeg
101 | *.mp4
102 | *.mov
103 | *.flv
104 | *.avi
105 | *.ogv
106 | *.ogg
107 | *.webm
108 | 
109 | #ignore audio file
110 | *.wav
111 | *.mp3
112 | 
113 | #ignore subtitles files
114 | *.srt
115 | *.sbv
116 | 
117 | 
118 | lib/
119 | 
120 | .out/
121 | out1381/


--------------------------------------------------------------------------------
/src/util/insert-timecodes-in-line-in-words-list/index.js:
--------------------------------------------------------------------------------
 1 | import { shortTimecode } from '../timecode-converter';
 2 | /**
 3 |  * Helper function for OHMS format
 4 |  * OHMS is an open source indexing tool created by the University of Kentucky,
 5 |  * which is used by a number of cultural heritage institutions.
 6 |  * OHMS uses xml for the index and a Word doc for the transcript
 7 |  * with timecodes at 30 second or 60 second intervals written in-line in the format of [hh:mm:ss]
 8 |  * `slate-transcript-editor` OHMS export option exports the word part.
 9 |  * This function organises the words to add timecode intervals at the required times.
10 |  */
11 | 
12 | import convertWordsToText from '../convert-words-to-text';
13 | 
14 | const insertTimecodesInlineinWordsList = ({ intervalSeconds = 30, words, lastInsertTime = 0 }) => {
15 |   const tmpWords = JSON.parse(JSON.stringify(words));
16 |   const sortedWords = tmpWords.sort((a, b) => a.start - b.start);
17 |   let newWords = [];
18 | 
19 |   for (const word of sortedWords) {
20 |     if (word.start - lastInsertTime > intervalSeconds) {
21 |       lastInsertTime = Math.floor(word.start / intervalSeconds) * intervalSeconds;
22 |       const timecode = shortTimecode(lastInsertTime);
23 |       newWords.push({
24 |         start: word.start,
25 |         end: word.start + (word.end - word.start) / 2,
26 |         text: `[${timecode}]`,
27 |       });
28 |       word.start = word.start + (word.end - word.start) / 2;
29 |     }
30 | 
31 |     newWords.push(word);
32 |   }
33 | 
34 |   return [newWords, lastInsertTime];
35 | };
36 | 
37 | const insertTimecodesInLineInSlateJs = (slateValue) => {
38 |   let lastInsertTime = 0;
39 |   return slateValue.map((block) => {
40 |     const newBlock = JSON.parse(JSON.stringify(block));
41 |     const [newWords, lastInsert] = insertTimecodesInlineinWordsList({
42 |       words: newBlock.children[0].words,
43 |       lastInsertTime,
44 |     });
45 |     lastInsertTime = lastInsert;
46 |     newBlock.children[0].words = newWords;
47 |     newBlock.children[0].text = convertWordsToText(newBlock.children[0].words);
48 |     return newBlock;
49 |   });
50 | };
51 | 
52 | export default insertTimecodesInLineInSlateJs;
53 | 


--------------------------------------------------------------------------------
/docs/adr/adr-template.md:
--------------------------------------------------------------------------------
 1 | # \[short title of solved problem and solution\] - ADR Template
 2 | 
 3 | * Status: \[accepted \| superseded by [ADR-0005](https://github.com/pietrop/slate-transcript-editor/tree/2cb2ae9d36e3d8379350baed5ba4471a76ea90d5/docs/ADR/0005-example.md) \| deprecated \| …\] 
 4 | * Deciders: \[list everyone involved in the decision\] 
 5 | * Date: \[YYYY-MM-DD when the decision was last updated\] 
 6 | 
 7 | Technical Story: \[description \| ticket/issue URL\] 
 8 | 
 9 | ## Context and Problem Statement
10 | 
11 | \[Describe the context and problem statement, e.g., in free form using two to three sentences. You may want to articulate the problem in form of a question.\]
12 | 
13 | ## Decision Drivers
14 | 
15 | * \[driver 1, e.g., a force, facing concern, …\]
16 | * \[driver 2, e.g., a force, facing concern, …\]
17 | * … 
18 | 
19 | ## Considered Options
20 | 
21 | * \[option 1\]
22 | * \[option 2\]
23 | * \[option 3\]
24 | * … 
25 | 
26 | ## Decision Outcome
27 | 
28 | Chosen option: "\[option 1\]", because \[justification. e.g., only option, which meets k.o. criterion decision driver \| which resolves force force \| … \| comes out best \(see below\)\].
29 | 
30 | ### Positive Consequences
31 | 
32 | * \[e.g., improvement of quality attribute satisfaction, follow-up decisions required, …\]
33 | * …
34 | 
35 | ### Negative consequences
36 | 
37 | * \[e.g., compromising quality attribute, follow-up decisions required, …\]
38 | * …
39 | 
40 | ## Pros and Cons of the Options
41 | 
42 | ### \[option 1\]
43 | 
44 | \[example \| description \| pointer to more information \| …\] 
45 | 
46 | * Good, because \[argument a\]
47 | * Good, because \[argument b\]
48 | * Bad, because \[argument c\]
49 | * … 
50 | 
51 | ### \[option 2\]
52 | 
53 | \[example \| description \| pointer to more information \| …\] 
54 | 
55 | * Good, because \[argument a\]
56 | * Good, because \[argument b\]
57 | * Bad, because \[argument c\]
58 | * … 
59 | 
60 | ### \[option 3\]
61 | 
62 | \[example \| description \| pointer to more information \| …\] 
63 | 
64 | * Good, because \[argument a\]
65 | * Good, because \[argument b\]
66 | * Bad, because \[argument c\]
67 | * … 
68 | 
69 | ## Links
70 | 
71 | * \[Link type\] \[Link to ADR\] 
72 | * … 
73 | 
74 | 


--------------------------------------------------------------------------------
/src/util/export-adapters/subtitles-generator/presegment-text/fold/index.js:
--------------------------------------------------------------------------------
 1 | 'use strict';
 2 | import removeSpaceAfterCarriageReturn from '../util/remove-space-after-carriage-return.js';
 3 | 
 4 | /*
 5 |  * Helper function
 6 |  * folds array of words
 7 |  * adds `\n`
 8 |  * foldNumber = char after which to fold. eg 35 char default
 9 |  * TODO: this could be refactored with smaller helper functions
10 |  */
11 | function foldWordsReturnArray(textArray, foldNumber = 35) {
12 |   var counter = 0;
13 |   var result = textArray.map((word, index, list) => {
14 |     counter += word.length + 1;
15 |     //resetting counter when there is a 'paragraph' line break \n\n
16 |     if (counter <= foldNumber) {
17 |       // if not last word in list
18 |       // cover edge case last element in array does not have a next element
19 |       if (list[index + 1] !== undefined) {
20 |         var nextElementLength = list[index + 1].length;
21 |         //check if adding next word would make the line go over the char limit foldNumber
22 |         if (counter + nextElementLength < foldNumber) {
23 |           return word;
24 |         } else {
25 |           // if it makes it go over, reset counter, return and add line break
26 |           counter = 0;
27 | 
28 |           return `${word}\n`;
29 |         }
30 |         //last word in the list
31 |       } else {
32 |         return word;
33 |       }
34 |       // if not greater then char foldNumber
35 |     } else {
36 |       counter = 0;
37 | 
38 |       return `${word}\n`;
39 |     }
40 |   });
41 | 
42 |   return result;
43 | }
44 | 
45 | /*
46 |  * text string of words
47 |  * foldNumber = char after which to fold. eg 35 char.
48 |  */
49 | function foldWords(text, foldNumber) {
50 |   // split on two line break
51 |   var lineArr = text.split('\n\n');
52 |   // fold each line on non fold number char count
53 |   var foldedWordsInArray = lineArr.map(line => {
54 |     return foldWordsReturnArray(line.split(' '), foldNumber);
55 |   });
56 |   // flatten result
57 |   var foldedWordsFlatten = foldedWordsInArray.map(line => {
58 |     return line.join(' ');
59 |   });
60 | 
61 |   // remove space after carriage return \n in lines
62 |   const result = foldedWordsFlatten.map(r => {
63 |     return removeSpaceAfterCarriageReturn(r);
64 |   });
65 |   // return text
66 | 
67 |   return result.join('\n\n');
68 | }
69 | 
70 | export default foldWords;
71 | 


--------------------------------------------------------------------------------
/src/components/2-Longer.stories.js:
--------------------------------------------------------------------------------
 1 | import React from 'react';
 2 | import { action } from '@storybook/addon-actions';
 3 | import SlateTranscriptEditor from './index.js';
 4 | 
 5 | export default {
 6 |   title: 'Performance for over 1 hour media',
 7 |   component: SlateTranscriptEditor,
 8 | };
 9 | 
10 | const DEMO_MEDIA_URL_ZUCK_5HOURS =
11 |   'https://democratic-presidential-debate-stt-analyses.s3.us-east-2.amazonaws.com/Facebook+CEO+Mark+Zuckerberg+FULL+testimony+before+U.S.+senate-pXq-5L2ghhg.mp4';
12 | 
13 | const DEMO_TITLE_ZUCK_2HOURS = ' 2 Hours |Facebook CEO Mark Zuckerberg | full testimony before U.S. Senate ';
14 | import DEMO_TRANSCRIPT_ZUCK_2HOURS_DPE from '../sample-data/Facebook-CEO-Mark-Zuckerberg-FULL-testimony-before-U.S.senate-pXq-5L2ghhg.mp4.dpe-2hours.json';
15 | export const Hours2 = () => {
16 |   return (
17 |     <SlateTranscriptEditor
18 |       mediaUrl={DEMO_MEDIA_URL_ZUCK_5HOURS}
19 |       title={DEMO_TITLE_ZUCK_2HOURS}
20 |       transcriptData={DEMO_TRANSCRIPT_ZUCK_2HOURS_DPE}
21 |       handleSaveEditor={action('handleSaveEditor')}
22 |       autoSaveContentType={'digitalpaperedit'}
23 |       showTitle={true}
24 |     />
25 |   );
26 | };
27 | 
28 | const DEMO_TITLE_ZUCK_5HOURS = ' 5 Hours | Facebook CEO Mark Zuckerberg  | full testimony before U.S. Senate';
29 | import DEMO_TRANSCRIPT_ZUCK_5HOURS_DPE from '../sample-data/Facebook-CEO-Mark-Zuckerberg-FULL-testimony-before-U.S.senate-pXq-5L2ghhg.mp4.dpe.json';
30 | export const Hours5 = () => {
31 |   return (
32 |     <SlateTranscriptEditor
33 |       mediaUrl={DEMO_MEDIA_URL_ZUCK_5HOURS}
34 |       title={DEMO_TITLE_ZUCK_5HOURS}
35 |       transcriptData={DEMO_TRANSCRIPT_ZUCK_5HOURS_DPE}
36 |       handleSaveEditor={action('handleSaveEditor')}
37 |       autoSaveContentType={'digitalpaperedit'}
38 |       showTitle={true}
39 |     />
40 |   );
41 | };
42 | 
43 | // TODO: 5 hours with auto save, to troubleshoot performance snag, and optimise auto save logic
44 | // export const Hours5WithAutoSave = () => {
45 | //   return <SlateTranscriptEditor
46 | //     mediaUrl={DEMO_MEDIA_URL_ZUCK_5HOURS}
47 | //     title={DEMO_TITLE_ZUCK_5HOURS}
48 | //     transcriptData={DEMO_TRANSCRIPT_ZUCK_5HOURS_DPE}
49 | //     handleSaveEditor={action('handleSaveEditor')}
50 | //     autoSaveContentType={'digitalpaperedit'}
51 | //     handleAutoSaveChanges={action('handleAutoSaveChanges')}
52 | //     showTitle={true}
53 | //     />
54 | // };
55 | 


--------------------------------------------------------------------------------
/docs/notes/draftjs-vs-slatejs.md:
--------------------------------------------------------------------------------
 1 | # Takeaways from DraftJs vs SlateJs
 2 | Some notes on takeaways from DraftJs vs SlateJs for making timed text editors.
 3 | 
 4 | Making a timed text editor is not the same thing as making a rich text editor, there are inherent set of feature that come with the domain, that most rich text editor library had most likely not anticipated at their inception. The rich text editor that is the most lean and efficient is probably the better candidate to be extended in that direction.
 5 | 
 6 | * First things first the DraftJs data model is complex and hard to follow, and that gets in the way of development, period.
 7 | * SlateJS data model is a fairly straightforward json representation, and the parallel to larger DOM data structures and concepts makes its logic consistent and easier to dive into.
 8 | * DraftJS uses [immutableJS](https://immutable-js.github.io/immutable-js/), this causes performance issues  [\#437](https://github.com/facebook/draft-js/issues/437) \([\#501](https://github.com/Automattic/simplenote-electron/issues/501)\)
 9 | * SlateJs made the decision to [remove immutableJs](https://github.com/ianstormtaylor/slate/milestone/3?closed=1)
10 | * Adding things to `blockRendererFn` via a "`WrapperBlock`" in DraftJs can introduce a further performance drag, if not handled with care. We use it to add things like speaker names and time-codes to each paragraph.
11 | * In SlateJS the equivalent is to use `renderElement`, but it's considerably easier to improve its performance, via careful use of closures \(eg if you have a function to handle the on click event for that block, moving it outside of that component can be beneficial for performance, as I think it results in fewer things added to that DOM node\), `.bind` and other practices to avoid bloating the DOM.
12 | * Furthermore SlateJs provides convenient ways to update its Nodes using a Transform, as well as to do an update on all the Nodes in the editor based on specified conditions \(eg this is useful for updating speakers\)
13 | * Pause while typing might introduce performance issues on longer transcripts if on every keystroke it's creating and destroying a timer. \(see [Wait for User to Stop Typing, in JavaScript](https://schier.co/blog/wait-for-user-to-stop-typing-using-javascript)\)
14 | * Paragraph level vs word level highlight. Word level highlight generally involves wrapping words into spans
15 | * The `handleAutoSaveChanges` function in its current implementation gets in the way of performance for longer transcripts.


--------------------------------------------------------------------------------
/src/util/export-adapters/slate-to-dpe/update-timestamps/plain-text-align-to-slate.js:
--------------------------------------------------------------------------------
 1 | import { alignSTT } from 'stt-align-node';
 2 | import { shortTimecode } from '../../../timecode-converter';
 3 | import countWords from '../../../count-words';
 4 | import generatePreviousTimingsUpToCurrent from '../../../dpe-to-slate/generate-previous-timings-up-to-current';
 5 | const createSlateContentFromSlateJsParagraphs = (currentContent, newEntities) => {
 6 |   // Update entites to block structure.
 7 |   const updatedBlockArray = [];
 8 |   let totalWords = 0;
 9 | 
10 |   for (const blockIndex in currentContent) {
11 |     const block = currentContent[blockIndex];
12 |     const text = block.children[0].text;
13 |     // if copy and pasting large chunk of text
14 |     // currentContentBlock, would not have speaker and start/end time info
15 |     // so for updatedBlock, getting start time from first word in blockEntities
16 |     const wordsInBlock = countWords(text);
17 |     const blockEntites = newEntities.slice(totalWords, totalWords + wordsInBlock);
18 |     let speaker = block.speaker;
19 |     const start = parseFloat(blockEntites[0].start);
20 |     // const end = parseFloat(blockEntites[blockEntites.length - 1].end);
21 |     // const currentParagraph = { start, end };
22 |     // The speakers would also not exist. unles in future iteration
23 |     // ad optin to have a convention for spaker formatting, eg all caps with : at beginning of sentence
24 |     // or somthing like that but out of scope for now.
25 |     if (!speaker) {
26 |       speaker = 'U_UKN';
27 |     }
28 | 
29 |     const newText = blockEntites
30 |       .map((w) => {
31 |         return w.text;
32 |       })
33 |       .join(' ')
34 |       .trim();
35 | 
36 |     const updatedBlock = {
37 |       type: 'timedText',
38 |       speaker: speaker,
39 |       start,
40 |       previousTimings: generatePreviousTimingsUpToCurrent(start),
41 |       startTimecode: shortTimecode(start),
42 |       children: [
43 |         {
44 |           text: newText,
45 |           words: blockEntites,
46 |         },
47 |       ],
48 |     };
49 | 
50 |     updatedBlockArray.push(updatedBlock);
51 |     totalWords += wordsInBlock;
52 |   }
53 |   return updatedBlockArray;
54 | };
55 | 
/**
 * Aligns `text` against `words` with `alignSTT`, then redistributes the
 * aligned words over the paragraphs of `slateJsValue`.
 *
 * @param {*} words - passed as-is as first argument to `alignSTT`
 * @param {string} text - plain text to align
 * @param {Array} slateJsValue - current slateJs paragraphs
 * @returns {Array} updated slateJs paragraphs
 */
function plainTextalignToSlateJs(words, text, slateJsValue) {
  // TODO: maybe there's a more performant way to do this?
  // As on larger over 1 hour transcript it might freeze the UI 🤷‍♂️
  return createSlateContentFromSlateJsParagraphs(slateJsValue, alignSTT(words, text));
}
63 | 
64 | export default plainTextalignToSlateJs;
65 | 


--------------------------------------------------------------------------------
/src/util/dpe-to-slate/index.js:
--------------------------------------------------------------------------------
 1 | import { shortTimecode } from '../timecode-converter';
 2 | 
 3 | /**
 4 |  *
 5 |  * `generatePreviousTimingsUpToCurrent` is used to
 6 |  *  add a `previousTimings` data attribute
 7 |  * to the paragraph `TimedTextElement` in `renderElement`
 8 |  * This makes it possible to do css injection to highlight current timings
 9 |  * `.timecode[data-previous-timings*="${listOfPreviousTimingsUpToCurrentOne}"]`
10 |  *
11 |  * where `listOfPreviousTimingsUpToCurrentOne` is dynamically generated up to the current one.
12 |  * eg if current time is `3` then `listOfPreviousTimingsUpToCurrentOne` is "0 1 2"
13 |  */
14 | 
15 | import getWordsForParagraph from '../get-words-for-paragraph';
16 | import generatePreviousTimingsUpToCurrent from './generate-previous-timings-up-to-current';
17 | 
18 | /**
19 |  * Slices a list of times (ints) up to a certain index, the current time.
20 |  * eg if `totalTimingsInt` is [0, 1, 2, 3, 4, 5] and `time` is 3,
21 |  * then it returns "0 1 2"
22 |  * @param {Array} totalTimingsInt -  list of timings int, generated with `generatePreviousTimings`
23 |  * @param {Number} time - float, time in seconds
24 |  * @returns {String}
25 |  */
26 | 
/**
 * Tells whether `obj` has no own enumerable keys.
 * `null` and `undefined` are treated as empty, since `Object.keys` would
 * otherwise throw on them.
 *
 * @param {Object|null|undefined} obj
 * @returns {boolean}
 */
function isEmpty(obj) {
  if (obj === null || obj === undefined) {
    return true;
  }
  return Object.keys(obj).length === 0;
}
30 | 
/**
 * Builds the text of a paragraph by joining, with single spaces, the `text`
 * of every word whose `start`/`end` fall within the paragraph's `start`/`end`.
 *
 * @param {Object} paragraph - object with `start` and `end`
 * @param {Array} words - list of word objects with `start`, `end` and `text`
 * @returns {string}
 */
const generateText = (paragraph, words) => {
  const textsInParagraph = words.reduce((texts, word) => {
    if (word.start >= paragraph.start && word.end <= paragraph.end) {
      texts.push(word.text);
    }
    return texts;
  }, []);

  return textsInParagraph.join(' ');
};
37 | 
/**
 * Converts a DPE transcript (`words` + `paragraphs`) into a slateJs value:
 * one `timedText` block per paragraph, with the paragraph's text and its
 * list of words stored on the block's single child.
 * An empty transcript yields a single placeholder block.
 *
 * @param {Object} transcript - DPE transcript, or an empty object
 * @returns {Array} slateJs value
 */
const convertDpeToSlate = (transcript) => {
  if (!isEmpty(transcript)) {
    const { words, paragraphs } = transcript;

    return paragraphs.map((paragraph) => {
      const previousTimings = generatePreviousTimingsUpToCurrent(paragraph.start);
      // pre-computing the display of the formatting here so that it doesn't need to convert it in leaf render
      const startTimecode = shortTimecode(paragraph.start);
      const text = generateText(paragraph, words);
      // Adding list of words in slateJs paragraphs
      const paragraphWords = getWordsForParagraph(paragraph, words);

      return {
        speaker: paragraph.speaker,
        start: paragraph.start,
        previousTimings,
        startTimecode,
        type: 'timedText',
        children: [{ text, words: paragraphWords }],
      };
    });
  }

  // Nothing to convert: single placeholder paragraph.
  const placeholderBlock = {
    speaker: 'U_UKN',
    start: 0,
    previousTimings: '0',
    startTimecode: '00:00:00',
    type: 'timedText',
    children: [
      {
        text: 'Text',
        // Adding list of words in slateJs paragraphs
        words: [],
      },
    ],
  };
  return [placeholderBlock];
};
76 | 
77 | export default convertDpeToSlate;
78 | 


--------------------------------------------------------------------------------
/src/util/export-adapters/docx/index.js:
--------------------------------------------------------------------------------
 1 | import { Document, Paragraph, TextRun, Packer } from 'docx';
 2 | import { shortTimecode } from '../../timecode-converter/';
 3 | import { Node } from 'slate';
 4 | export default slateToDocx;
 5 | 
 6 | function slateToDocx({
 7 |   value,
 8 |   speakers,
 9 |   timecodes,
10 |   inlineTimecodes,
11 |   hideTitle,
12 |   title = 'Transcript',
13 |   creator = 'Slate Transcript Editor',
14 |   description = 'Transcript',
15 | }) {
16 |   const doc = new Document({
17 |     creator: creator,
18 |     description: description,
19 |     title: title,
20 |   });
21 | 
22 |   if (!hideTitle) {
23 |     // Transcript Title
24 |     const textTitle = new TextRun(title);
25 |     const paragraphTitle = new Paragraph();
26 |     paragraphTitle.addRun(textTitle);
27 |     paragraphTitle.heading1().center();
28 |     doc.addParagraph(paragraphTitle);
29 | 
30 |     // add spacing
31 |     var paragraphEmpty = new Paragraph();
32 |     doc.addParagraph(paragraphEmpty);
33 |   }
34 | 
35 |   value.forEach((slateParagraph) => {
36 |     // TODO: use timecode converter module to convert from seconds to timecode
37 | 
38 |     const paragraphSpeakerTimecodes = new Paragraph();
39 |     if (timecodes) {
40 |       const timecodeStartTime = new TextRun(shortTimecode(slateParagraph.start));
41 |       paragraphSpeakerTimecodes.addRun(timecodeStartTime);
42 |     }
43 |     if (speakers) {
44 |       if (timecodes) {
45 |         const speaker = new TextRun(slateParagraph.speaker).bold().tab();
46 |         paragraphSpeakerTimecodes.addRun(speaker);
47 |       } else {
48 |         const speaker = new TextRun(slateParagraph.speaker).bold();
49 |         paragraphSpeakerTimecodes.addRun(speaker);
50 |       }
51 |     }
52 | 
53 |     const paragraphContents = Node.string(slateParagraph);
54 |     const textBreak = new TextRun('').break();
55 | 
56 |     if (inlineTimecodes) {
57 |       paragraphSpeakerTimecodes.addRun(new TextRun(`${slateParagraph.speaker.toUpperCase()}:  ${paragraphContents}`));
58 |     }
59 | 
60 |     if (timecodes || speakers || inlineTimecodes) {
61 |       doc.addParagraph(paragraphSpeakerTimecodes);
62 |       doc.addParagraph(new Paragraph());
63 |     }
64 | 
65 |     if (!inlineTimecodes) {
66 |       const paragraphText = new Paragraph(paragraphContents);
67 |       paragraphText.addRun(textBreak);
68 |       doc.addParagraph(paragraphText);
69 |     }
70 |   });
71 | 
72 |   const packer = new Packer();
73 | 
74 |   packer.toBlob(doc).then((blob) => {
75 |     const filename = `${title}.docx`;
76 |     // // const type =  'application/octet-stream';
77 |     const a = document.createElement('a');
78 |     a.href = window.URL.createObjectURL(blob);
79 |     a.download = filename;
80 |     a.click();
81 | 
82 |     return blob;
83 |   });
84 | }
85 | 


--------------------------------------------------------------------------------
/src/util/export-adapters/slate-to-dpe/index.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * converted from react-transcript-editor draftJS update timestamp helper function
 3 |  * https://github.com/pietrop/react-transcript-editor/blob/master/packages/components/timed-text-editor/UpdateTimestamps/index.js
 4 |  * similar to "update Timestamps" function
 5 |  */
 6 | import countWords from '../../count-words';
 7 | import updateBloocksTimestamps from './update-timestamps/update-bloocks-timestamps';
 8 | /**
 9 |  * Transposes the timecodes from stt json list of words onto
10 |  * dpe transcript with paragraphs and words
11 |  */
12 | export const createDpeParagraphsFromSlateJs = (currentContent, newEntities) => {
13 |   // Update entites to block structure.
14 |   let totalWords = 0;
15 |   return currentContent.map((block) => {
16 |     const text = block.children[0].text;
17 |     const wordsInBlock = countWords(text);
18 |     const blockEntites = newEntities.slice(totalWords, totalWords + wordsInBlock);
19 |     let speaker = block.speaker;
20 |     const start = parseFloat(blockEntites[0].start);
21 |     const end = parseFloat(blockEntites[blockEntites.length - 1].end);
22 |     if (!speaker) {
23 |       speaker = 'U_UKN';
24 |     }
25 |     const updatedBlock = {
26 |       speaker: speaker,
27 |       start,
28 |       end,
29 |     };
30 | 
31 |     totalWords += wordsInBlock;
32 |     return updatedBlock;
33 |   });
34 | };
35 | 
/**
 * Collects the `words` stored on the first child of every slateJs paragraph
 * into one flat list.
 *
 * @param {Array} slateParagraphs - slateJs paragraphs with a `words` attribute
 * @returns {Array} flat list of words
 */
const slateParagraphsToDpeWords = (slateParagraphs) => slateParagraphs.flatMap((block) => block.children[0].words);
/**
 * Converts a slateJs value into a dpe transcript.
 *
 * No alignment is run here: it assumes the words stored in the blocks are
 * already aligned (the parent component handles keeping the words in sync),
 * so this is only a conversion between formats. It replaces a previous
 * approach that called `updateBloocksTimestamps` on the content first.
 *
 * @param {Array} currentContent - slate js value
 * @return {{words: Array, paragraphs: Array}} dpe transcript with paragraphs and words
 */
const converSlateToDpe = (currentContent) => {
  const words = slateParagraphsToDpeWords(currentContent);
  const paragraphs = createDpeParagraphsFromSlateJs(currentContent, words);
  return { words, paragraphs };
};
63 | 
64 | export default converSlateToDpe;
65 | 


--------------------------------------------------------------------------------
/src/components/5-Saving.stories.js:
--------------------------------------------------------------------------------
 1 | import React from 'react';
 2 | import { action } from '@storybook/addon-actions';
 3 | import { withKnobs, text, boolean, number, object, select } from '@storybook/addon-knobs';
 4 | import { withInfo } from '@storybook/addon-info';
 5 | import { version } from '../../package.json';
 6 | 
 7 | import SlateTranscriptEditor from './index.js';
 8 | import 'fontsource-roboto';
 9 | 
// Default export of the story file: groups the stories below under the
// 'Saving indicator' title and attaches the knobs and info decorators.
export default {
  title: 'Saving indicator',
  component: SlateTranscriptEditor,
  decorators: [withKnobs, withInfo],
  parameters: {
    // NOTE(review): presumably options read by the `withInfo` decorator — confirm against the addon docs
    info: {
      maxPropArrayLength: 3,
      maxPropsIntoLine: 3,
      maxPropObjectKeys: 1,
      excludedPropTypes: ['transcriptData'],
      source: false,
    },
  },
};
24 | 
25 | const DEMO_MEDIA_URL_SOLEIO =
26 |   'https://digital-paper-edit-demo.s3.eu-west-2.amazonaws.com/PBS-Frontline/The+Facebook+Dilemma+-+interviews/The+Facebook+Dilemma+-+Soleio+Cuervo-OIAUfZBd_7w.mp4';
27 | const DEMO_TITLE_SOLEIO = 'Soleio Interview, PBS Frontline';
28 | import DEMO_SOLEIO from '../sample-data/soleio-dpe.json';
29 | 
30 | export const NoAutoSave = () => {
31 |   return (
32 |     <>
33 |       <p>
34 |         Slate Transcript Editor version: <code>{version}</code>
35 |       </p>
36 |       <SlateTranscriptEditor
37 |         title={DEMO_TITLE_SOLEIO}
38 |         mediaUrl={text('mediaUrl', DEMO_MEDIA_URL_SOLEIO)}
39 |         handleSaveEditor={action('handleSaveEditor')}
40 |         // handleAutoSaveChanges={action('handleAutoSaveChanges')}
41 |         // https://www.npmjs.com/package/@storybook/addon-knobs#select
42 |         autoSaveContentType={select('autoSaveContentType', ['digitalpaperedit', 'slate'], 'digitalpaperedit')} // digitalpaperedit or slate - digitalpaperedit, runs alignement before exporting, slate, is just the raw data.
43 |         // transcriptData={object('transcriptData', DEMO_SOLEIO)}
44 |         transcriptData={DEMO_SOLEIO}
45 |       />
46 |     </>
47 |   );
48 | };
49 | 
50 | export const AutoSave = () => {
51 |   return (
52 |     <>
53 |       <p>
54 |         Slate Transcript Editor version: <code>{version}</code>
55 |       </p>
56 |       <SlateTranscriptEditor
57 |         title={DEMO_TITLE_SOLEIO}
58 |         mediaUrl={text('mediaUrl', DEMO_MEDIA_URL_SOLEIO)}
59 |         handleSaveEditor={action('handleSaveEditor')}
60 |         handleAutoSaveChanges={action('handleAutoSaveChanges')}
61 |         // https://www.npmjs.com/package/@storybook/addon-knobs#select
62 |         autoSaveContentType={select('autoSaveContentType', ['digitalpaperedit', 'slate'], 'digitalpaperedit')} // digitalpaperedit or slate - digitalpaperedit, runs alignement before exporting, slate, is just the raw data.
63 |         // transcriptData={object('transcriptData', DEMO_SOLEIO)}
64 |         transcriptData={DEMO_SOLEIO}
65 |       />
66 |     </>
67 |   );
68 | };
69 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "slate-transcript-editor",
 3 |   "version": "0.1.6-alpha.19",
 4 |   "description": "A React component to make correcting automated transcriptions of audio and video easier and faster. Using the Slate Editor",
 5 |   "main": "index.js",
 6 |   "scripts": {
 7 |     "test": "NODE_OPTIONS=--experimental-vm-modules npx jest --findRelatedTests ./src/util/export-adapters/subtitles-generator/compose-subtitles/csv/index.js",
 8 |     "start": "npm run storybook",
 9 |     "storybook": "start-storybook -p 6006",
10 |     "build-storybook": "build-storybook -c .storybook -o .out",
11 |     "deploy:ghpages": "npm run build-storybook  && cp .nojekyll .out/.nojekyll && gh-pages -d .out",
12 |     "build:component": "cross-env BABEL_ENV=production babel src -d dist && /bin/cp -rf package.json dist && /bin/cp -rf README.md dist",
13 |     "publish:public": "npm run build:component && npm publish dist --access public",
14 |     "publish:dry:run": "npm run build:component && npm publish  dist --dry-run",
15 |     "lint": "prettier --write \"**/*.js\""
16 |   },
17 |   "keywords": [],
18 |   "author": "Pietro Passarelli",
19 |   "contributors": [
20 |     {
21 |       "name": "Pietro Passarelli",
22 |       "url": "https://github.com/pietrop"
23 |     }
24 |   ],
25 |   "bugs": {
26 |     "url": "https://github.com/pietrop/slate-transcript-editor/issues"
27 |   },
28 |   "homepage": "https://github.com/pietrop/slate-transcript-editor",
29 |   "repository": {
30 |     "type": "git",
31 |     "url": "git+https://github.com/pietrop/slate-transcript-editor.git"
32 |   },
33 |   "license": "MIT",
34 |   "devDependencies": {
35 |     "@babel/cli": "^7.8.4",
36 |     "@babel/core": "^7.9.0",
37 |     "@babel/preset-react": "^7.9.4",
38 |     "@storybook/addon-a11y": "^5.3.18",
39 |     "@storybook/addon-actions": "^5.3.18",
40 |     "@storybook/addon-info": "^5.3.18",
41 |     "@storybook/addon-knobs": "^5.3.18",
42 |     "@storybook/addon-links": "^5.3.18",
43 |     "@storybook/addon-storysource": "^6.3.0",
44 |     "@storybook/addons": "^5.3.18",
45 |     "@storybook/react": "^5.3.18",
46 |     "babel-loader": "^8.1.0",
47 |     "babel-plugin-transform-react-jsx": "^6.24.1",
48 |     "cross-env": "^7.0.2",
49 |     "gh-pages": "^2.2.0",
50 |     "husky": "^4.2.5",
51 |     "jest": "^27.0.5",
52 |     "node-fetch": ">=2.6.1",
53 |     "prettier": "2.0.5",
54 |     "pretty-quick": "^2.0.1"
55 |   },
56 |   "dependencies": {
57 |     "@material-ui/core": "^4.11.3",
58 |     "@material-ui/icons": "^4.11.2",
59 |     "docx": "4.7.1",
60 |     "fontsource-roboto": "^4.0.0",
61 |     "lodash": "^4.17.20",
62 |     "p-debounce": "^3.0.1",
63 |     "prop-types": "^15.7.2",
64 |     "sbd": "^1.0.18",
65 |     "slate": "^0.59.0",
66 |     "slate-history": "^0.59.0",
67 |     "slate-react": "^0.59.0",
68 |     "smpte-timecode": "^1.2.3",
69 |     "stt-align-node": "^2.0.1"
70 |   },
71 |   "husky": {
72 |     "hooks": {
73 |       "pre-commit": "pretty-quick --staged"
74 |     }
75 |   }
76 | }
77 | 


--------------------------------------------------------------------------------
/docs/notes/notes.md:
--------------------------------------------------------------------------------
 1 | # notes
 2 | 
 3 | ```text
 4 | // const playVideo = ()=>{
 5 | //   console.log('playVideo')
 6 | //   if (videoRef && videoRef.current) {
 7 | //     videoRef.current.play()
 8 | //   }
 9 | // }
10 | 
11 | // const handleEditorOnChange = (e)=>{
12 |   // if( e.key!== 'Tab' 
13 |   //   && e.key!== 'Shift'
14 |   //   && e.key!== 'Enter' 
15 |   //   ){
16 |   //     if (videoRef && videoRef.current) {
17 |   //       // https://lodash.com/docs/4.17.15#debounce
18 |   //       // https://lodash.com/docs/4.17.15#throttle
19 |   //         videoRef.current.pause();
20 |   //         // throttle(playVideo, PAUSE_WHILTE_TYPING_TIMEOUT_MILLISECONDS, {trailing: true})
21 |   //         once(playVideo)
22 |   //     }
23 |   // }
24 | // }
25 | 
26 |     // useEffect(()=>{
27 | //     // resolved relative to main.js url path
28 | //     const workerFor = new Worker('web-worker.js');
29 | //     // listen to message event of worker
30 | //     workerFor.addEventListener('message', workerMessage);
31 | //     // listen to error event of worker
32 | //     workerFor.addEventListener('error', workerError);
33 | 
34 | //     return function cleanupWorkers() {
35 | //       workerFor.removeEventListener("error",workerMessage )
36 | //       workerFor.removeEventListener("message", workerError)
37 | //     }
38 | 
39 | // },[])
40 | 
41 | ----
42 |     const TimedTextElement = props => {
43 |   // console.log('TimedTextElement',props)
44 |   const handleSetSpeakerName = (e)=>{
45 |     const resp = prompt('Change speaker name', props.element.speaker)
46 |     const path = editor.selection.anchor.path;
47 |     console.log('path',path)
48 |     if(resp){
49 |       Transforms.setNodes(
50 |         editor,
51 |         { speaker: resp },
52 |         {
53 |           match: n => Editor.isBlock(editor, n),
54 |         }
55 |       )
56 |     }
57 | 
58 |   }
59 |     return (
60 |       <Row {...props.attributes} >
61 |           <Col xs={4} sm={2} md={4} lg={3} xl={2} className={'p-t-2 text-truncate'} >
62 |           <code 
63 |               style={{cursor: 'pointer'}} 
64 |               className={['timecode', 'text-muted'].join(' ')}  
65 |               onClick={(e)=>{handleTimedTextClick(e, props.element.start)}}
66 |               title={shortTimecode(props.element.start)}
67 |               >{shortTimecode(props.element.start)}
68 |               </code>
69 |               </Col>
70 |               <Col xs={8} sm={10} md={8} lg={3} xl={3} className={'p-t-2 text-truncate'} >
71 |               <strong 
72 |                 className={'text-truncate'}
73 |                 style={{
74 |                   cursor: 'pointer',
75 |                   width: '100%'
76 |                 }} 
77 |                 title={props.element.speaker.toUpperCase()}
78 |                 onClick={handleSetSpeakerName}> {props.element.speaker.toUpperCase()}</strong>
79 |           </Col>
80 |           <Col  xs={12} sm={12} md={12} lg={6} xl={7} className={'p-b-1 mx-auto'}>
81 |           {props.children} 
82 |           </Col>
83 |           </Row>
84 |     )
85 |   }
86 | ```
87 | 
88 | 


--------------------------------------------------------------------------------
/src/util/export-adapters/slate-to-dpe/update-timestamps/update-bloocks-timestamps.js:
--------------------------------------------------------------------------------
 1 | import { alignSTT } from 'stt-align-node';
 2 | import countWords, { removeExtraWhiteSpaces, splitOnWhiteSpaces, countChar } from '../../../count-words';
 3 | import convertWordsToText from '../../../convert-words-to-text';
 4 | 
 5 | export function isTextAndWordsListChanged({ text, words }) {
 6 |   const wordsText = convertWordsToText(words);
 7 |   // TODO: here could optimize to check against word length
 8 |   // and only transpose the timcods
 9 |   return !(removeExtraWhiteSpaces(text) === wordsText);
10 | }
11 | 
/**
 * Compares the `countChar` of the block text with the `countChar` of the text
 * rebuilt from the block words (words with empty text are ignored).
 * NOTE(review): despite the function name the comparison relies on
 * `countChar` — confirm in count-words what exactly it counts.
 *
 * @param {Object} params
 * @param {string} params.text - text of the block
 * @param {Array} params.words - words of the block
 * @returns {boolean}
 */
function isEqualNumberOfWords({ text, words }) {
  // Quick fix: drop words with an empty string, which can come from issues
  // further upstream (initial conversion from STT or previous alignments).
  const wordsText = convertWordsToText(removeEmptyWords(words));
  return countChar(text) === countChar(wordsText);
}
22 | 
/**
 * Quick workaround: the alignment done by the alignSTT module sometimes
 * produces words with no `text` attribute or an empty string.
 * Those words cause issues with the alignment, so they are filtered out here.
 * TODO: figure out a better fix in the alignSTT repo further upstream
 *
 * @param {Array} words - list of word objects
 * @returns {Array} new list with only the words that have a truthy `text`
 */
function removeEmptyWords(words) {
  return words.filter(({ text }) => text);
}
34 | 
/**
 * Returns a copy of `block` whose words are re-aligned to `text`.
 * The input block and words are not mutated.
 *
 * @param {Object} params
 * @param {Object} params.block - slateJs paragraph block
 * @param {string} params.text - current text of the block
 * @param {Array} params.words - current words of the block
 * @returns {Object} new block with updated `children[0].words`
 */
export function alignBlock({ block, text, words }) {
  const newBlock = JSON.parse(JSON.stringify(block));
  // if same number of words in words list and text
  // then can do an optimization where you don't need to run diff
  // just transpose words onto the timecodes.
  // this assumes STT will be ok at recognising utterances
  // even if in worst case scenario it might have mis-identified the words
  if (isEqualNumberOfWords({ text, words })) {
    const textList = splitOnWhiteSpaces(text);
    // `isEqualNumberOfWords` compares against the words WITHOUT empty text,
    // so the transposition has to use that same sanitized list: otherwise an
    // empty word shifts every following word by one and the trailing words
    // end up with `undefined` text.
    const newWords = JSON.parse(JSON.stringify(removeEmptyWords(words)));
    newBlock.children[0].words = newWords.map((word, index) => ({
      ...word,
      text: textList[index],
    }));
    return newBlock;
  }
  // Different number of words: fall back to a full diff-based alignment.
  const alignedWords = alignSTT({ words }, text);
  newBlock.children[0].words = removeEmptyWords(alignedWords);
  return newBlock;
}
55 | 
/**
 * Re-aligns a single block, only if its text no longer matches its words.
 *
 * @param {Object} block - slateJs paragraph block
 * @returns {Object} a new aligned block, or the same `block` when unchanged
 */
export function updateIndividualBlockTimestamps(block) {
  const firstChild = block.children[0];
  const text = firstChild.text;
  const words = firstChild.words;

  if (!isTextAndWordsListChanged({ text, words })) {
    return block;
  }
  return alignBlock({ block, text, words });
}
65 | 
/**
 * Diffs text and words for every block of the transcript, re-aligning the
 * blocks that have changed.
 *
 * @param {Array} slateJsValue - slateJs paragraphs
 * @returns {Array} list of blocks, re-aligned where needed
 */
function updateBloocksTimestamps(slateJsValue) {
  return slateJsValue.map((block) => updateIndividualBlockTimestamps(block));
}
72 | 
73 | export default updateBloocksTimestamps;
74 | 


--------------------------------------------------------------------------------
/src/components/4-Live.stories.js:
--------------------------------------------------------------------------------
 1 | import React, { useState, useEffect, useMemo, useCallback } from 'react';
 2 | import { action } from '@storybook/addon-actions';
 3 | import { withKnobs, text, boolean, number, object, select } from '@storybook/addon-knobs';
 4 | import { withInfo } from '@storybook/addon-info';
 5 | import SlateTranscriptEditor from './index.js';
 6 | 
// Default export of the story file: groups the stories below under the
// 'Live' title and attaches the knobs and info decorators.
export default {
  title: 'Live',
  component: SlateTranscriptEditor,
  decorators: [withKnobs, withInfo],
  parameters: {
    // NOTE(review): presumably options read by the `withInfo` decorator — confirm against the addon docs
    info: {
      maxPropArrayLength: 3,
      maxPropsIntoLine: 3,
      maxPropObjectKeys: 1,
      excludedPropTypes: ['transcriptData'],
      source: false,
    },
  },
};
21 | 
22 | const DEMO_MEDIA_URL_SOLEIO =
23 |   'https://digital-paper-edit-demo.s3.eu-west-2.amazonaws.com/PBS-Frontline/The+Facebook+Dilemma+-+interviews/The+Facebook+Dilemma+-+Soleio+Cuervo-OIAUfZBd_7w.mp4';
24 | import DEMO_SOLEIO_LIVE from '../sample-data/segmented-transcript-soleio-dpe.json';
25 | 
26 | // Parent component to simulate results from a live STT stream.
27 | const Example = (props) => {
28 |   // Declare a new state variable, which we'll call "count"
29 |   const [jsonData, setJsonData] = useState({});
30 |   const [interimResults, setInterimResults] = useState({});
31 | 
32 |   useEffect(() => {
33 |     props.transcriptInParts &&
34 |       props.transcriptInParts.forEach(
35 |         delayLoop((transcriptPart) => {
36 |           setInterimResults(transcriptPart);
37 |         }, 3000)
38 |       );
39 |   }, []);
40 | 
41 |   // https://travishorn.com/delaying-foreach-iterations-2ebd4b29ad30
42 |   const delayLoop = (fn, delay) => {
43 |     return (x, i) => {
44 |       setTimeout(() => {
45 |         fn(x);
46 |       }, i * delay);
47 |     };
48 |   };
49 | 
50 |   return (
51 |     <>
52 |       <SlateTranscriptEditor
53 |         mediaUrl={text('mediaUrl', DEMO_MEDIA_URL_SOLEIO)}
54 |         handleSaveEditor={action('handleSaveEditor')}
55 |         handleAutoSaveChanges={action('handleAutoSaveChanges')}
56 |         // https://www.npmjs.com/package/@storybook/addon-knobs#select
57 |         autoSaveContentType={select('autoSaveContentType', ['digitalpaperedit', 'slate'], 'digitalpaperedit')} // digitalpaperedit or slate - digitalpaperedit, runs alignement before exporting, slate, is just the raw data.
58 |         transcriptData={jsonData}
59 |         transcriptDataLive={interimResults}
60 |         isEditable={props.isEditable}
61 |         title={props.title}
62 |         showTitle={true}
63 |         showTimecodes={true}
64 |         showSpeakers={true}
65 |       />
66 |     </>
67 |   );
68 | };
69 | 
70 | export const NotEditable = () => {
71 |   return (
72 |     <Example
73 |       isEditable={false}
74 |       transcriptInParts={DEMO_SOLEIO_LIVE}
75 |       title={'Simulated a live STT interim results via a timer and segmented STT json, NOT editable'}
76 |     />
77 |   );
78 | };
79 | 
80 | export const Editable = () => {
81 |   return (
82 |     <Example
83 |       isEditable={true}
84 |       transcriptInParts={DEMO_SOLEIO_LIVE}
85 |       title={'Simulated a live STT interim results via a timer and segmented STT json, editable'}
86 |     />
87 |   );
88 | };
89 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # CONTRIBUTING - Draft
 2 | 
 3 | This project has a [Code of Conduct](./CODE_OF_CONDUCT.md) that we expect all of our contributors to abide by, please check it out before contributing.
 4 | 
 5 | ## Contributor license agreement
 6 | By submitting code as an individual or as an entity you agree that your code is licensed the same as the [slate transcript editor](./LICENCE.md).
 7 | 
 8 | ## Pull requests and branching
 9 | 
10 | 1. [Feel free to start by raising an issue](https://github.com/pietrop/slate-transcript-editor/issues/new?template=feature_request.md) so that others can be aware of areas where there is active development, and if needed we can synchronise the effort.
11 | 
12 | 2. [Fork the repo](https://help.github.com/articles/fork-a-repo/)
13 | 
14 | 3. Before working on a feature **always** create a new branch first. Create a branch with a meaningful name.
15 | 4. Branches should be short lived - consider doing multiple PR breaking down functionalities as opposed to one big change.
16 | 5. If you've added code that should be tested, add tests, if you need help with automated testing, feel free to raise an [issue](https://github.com/pietrop/slate-transcript-editor/issues/new?template=feature_request.md).
17 | 6. Ensure the test suite passes.
18 | 7. Make sure your code lints.
19 | 8. If you've changed APIs, consider [updating the documentation](https://github.com/pietrop/slate-transcript-editor/#documentation) and QA Testing docs.
20 | 9. Once the work is complete push the branch up on to GitHub for review. Make sure your branch is up to date with `master` before making a pull request. eg use [`git merge origin/master`](https://git-scm.com/docs/git-merge) or [rebase](https://git-scm.com/book/en/v2/Git-Branching-Rebasing)
21 | 10. Once a branch has been merged into `master`, delete it.
22 | 
23 | `master` is rarely committed to directly unless the change is quite trivial or a code review is unnecessary (code formatting or documentation updates for example).
24 | 
25 | ## Code Quality 
26 | - Aim for solutions that are easy to explain and reason around so that others can contribute to it.
27 | - Use meaningful, descriptive variable and function names. eg avoid using `x`,`y`,`z` as variable names.
28 | - Keep lines short 
29 | - Keep functions small and avoid [side effects](https://en.wikipedia.org/wiki/Side_effect_(computer_science)) when possible.
30 | - etc..
31 | 
32 | See [this blog post for more on this](https://medium.com/mindorks/how-to-write-clean-code-lessons-learnt-from-the-clean-code-robert-c-martin-9ffc7aef870c)
33 | 
34 | ## Contributing checklist
35 | 
36 | - [ ] Fork the repository
37 | - [ ] Create a branch with a meaningful name
38 | - [ ] Add automated tests where appropriate 
39 | - [ ] Ensure test suite passes (`npm run test`)
40 | - [ ] Make sure your code lints. (`npm run lint`)
41 | - [ ] consider re-factoring for code quality and readability
42 | - [ ] Update documentation and QA docs where appropriate - [see updating the documentation](https://github.com/pietrop/slate-transcript-editor/#documentation)
43 | - [ ] Setup your PR for review 
44 | 
45 | <!-- 
46 | Good example of contribution guideline
47 | 
48 | https://reactjs.org/docs/how-to-contribute.html
49 | 
50 | another example - with more tech details
51 | 
52 | https://github.com/facebook/create-react-app/blob/master/CONTRIBUTING.md
53 | 
54 | https://github.com/facebookresearch/wav2letter/blob/master/CONTRIBUTING.md 
55 | 
56 | https://github.com/hiddentao/fast-levenshtein/blob/master/CONTRIBUTING.md 
57 |  -->


--------------------------------------------------------------------------------
/docs/guides/dpe-transcript-format.md:
--------------------------------------------------------------------------------
 1 | # DPE transcript format
 2 | 
 3 | DPE stands for Digital Paper Edit, named after the [digital-paper-edit](https://github.com/pietrop/digital-paper-edit-client) project. Also known as [autoEdit3](https://www.autoEdit.io)
 4 | 
 5 | > An application to make it faster, easier and more accessible to edit audio and video interviews using automatically generated transcriptions from STT service.
 6 | > The current representation of a transcription is a list of timed word objects and one of speakers.
 7 | 
 8 | ```js
 9 | {
10 |   "words": [
11 |     {
12 |       "end": 0.46, // in seconds
13 |       "start": 0,
14 |       "text": "Hello"
15 |     },
16 |     {
17 |       "end": 1.02,
18 |       "start": 0.46,
19 |       "text": "World"
20 |     },
21 |     ...
22 |     ],
23 |     "paragraphs": [
24 |     {
25 |       "speaker": "SPEAKER_A",
26 |       "start": 0,
27 |       "end": 3
28 |     },
29 |     {
30 |       "speaker": "SPEAKER_B",
31 |       "start": 3,
32 |       "end": 19.2
33 |     },
34 |     ...
35 |     ]
36 |  }
37 | ```
38 | 
39 | Having paragraphs and words separate as a way of modelling this domain has proven extremely flexible for situations where you need to run alignment on the whole text or just parts of it.
40 | 
41 | ## Generating paragraphs
42 | 
43 | Paragraphs are generally generated from the Speech To Text service's speaker diarization information. Or, when this is not available, they can be generated via punctuation (`.`|`?` |`!`) that might be present in the words.
44 | 
45 | <details>
46 |   <summary>See these STT adapters for examples of how it can be generated</summary>
47 | 
48 | - AssemblyAI [`assemblyai-to-dpe`](https://github.com/pietrop/assemblyai-to-dpe)
49 | - AWS Transcriber [`aws-to-dpe`](https://github.com/pietrop/aws-to-dpe)
50 | - Google STT [`gcp-to-dpe`](https://github.com/pietrop/gcp-to-dpe)
51 | - IBM Watson STT (in PR [pietrop/digital-paper-edit-electron#52](https://github.com/pietrop/digital-paper-edit-electron/pull/52) module [`ibmwatson-to-dpe`](https://github.com/pietrop/digital-paper-edit-electron/pull/52/files#diff-fc121f3f4370613b5ddb6d5a3ef0a7bff5307f74684e0b482185d1a4572add06) but not extracted as separate module npm/github repo)
52 | - ~Speechmatics~ (There's a [`speechmatics-to-dpe`](https://github.com/pietrop/digital-paper-edit-electron/tree/master/src/ElectronWrapper/lib/transcriber/speechmatics/speechmatics-to-dpe) module but not extracted as a separate npm/github repo/module - [since speechmatics web portal API deprecation notice](https://www.speechmatics.com/transcription-web-portal-deprecation-notice/))
53 | 
54 | There are helper functions such as [dpe-add-words-to-paragraphs.js](https://gist.github.com/pietrop/36b80bfaacf829a07ad24c9d943cdd2f) you can write to interpolate the paragraphs back with the words, or the `getWordsForParagraph` function used in the `slate-transcript-editor` `dpe-to-slate` "import" adapter.
55 | 
56 | </details>
57 | 
58 | ## interpolating paragraphs
59 | 
60 | ```js
61 | /**
62 |  *
63 |  * @param {*} currentParagraph a dpe paragraph object, with start, and end attribute eg in seconds
64 |  * @param {*} words a list of word objects with start and end attributes
65 |  * @returns a list of word objects that are included in the given paragraph
66 |  */
67 | const getWordsForParagraph = (currentParagraph, words) => {
68 |   const { start, end } = currentParagraph;
69 |   return words.filter((word) => {
70 |     return word.start >= start && word.end <= end;
71 |   });
72 | };
73 | export default getWordsForParagraph;
74 | ```
75 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/qa_report.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: QA Report
 3 | about: Create a QA report to audit the app before a major release
 4 | title: '[QA] Main checklist'
 5 | labels: QA Report
 6 | assignees:
 7 | ---
 8 | 
 9 | ## QA Report
10 | 
11 | <!-- See [QA](/docs/QA/README.md) instructions in docs for more info on the process. -->
12 | 
13 | _If you run into issues with any of the individual items, raise a separate issue for each as a [QA Report - individual issue](https://github.com/newscorp-ghfb/dj-tools-transcribe/issues/new?assignees=&labels=QA%20Issue&template=qa_individual_issue_report.md&title=[QA]%20Issue%20%231.1%20Can%20edit%20the%20text). Write a note of the item number, and "title" in the issue title and description._
14 | 
15 | <!-- Top tip: view the text of this issue in "preview" mode to make it easier to click through the checkboxes -->
16 | 
17 | ## QA ENV
18 | 
19 | Make a note of which ENV you are doing this QA in, eg local storybook, storybook on github pages, or some other react environment.
20 | 
21 | - [ ] storybook local
22 | - [ ] [storybook in github pages](https://pietropassarelli.com/slate-transcript-editor/?path=/story/slatetranscripteditor--demo)
23 | - [ ] some other React CRA Environment
24 | - [ ] Some other React NextJS environment
25 | - [ ] Some other React environment
26 | 
27 | ## 1.QA transcript editor
28 | 
29 | - [ ] 1.1 Can edit the text
30 | - [ ] 1.2 Double click on words
31 | - [ ] 1.3 Split paragraph
32 | - [ ] 1.4 Confirm that enter beginning and end of paragraphs does not work by design
33 | - [ ] 1.5 Delete at beginning of paragraphs to merge two paragraphs --> Expect timecodes adjustment + speaker merge.
34 | - [ ] 1.6 Same/similar for paragraph split
35 | - [ ] 1.7 Unable to delete or enter with selection across multiple paragraphs —> selection collapsed
36 | - [ ] 1.8 Unable to hit enter in the middle of a word
37 | - [ ] 1.9 Can edit, add, delete replace text, --> And then click align Or click save
38 | - [ ] 1.10 Test pause while typing feature
39 | 
40 | ## 2.Test export
41 | 
42 | - [ ] 2.1 All file format**s** + inspect content. Check matches expected.
43 |   - [ ] **Text Export options**
44 |     - [ ] Text (<code>.txt</code>)
45 |     - [ ] Text (Speakers)
46 |     - [ ] Text (Timecodes)
47 |     - [ ] Text (Speakers & Timecodes)
48 |     - [ ] Text (Atlas format)
49 |     - [ ] Word (<code>.docx</code>)
50 |     - [ ] Word (Speakers)
51 |     - [ ] Word (Timecodes)
52 |     - [ ] Word (Speakers & Timecodes)
53 |     - [ ] Word (OHMS)
54 |   - [ ] **Closed Captions Export**
55 |     - [ ] Srt (`.srt`)
56 |     - [ ] VTT (`.vtt`)
57 |     - [ ] VTT with speakers (`.vtt`)
58 |     - [ ] VTT with speakers and paragraphs (`.vtt`)
59 |     - [ ] iTT (`.itt`)
60 |     - [ ] TTML (`.ttml`)
61 |     - [ ] TTML for Adobe Premiere (`.ttml`)
62 |     - [ ] CSV (`.csv`)
63 |     - [ ] Pre segmented txt (`.txt`)
64 |     - [ ] Json (`.json`)
65 |   - [ ] **Developer options**
66 |     - [ ] SlateJs (<code>.json</code>)
67 |     - [ ] DPE (<code>.json</code>)
68 | 
69 | ## 3.Buttons
70 | 
71 | - [ ] 3.1 Side btns test, insert special symbols etc...
72 |   - [ ] 3.1.1 Save btn
73 |   - [ ] 3.1.2 Insert paragraph break btn
74 |   - [ ] 3.1.3 insert `[INAUDIBLE]` in text btn
75 |   - [ ] 3.1.4 Insert music note 🎵 in text btn
76 |   - [ ] 3.1.5 Pause while typing ⏸️ btn
77 |   - [ ] 3.1.6 Restore timecodes ♻ btn
78 |   - [ ] 3.1.7 Replace whole text ↑↓ btn
79 |   - [ ] 3.1.8 Info ❓ btn
80 | - [ ] 3.2 Playback speed `x` btn
81 | 
82 | ## 4.Other transcript functionalities
83 | 
84 | - [ ] 4.1 Double click on word jumps to that point
85 | - [ ] 4.2 Single click on time codes jump to that point
86 | - [ ] 4.3 Click on paragraph to change One speaker
87 | - [ ] 4.4 Click on paragraph to change all matching speakers
88 | 
89 | ## 5.Other
90 | 
91 | - [ ] 5.1 Mobile test?
92 | - [ ] 5.2 Done?
93 | 
94 | <!-- Anything else noteworthy, eg things you noticed that are either bugs or not quite right, outside of the steps above?  -->
95 | 
96 | _NA_
97 | 


--------------------------------------------------------------------------------
/src/components/6-CustomTheme.stories.js:
--------------------------------------------------------------------------------
  1 | import React from 'react';
  2 | import { action } from '@storybook/addon-actions';
  3 | import { withKnobs, text, boolean, number, object, select } from '@storybook/addon-knobs';
  4 | import { withInfo } from '@storybook/addon-info';
  5 | import { version } from '../../package.json';
  6 | 
  7 | import { createMuiTheme, ThemeProvider } from '@material-ui/core/styles';
  8 | import { lightBlue, red, orange, deepOrange } from '@material-ui/core/colors';
  9 | // import purple from '@material-ui/core/colors/purple';
 10 | 
 11 | import SlateTranscriptEditor from './index.js';
 12 | import 'fontsource-roboto';
 13 | 
 14 | export default { // Storybook metadata shared by the stories exported from this file
 15 |   title: 'Custom Theme',
 16 |   component: SlateTranscriptEditor,
 17 |   decorators: [withKnobs, withInfo], // knobs + info addon decorators
 18 |   parameters: {
 19 |     info: { // options passed to the info addon — presumably they limit how much of each prop is rendered; confirm against the addon docs
 20 |       maxPropArrayLength: 3,
 21 |       maxPropsIntoLine: 3,
 22 |       maxPropObjectKeys: 1,
 23 |       excludedPropTypes: ['transcriptData'],
 24 |       source: false,
 25 |     },
 26 |   },
 27 | };
 28 | // Demo media url, title and DPE transcript (Soleio interview) passed to the editor by the stories in this file.
 29 | const DEMO_MEDIA_URL_SOLEIO =
 30 |   'https://digital-paper-edit-demo.s3.eu-west-2.amazonaws.com/PBS-Frontline/The+Facebook+Dilemma+-+interviews/The+Facebook+Dilemma+-+Soleio+Cuervo-OIAUfZBd_7w.mp4';
 31 | const DEMO_TITLE_SOLEIO = 'Soleio Interview, PBS Frontline';
 32 | import DEMO_SOLEIO from '../sample-data/soleio-dpe.json';
 33 | 
 34 | export const CustomTheme = () => {
 35 |   const theme = createMuiTheme({
 36 |     palette: {
 37 |       background: {
 38 |         // paper: '#424242',
 39 |         // default: '#303030',
 40 |       },
 41 |       primary: {
 42 |         main: lightBlue['500'],
 43 |       },
 44 |       secondary: {
 45 |         main: red['500'],
 46 |       },
 47 |     },
 48 |   });
 49 |   return (
 50 |     <>
 51 |       <p>
 52 |         Slate Transcript Editor version: <code>{version}</code>
 53 |       </p>
 54 |       <ThemeProvider theme={theme}>
 55 |         <SlateTranscriptEditor
 56 |           title={DEMO_TITLE_SOLEIO}
 57 |           mediaUrl={text('mediaUrl', DEMO_MEDIA_URL_SOLEIO)}
 58 |           handleSaveEditor={action('handleSaveEditor')}
 59 |           // handleAutoSaveChanges={action('handleAutoSaveChanges')}
 60 |           // https://www.npmjs.com/package/@storybook/addon-knobs#select
 61 |           autoSaveContentType={select('autoSaveContentType', ['digitalpaperedit', 'slate'], 'digitalpaperedit')} // digitalpaperedit or slate - digitalpaperedit, runs alignement before exporting, slate, is just the raw data.
 62 |           // transcriptData={object('transcriptData', DEMO_SOLEIO)}
 63 |           transcriptData={DEMO_SOLEIO}
 64 |         />
 65 |       </ThemeProvider>
 66 |     </>
 67 |   );
 68 | };
 69 | 
 70 | export const CustomThemeExampleTwo = () => {
 71 |   const theme = createMuiTheme({
 72 |     palette: {
 73 |       background: {
 74 |         // paper: '#424242',
 75 |         // default: '#303030',
 76 |       },
 77 |       primary: {
 78 |         main: deepOrange['900'],
 79 |       },
 80 |       secondary: {
 81 |         main: orange['900'],
 82 |       },
 83 |     },
 84 |   });
 85 |   return (
 86 |     <>
 87 |       <p>
 88 |         Slate Transcript Editor version: <code>{version}</code>
 89 |       </p>
 90 |       <ThemeProvider theme={theme}>
 91 |         <SlateTranscriptEditor
 92 |           title={DEMO_TITLE_SOLEIO}
 93 |           mediaUrl={text('mediaUrl', DEMO_MEDIA_URL_SOLEIO)}
 94 |           handleSaveEditor={action('handleSaveEditor')}
 95 |           // handleAutoSaveChanges={action('handleAutoSaveChanges')}
 96 |           // https://www.npmjs.com/package/@storybook/addon-knobs#select
 97 |           autoSaveContentType={select('autoSaveContentType', ['digitalpaperedit', 'slate'], 'digitalpaperedit')} // digitalpaperedit or slate - digitalpaperedit, runs alignement before exporting, slate, is just the raw data.
 98 |           // transcriptData={object('transcriptData', DEMO_SOLEIO)}
 99 |           transcriptData={DEMO_SOLEIO}
100 |         />
101 |       </ThemeProvider>
102 |     </>
103 |   );
104 | };
105 | 


--------------------------------------------------------------------------------
/src/util/timecode-converter/index.test.js:
--------------------------------------------------------------------------------
  1 | import { timecodeToSeconds, secondsToTimecode } from './index';
  2 | 
  3 | describe('Timecode conversion TC- convertToSeconds', () => { // exercises timecodeToSeconds: timecode strings or numbers in, seconds out
  4 |   it('Should be defined', () => {
  5 |     const demoTcValue = '00:10:00:00';
  6 |     const result = timecodeToSeconds(demoTcValue);
  7 |     expect(result).toBeDefined();
  8 |   });
  9 | 
 10 |   it('Should be able to convert: hh:mm:ss:ms ', () => {
 11 |     const demoTcValue = '00:10:00:00';
 12 |     const demoExpectedResultInSeconds = 600;
 13 |     const result = timecodeToSeconds(demoTcValue);
 14 |     expect(result).toEqual(demoExpectedResultInSeconds);
 15 |   });
 16 | 
 17 |   it('Should be able to conver: mm:ss ', () => {
 18 |     const demoTcValue = '10:00';
 19 |     const demoExpectedResultInSeconds = 600;
 20 |     const result = timecodeToSeconds(demoTcValue);
 21 |     expect(result).toEqual(demoExpectedResultInSeconds);
 22 |   });
 23 | 
 24 |   it('Should be able to convert: m:ss ', () => {
 25 |     const demoTcValue = '09:00'; // NOTE(review): zero-padded value, same shape as the mm:ss case rather than the m:ss the title names
 26 |     const demoExpectedResultInSeconds = 540;
 27 |     const result = timecodeToSeconds(demoTcValue);
 28 |     expect(result).toEqual(demoExpectedResultInSeconds);
 29 |   });
 30 | 
 31 |   it('Should be able to convert: m.ss ', () => {
 32 |     const demoTcValue = '9.01'; // expected below as 9 minutes and 1 second (541)
 33 |     const demoExpectedResultInSeconds = 541;
 34 |     const result = timecodeToSeconds(demoTcValue);
 35 |     expect(result).toEqual(demoExpectedResultInSeconds);
 36 |   });
 37 | 
 38 |   it('Should be able to convert: ss - seconds ', () => {
 39 |     const demoTcValue = 600; // a number (not a string) is expected to come back unchanged
 40 |     const demoExpectedResultInSeconds = 600;
 41 |     const result = timecodeToSeconds(demoTcValue);
 42 |     expect(result).toEqual(demoExpectedResultInSeconds);
 43 |   });
 44 | 
 45 |   xit('Should be able to convert: ss - seconds - eve if it is string ', () => { // xit: currently skipped
 46 |     const demoTcValue = '600';
 47 |     const demoExpectedResultInSeconds = 600;
 48 |     const result = timecodeToSeconds(demoTcValue);
 49 |     expect(result).toEqual(demoExpectedResultInSeconds);
 50 |   });
 51 | 
 52 |   it('Should be able to convert: hh:mm:ss ', () => {
 53 |     const demoTcValue = '00:10:00';
 54 |     const demoExpectedResultInSeconds = 600;
 55 |     const result = timecodeToSeconds(demoTcValue);
 56 |     expect(result).toEqual(demoExpectedResultInSeconds);
 57 |   });
 58 | 
 59 |   xit(' "sss" seconds number as string --> ss', () => { // xit: currently skipped
 60 |     const demoTime = '56';
 61 |     const expectedTimecode = '56'; // NOTE(review): expects a string back, while the enabled tests expect numbers — confirm before enabling
 62 |     const result = timecodeToSeconds(demoTime);
 63 |     expect(result).toEqual(expectedTimecode);
 64 |   });
 65 | 
 66 |   xit(' "sss" seconds number as string --> ss', () => { // xit: currently skipped; same title as the previous test
 67 |     const demoTime = '116';
 68 |     const expectedTimecode = '116'; // NOTE(review): string expectation, see the '56' case
 69 |     const result = timecodeToSeconds(demoTime);
 70 |     expect(result).toEqual(expectedTimecode);
 71 |   });
 72 | 
 73 |   it('120 sec --> 120', () => {
 74 |     const demoTime = 120;
 75 |     const expectedTimecode = 120;
 76 |     const result = timecodeToSeconds(demoTime);
 77 |     expect(result).toEqual(expectedTimecode);
 78 |   });
 79 |   // NOTE(review): the commented-out tests below call convertToSeconds, which is not imported in this file.
 80 |   //   xit('Should be able to convert: hh:mm:ss,ms ', ( )=> {
 81 |   //     const demoTcValue = '00:10:00,00';
 82 |   //     const demoExpectedResultInSeconds = 600;
 83 |   //     const result = convertToSeconds(demoTcValue);
 84 |   //     expect(result).toEqual(demoExpectedResultInSeconds);
 85 |   //   })
 86 | 
 87 |   //   xit('Should be able to convert hh:mm:ss;ms ', ( )=> {
 88 |   //     const demoTcValue = '00:10:00;00';
 89 |   //     const demoExpectedResultInSeconds = 600;
 90 |   //     const result = convertToSeconds(demoTcValue);
 91 |   //     expect(result).toEqual(demoExpectedResultInSeconds);
 92 |   //   })
 93 | 
 94 |   //   xit('Should be able to convert hh:mm:ss.ms ', ( )=> {
 95 |   //     const demoTcValue = '00:10:00.00';
 96 |   //     const demoExpectedResultInSeconds = 600;
 97 |   //     const result = convertToSeconds(demoTcValue);
 98 |   //     expect(result).toEqual(demoExpectedResultInSeconds);
 99 |   //   })
100 | });
101 | 
102 | describe('Timecode conversion seconds to - convertToTimecode ', () => { // exercises secondsToTimecode: seconds (number or numeric string) in, 'hh:mm:ss:ms'-shaped string out
103 |   it('Should be able to seconds to timecode hh:mm:ss:ms ', () => {
104 |     const demoSeconds = 600;
105 |     const demoExpectedResultInTc = '00:10:00:00';
106 |     const result = secondsToTimecode(demoSeconds);
107 |     expect(result).toEqual(demoExpectedResultInTc);
108 |   });
109 | 
110 |   it('Should be able to seconds - string to timecode hh:mm:ss:ms ', () => {
111 |     const demoSeconds = '600'; // same value as above, passed as a string
112 |     const demoExpectedResultInTc = '00:10:00:00';
113 |     const result = secondsToTimecode(demoSeconds);
114 |     expect(result).toEqual(demoExpectedResultInTc);
115 |   });
116 | });
117 | 


--------------------------------------------------------------------------------
/src/components/slate-helpers/handle-delete-in-paragraph/index.js:
--------------------------------------------------------------------------------
  1 | import isSameBlock from '../handle-split-paragraph/is-same-block';
  2 | import isBeginningOftheBlock from '../handle-split-paragraph/is-beginning-of-the-block';
  3 | import isSelectionCollapsed from '../handle-split-paragraph/is-selection-collapsed';
  4 | import { isTextAndWordsListChanged, alignBlock } from '../../../util/export-adapters/slate-to-dpe/update-timestamps/update-bloocks-timestamps';
  5 | import SlateHelpers from '../index';
  6 | 
  7 | /**
  8 |  *
  9 |  * @return {boolean} - to signal if it was suscesfull at splitting to a parent function
 10 |  */
 11 | // TODO: refacto clean up to make more legibl
 12 | function handleDeleteInParagraph({ editor, event }) {
 13 |   const { anchor, focus } = editor.selection;
 14 | 
 15 |   const { offset: anchorOffset, path: anchorPath } = anchor;
 16 |   const { offset: focusOffset, path: focusPath } = focus;
 17 | 
 18 |   if (isSameBlock(anchorPath, focusPath)) {
 19 |     if (isBeginningOftheBlock(anchorOffset, focusOffset)) {
 20 |       event.preventDefault();
 21 |       console.info('in the same block, but at the beginning of a paragraph for now you are not allowed to create an empty new line');
 22 |       const [blockNode, path] = SlateHelpers.getClosestBlock(editor);
 23 |       const currentBlockNode = blockNode;
 24 |       const currentBlockNumber = path[0];
 25 |       if (currentBlockNumber === 0) {
 26 |         return false;
 27 |       }
 28 | 
 29 |       const previousBlockNumber = currentBlockNumber - 1;
 30 |       const previousBlock = SlateHelpers.getNodebyPath({
 31 |         editor,
 32 |         path: [previousBlockNumber],
 33 |       });
 34 | 
 35 |       const previousBlockEndOffset = previousBlock.children[0].text.length;
 36 |       const previousBlockText = previousBlock.children[0].text;
 37 |       const previousBlockWordsList = previousBlock.children[0].words;
 38 |       let currentBlockText = currentBlockNode.children[0].text;
 39 |       let currentBlockWordsList = currentBlockNode.children[0].words;
 40 |       // if the word have changed. then re-align paragraph before splitting.
 41 |       // TODO: this needs re-thinking if there's other re-alignment happening
 42 |       // eg on key down debounce
 43 |       if (isTextAndWordsListChanged({ text: currentBlockText, words: currentBlockWordsList })) {
 44 |         const currentBlockNodeAligned = alignBlock({
 45 |           block: currentBlockNode,
 46 |           text: currentBlockText,
 47 |           words: currentBlockWordsList,
 48 |         });
 49 |         currentBlockWordsList = currentBlockNodeAligned.children[0].words;
 50 |         currentBlockText = currentBlockNodeAligned.children[0].text;
 51 |       }
 52 | 
 53 |       const newText = previousBlockText + ' ' + currentBlockText;
 54 |       const newWords = [...previousBlockWordsList, ...currentBlockWordsList];
 55 | 
 56 |       const range = {
 57 |         anchor: {
 58 |           path: [currentBlockNumber, 0],
 59 |           offset: 0,
 60 |         },
 61 |         focus: {
 62 |           path: [previousBlockNumber, 0],
 63 |           offset: previousBlockEndOffset,
 64 |         },
 65 |       };
 66 | 
 67 |       const options = {
 68 |         at: range,
 69 |         mode: 'highest',
 70 |       };
 71 |       //   const startTimeSecondParagraph = wordsAfter[0].start;
 72 |       const { speaker, start, previousTimings, startTimecode } = currentBlockNode;
 73 |       const newBlockParagraph = SlateHelpers.createNewParagraphBlock({
 74 |         speaker,
 75 |         start,
 76 |         previousTimings,
 77 |         startTimecode,
 78 |         text: newText,
 79 |         words: newWords,
 80 |       });
 81 | 
 82 |       SlateHelpers.removeNodes({ editor, options });
 83 | 
 84 |       const options2 = {
 85 |         at: [previousBlockNumber],
 86 |         mode: 'highest',
 87 |       };
 88 |       SlateHelpers.insertNodesAtSelection({
 89 |         editor,
 90 |         blocks: [newBlockParagraph],
 91 |         moveSelection: false,
 92 |         options: options2,
 93 |       });
 94 | 
 95 |       // move the selection to in the "middle" of the new paragraph where the text of the two is joined.s
 96 |       const newOffset = previousBlockText.length;
 97 |       const nextPoint = { offset: newOffset, path: [previousBlockNumber, 0] };
 98 |       SlateHelpers.setSelection({ editor, nextPoint });
 99 |       return true;
100 |     }
101 |     if (isSelectionCollapsed(anchorOffset, focusOffset)) {
102 |       //  In same block but with selection collapsed
103 |       // event.preventDefault();
104 |       return false;
105 |     } else {
106 |       // In same block but with wide selection
107 |       //   event.preventDefault();
108 |       return false;
109 |     }
110 |   } else {
111 |     event.preventDefault();
112 |     console.info('in different block, not handling this use case for now, and collapsing the selection instead');
113 |     SlateHelpers.collapseSelectionToAsinglePoint(editor);
114 |     return false;
115 |   }
116 | }
117 | 
118 | export default handleDeleteInParagraph;
119 | 


--------------------------------------------------------------------------------
/docs/guides/features-list.md:
--------------------------------------------------------------------------------
  1 | # Features List - draft
  2 | 
  3 | ## Player controls
  4 | 
  5 | - [x] play/pause
  6 | - [x] Current time + duration display
  7 | - [x] Adjust Playback rate
  8 | - [x] auto pause while typing
  9 | - [x] Roll back button ~15 sec~ 10 sec default, ~customizable amount~
 10 | 
 11 | _Currently not in scope_
 12 | 
 13 | - [ ] ~Adjust timecodes <— set a timecode offset - default to zero~
 14 | - [ ] ~UI Turn off video preview (toggle on/off)~
 15 | - [ ] ~Jump to timecode <— in timecode `hh:mm:ss:ms` format or (hh:mm:ss:ms hh:mm:ss mm:ss m:ss m.ss seconds)~
 16 | 
 17 | ## ~Keyboard Shortcuts~
 18 | 
 19 | _Currently not in scope_
 20 | 
 21 | - [ ] ~Keyboard Shortcuts~
 22 | - [ ] ~customizable Keyboard Shortcuts~
 23 | 
 24 | ## HyperTranscript - interactivity
 25 | 
 26 | - [x] On text word double click at timecode -> media current time set to word timecode
 27 | - [x] Paragraphs highlighted at current time
 28 | - [ ] Words highlighted at current time <—-
 29 | - [x] Preserve timecodes via separate sync btn for now (🔁)
 30 | - [ ] Preserve timecodes while editing (eg via debounce function? ) `<--` TBC
 31 | 
 32 | _Currently not in scope_
 33 | 
 34 | - [ ] ~Scroll Sync, keep current word in view <— (toggle on/off)~
 35 | 
 36 | ## Transcript Extra Info
 37 | 
 38 | - [x] Display Timecodes at paragraph level ~(with offset if present)~
 39 | - [x] Display editable speaker names at paragraph level - speaker diarization info
 40 | 
 41 | ## Save
 42 | 
 43 | - [ ] Save btn - triggers save callback for parent component to decide what to do
 44 | - [ ] auto save (without affecting performance, eg when user stops typing ) `<--` TBC
 45 | - [ ] ~Save locally - (local storage)~
 46 | - [ ] ~Save locally - on interval, eg every `x` char~
 47 | - [ ] ~Save to server API end point - Btn~
 48 | - [ ] ~Save to server API end point - on interval~
 49 | 
 50 | ## Import
 51 | 
 52 | - [x] option to import accurate text to replace STT one and transpose timecodes (`↑↓`)
 53 | 
 54 | ## Export
 55 | 
 56 | ### Text/Word
 57 | 
 58 | - [x] Export plain text - without speaker names or timecodes
 59 | - [x] Customizable Export plain text, eg with timecodes, speakers names etc..
 60 |   - [x] text only
 61 |   - [x] with speaker names
 62 |   - [x] with timecodes
 63 |   - [x] with timecodes & speaker names
 64 | - [x] Plain text ([Atlas format](https://atlasti.com))
 65 | - [x] Export word document (`.docx`)
 66 |   - [x] text only
 67 |   - [x] with speaker names
 68 |   - [x] with timecodes
 69 |   - [x] with timecodes & speaker names
 70 | - [x] Word document ([OHMS](../notes/OHMS.md))
 71 | 
 72 | ### Captions/Subtitles
 73 | 
 74 | - [x] SRT
 75 | - [x] VTT
 76 |   - [x] VTT (with speakers)
 77 |   - [x] VTT (with speakers & preserving paragraph breaks)
 78 | - [x] CSV
 79 | - [x] iTT
 80 | - [x] TTML (Adobe Premiere)
 81 | - [x] Json
 82 | - [x] presegmented text
 83 | - [ ] ~IIIF~
 84 | - [ ] ~SMT and/or CTM ?~<!-- SCLite -->
 85 | 
 86 | ### Dev export options
 87 | 
 88 | - [x] dpe json ([see here for more details on format](./dpe-transcript-format.md))
 89 | - [x] slateJs json
 90 | 
 91 | ## Mobile First
 92 | 
 93 | - [x] Works on mobile
 94 | 
 95 | Browser compatibility
 96 | 
 97 | - [x] Works on Chrome / Brave
 98 | - [ ] Firefox
 99 | - [ ] ~Windows Explorer IE~
100 | 
101 | ## Dev - STT Adapters
102 | 
103 | Import Transcript Json as
104 | 
105 | - [x] dpe json ([see here for more details on dpe format](./dpe-transcript-format.md))
106 | 
107 | _Input only available in one format, but external modules adapters available for other formats to convert to dpe json_
108 | 
109 | Current separate adapter modules available:
110 | 
111 | - [x] AssemblyAI [`assemblyai-to-dpe`](https://github.com/pietrop/assemblyai-to-dpe)
112 | - [x] AWS Transcriber [`aws-to-dpe`](https://github.com/pietrop/aws-to-dpe)
113 | - [x] Google STT [`gcp-to-dpe`](https://github.com/pietrop/gcp-to-dpe)
114 | - [ ] IBM Watson STT (in PR [pietrop/digital-paper-edit-electron#52](https://github.com/pietrop/digital-paper-edit-electron/pull/52) module [`ibmwatson-to-dpe`](https://github.com/pietrop/digital-paper-edit-electron/pull/52/files#diff-fc121f3f4370613b5ddb6d5a3ef0a7bff5307f74684e0b482185d1a4572add06) but not extracted as separate module npm/github repo)
115 | 
116 | ### Not in scope
117 | 
118 | - [ ] ~Speechmatics~ (There's a [`speechmatics-to-dpe`](https://github.com/pietrop/digital-paper-edit-electron/tree/master/src/ElectronWrapper/lib/transcriber/speechmatics/speechmatics-to-dpe) module but not extracted as a separate npm/github repo/module - [since speechmatics web portal API deprecation notice](https://www.speechmatics.com/transcription-web-portal-deprecation-notice/))
119 | - [ ] ~BBC Kaldi~
120 | - [ ] ~News Labs API - BBC Kaldi~
121 | - [ ] ~[autoEdit 2](https://opennewslabs.github.io/autoEdit_2/)~
122 | - [ ] ~Gentle Transcription~
123 | - [ ] ~Gentle Alignment Json~
124 | - [ ] ~Rev~
125 | - [ ] ~3play Media Json~
126 | 
127 | _If you are interested in an adapter that is currently not available, or you made one that could be useful for the community, [feel free to raise an issue](https://github.com/pietrop/digital-paper-edit-electron/issues/new?assignees=pietrop&labels=enhancement&template=feature_request.md&title=New%20adapter)_
128 | 


--------------------------------------------------------------------------------
/src/components/slate-helpers/handle-split-paragraph/index.js:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * handles splitting a paragraph, as well as associated block paragraph data
  3 |  * such as word timecodes, previous times,
  4 |  * and adjusting start time for the paragraph etc..
  5 |  */
  6 | // import getClosestBlock from '../get-closest-block';
  7 | import isSameBlock from './is-same-block';
  8 | import isBeginningOftheBlock from './is-beginning-of-the-block.js';
  9 | import isEndOftheBlock from './is-end-of-the-block.js';
 10 | import isSelectionCollapsed from './is-selection-collapsed';
 11 | import splitTextAtOffset from './split-text-at-offset';
 12 | import splitWordsListAtOffset from './split-words-list-at-offset';
 13 | import countWords from '../../../util/count-words';
 14 | import SlateHelpers from '../index';
 15 | import isTextSameAsWordsList from './is-text-same-as-words-list';
 16 | import { isTextAndWordsListChanged, alignBlock } from '../../../util/export-adapters/slate-to-dpe/update-timestamps/update-bloocks-timestamps';
 17 | 
 18 | /**
 19 |  *
 20 |  * @param {*} editor slate editor
 21 |  * @return {boolean} - to signal if it was suscesfull at splitting to a parent function
 22 |  */
 23 | function handleSplitParagraph(editor) {
 24 |   // get char offset
 25 |   const { anchor, focus } = editor.selection;
 26 |   const { offset: anchorOffset, path: anchorPath } = anchor;
 27 |   const { offset: focusOffset, path: focusPath } = focus;
 28 | 
 29 |   if (isSameBlock(anchorPath, focusPath)) {
 30 |     if (isBeginningOftheBlock(anchorOffset, focusOffset)) {
 31 |       console.info('in the same block, but at the beginning of a paragraph for now you are not allowed to create an empty new line');
 32 |       return false;
 33 |     }
 34 | 
 35 |     if (isSelectionCollapsed(anchorOffset, focusOffset)) {
 36 |       // get current block
 37 |       const [blockNode, path] = SlateHelpers.getClosestBlock(editor);
 38 |       const currentBlockNode = blockNode;
 39 |       // split into two blocks
 40 |       let currentBlockWords = currentBlockNode.children[0].words;
 41 |       let text = currentBlockNode.children[0].text;
 42 | 
 43 |       if (isEndOftheBlock({ anchorOffset, focusOffset, totlaChar: text.split('').length })) {
 44 |         console.info('in the same block, but at the end of a paragraph for now you are not allowed to create an empty new line');
 45 |         return false;
 46 |       }
 47 | 
 48 |       // if the word have changed. then re-align paragraph before splitting.
 49 |       // TODO: this needs re-thinking if there's other re-alignment happening
 50 |       // eg on key down debounce
 51 |       if (isTextAndWordsListChanged({ text, words: currentBlockWords })) {
 52 |         const currentBlockNodeAligned = alignBlock({ block: currentBlockNode, text, words: currentBlockWords });
 53 |         currentBlockWords = currentBlockNodeAligned.children[0].words;
 54 |         text = currentBlockNodeAligned.children[0].text;
 55 |       }
 56 |       // split text in
 57 |       const [textBefore, textAfter] = splitTextAtOffset(text, anchorOffset);
 58 |       // also split words list
 59 |       // TODO: edge case splitting in the middle of a word eg find a way to prevent that for now? or is not a problem?
 60 |       const numberOfWordsBefore = countWords(textBefore);
 61 |       const [wordsBefore, wordsAfter] = splitWordsListAtOffset(currentBlockWords, numberOfWordsBefore);
 62 |       // if cursor in the middle of a word then move cursor to space just before
 63 | 
 64 |       const isCaretInMiddleOfAword = isTextSameAsWordsList(textBefore, wordsBefore);
 65 |       if (isCaretInMiddleOfAword) {
 66 |         return false;
 67 |       }
 68 |       // get start time of first block
 69 |       const { speaker, start } = currentBlockNode;
 70 |       // adjust previousTimings
 71 |       const blockParagraphBefore = SlateHelpers.createNewParagraphBlock({
 72 |         speaker,
 73 |         start,
 74 |         text: textBefore,
 75 |         words: wordsBefore,
 76 |       });
 77 |       // adjust start time (start and startTimecode) of second block, which is start time of second lsit of words
 78 |       const startTimeSecondParagraph = wordsAfter[0].start;
 79 |       const blockParagraphAfter = SlateHelpers.createNewParagraphBlock({
 80 |         speaker,
 81 |         start: startTimeSecondParagraph,
 82 |         text: textAfter,
 83 |         words: wordsAfter,
 84 |       });
 85 | 
 86 |       //delete original block
 87 |       SlateHelpers.removeNodes({ editor });
 88 |       // insert these two blocks
 89 |       SlateHelpers.insertNodesAtSelection({
 90 |         editor,
 91 |         blocks: [blockParagraphBefore, blockParagraphAfter],
 92 |         moveSelection: true,
 93 |       });
 94 |       return true;
 95 |     } else {
 96 |       console.info('in same block but with wide selection, not handling this use case for now, and collapsing the selection instead');
 97 |       SlateHelpers.collapseSelectionToAsinglePoint(editor);
 98 |       return false;
 99 |     }
100 |   } else {
101 |     console.info('in different block, not handling this use case for now, and collapsing the selection instead');
102 |     SlateHelpers.collapseSelectionToAsinglePoint(editor);
103 |     return false;
104 |   }
105 | }
106 | export default handleSplitParagraph;
107 | 


--------------------------------------------------------------------------------
/src/util/export-adapters/subtitles-generator/index.js:
--------------------------------------------------------------------------------
  1 | import formatSeconds from './compose-subtitles/util/format-seconds.js';
  2 | import textSegmentation from './presegment-text/text-segmentation/index.js';
  3 | import addLineBreakBetweenSentences from './presegment-text/line-break-between-sentences/index.js';
  4 | import foldWords from './presegment-text/fold/index.js';
  5 | import divideIntoTwoLines from './presegment-text/divide-into-two-lines/index.js';
  6 | import preSegmentText from './presegment-text/index.js';
  7 | import { getTextFromWordsList } from './presegment-text/index.js';
  8 | 
  9 | import ttmlGeneratorPremiere from './compose-subtitles/premiere.js';
 10 | import ittGenerator from './compose-subtitles/itt.js';
 11 | import ttmlGenerator from './compose-subtitles/ttml.js';
 12 | import srtGenerator from './compose-subtitles/srt.js';
 13 | import vttGenerator from './compose-subtitles/vtt.js';
 14 | import csvGenerator from './compose-subtitles/csv/index.js';
 15 | import countWords from '../../count-words';
 16 | 
 17 | function segmentedTextToList(text) {
 18 |   let result = text.split('\n\n');
 19 |   result = result.map((line) => {
 20 |     return line.trim();
 21 |   });
 22 | 
 23 |   return result;
 24 | }
 25 | 
 26 | function addTimecodesToLines(wordsList, paragraphs, lines) {
 27 |   // console.log('addTimecodesToLines', wordsList, paragraphs, lines);
 28 |   wordsList = wordsList.filter((w) => {
 29 |     // console.log('addTimecodesToLines w', w);
 30 |     return w.text && w.text.length > 0;
 31 |   });
 32 |   let startWordCounter = 0;
 33 |   let endWordCounter = 0;
 34 |   console.log('lines', lines);
 35 |   const results = lines
 36 |     .filter((l) => {
 37 |       return l;
 38 |     })
 39 |     .map((line) => {
 40 |       endWordCounter += countWords(line);
 41 |       const jsonLine = { text: line.trim() };
 42 |       jsonLine.start = wordsList[startWordCounter].start;
 43 |       jsonLine.end = wordsList[endWordCounter - 1].end;
 44 |       // #-----------------|------|-----------------#
 45 |       const possibleParagraphs = paragraphs
 46 |         .filter((p) => jsonLine.start >= p.start && jsonLine.start < p.end)
 47 |         .map((p) => {
 48 |           const inParagraphEndTime = Math.min(jsonLine.end, p.end);
 49 |           const inParagraphDuration = inParagraphEndTime - jsonLine.start;
 50 | 
 51 |           const totalDuration = jsonLine.end - jsonLine.start;
 52 |           const pctInParagraph = inParagraphDuration / totalDuration;
 53 | 
 54 |           return {
 55 |             ...p,
 56 |             pctInParagraph,
 57 |           };
 58 |         })
 59 |         .sort((a, b) => b.pctInParagraph - a.pctInParagraph || a.start - b.start); // sort by % in paragraph descending, then start time ascending
 60 |       jsonLine.speaker = possibleParagraphs.length > 0 ? possibleParagraphs[0].speaker : 'UNKNOWN';
 61 |       startWordCounter = endWordCounter;
 62 | 
 63 |       return jsonLine;
 64 |     });
 65 | 
 66 |   return results;
 67 | }
 68 | 
 69 | function convertSlateValueToSubtitleJson(slateValue) {
 70 |   // there shouldn't be empty blocks in the slateJs content value
 71 |   // but adding a filter here to double check just in cases
 72 |   return slateValue
 73 |     .filter((block) => {
 74 |       return block;
 75 |     })
 76 |     .map((block) => {
 77 |       return {
 78 |         start: block.start,
 79 |         end: block.children[0].words[block.children[0].words.length - 1].end,
 80 |         speaker: block.speaker,
 81 |         text: block.children[0].text,
 82 |       };
 83 |     });
 84 | }
 85 | 
 86 | function preSegmentTextJson({ wordsList, paragraphs, numberOfCharPerLine }) {
 87 |   const result = preSegmentText(wordsList, numberOfCharPerLine);
 88 |   const segmentedTextArray = segmentedTextToList(result);
 89 |   return addTimecodesToLines(wordsList, paragraphs, segmentedTextArray);
 90 | }
 91 | 
 92 | function subtitlesComposer({ words, paragraphs, type, numberOfCharPerLine, slateValue }) {
 93 |   let subtitlesJson;
 94 |   if (type === 'vtt_speakers_paragraphs') {
 95 |     subtitlesJson = convertSlateValueToSubtitleJson(slateValue);
 96 |   } else {
 97 |     subtitlesJson = preSegmentTextJson({
 98 |       wordsList: words,
 99 |       paragraphs,
100 |       numberOfCharPerLine,
101 |     });
102 |     console.log('subtitlesJson', subtitlesJson);
103 |   }
104 | 
105 |   if (typeof words === 'string') {
106 |     return preSegmentText(words, numberOfCharPerLine);
107 |   }
108 |   switch (type) {
109 |     case 'premiereTTML':
110 |       return ttmlGeneratorPremiere(subtitlesJson);
111 |     case 'ttml':
112 |       return ttmlGenerator(subtitlesJson);
113 |     case 'itt':
114 |       return ittGenerator(subtitlesJson);
115 |     case 'srt':
116 |       return srtGenerator(subtitlesJson);
117 |     case 'vtt':
118 |       return vttGenerator(subtitlesJson);
119 |     case 'vtt_speakers':
120 |       return vttGenerator(subtitlesJson, true);
121 |     case 'vtt_speakers_paragraphs':
122 |       return vttGenerator(subtitlesJson, true);
123 |     case 'json':
124 |       // converting timecodes to captions time stamps
125 |       return subtitlesJson.map((line) => {
126 |         line.start = formatSeconds(parseFloat(line.start)).replace('.', ',');
127 |         line.end = formatSeconds(parseFloat(line.end)).replace('.', ',');
128 |         return line;
129 |       });
130 |     case 'csv':
131 |       return csvGenerator(subtitlesJson);
132 |     case 'pre-segment-txt':
133 |       return preSegmentText(words, numberOfCharPerLine);
134 |     case 'txt':
135 |       return preSegmentText(words, numberOfCharPerLine);
136 |     default:
137 |       return 'Could not find the subtitle format';
138 |   }
139 | }
140 | 
141 | export {
142 |   textSegmentation,
143 |   addLineBreakBetweenSentences,
144 |   foldWords,
145 |   divideIntoTwoLines,
146 |   getTextFromWordsList,
147 |   preSegmentText,
148 |   ttmlGeneratorPremiere,
149 |   ttmlGenerator,
150 |   ittGenerator,
151 |   srtGenerator,
152 |   vttGenerator,
153 | };
154 | 
155 | export default subtitlesComposer;
156 | 


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
 1 | # Code of Conduct 
 2 | 
 3 | We are committed to providing a friendly, safe and welcoming environment for all, regardless of gender, sexual orientation, disability, ethnicity, religion, or similar personal characteristic.
 4 | 
 5 | We’ve written this code of conduct not because we expect bad behaviour from our community—which, in our experience, is overwhelmingly kind and civil—but because we believe a clear code of conduct is one necessary part of building a respectful community space.
 6 | 
 7 | We are committed to providing a welcoming and inspiring community for all and expect our code of conduct to be honored. Anyone who violates this code of conduct may be banned from the community.
 8 | 
 9 | Please be kind and courteous. There's no need to be mean or rude. Respect that people have differences of opinion and that every design or implementation choice carries a trade-off and numerous costs. There is seldom a right answer, merely an optimal answer given a set of values and circumstances.
10 | 
11 | 
12 | ## Our open community strives to:
13 | 
14 | **Be friendly and patient**.
15 | 
16 | **Be considerate**: Your work will be used by other people, and you in turn will depend on the work of others. Any decision you take will affect users and colleagues, and you should take those consequences into account when making decisions. Remember that we’re a world-wide community, so you might not be communicating in someone else’s primary language.
17 | 
18 | **Be respectful**: Not all of us will agree all the time, but disagreement is no excuse for poor behaviour and poor manners. We might all experience some frustration now and then, but we cannot allow that frustration to impact others. It’s important to remember that a community where people feel uncomfortable or threatened is not a productive one. 
19 | 
20 | **Be careful in the words that we choose**: we are a community of professionals, and we conduct ourselves professionally. Be kind to others. Do not insult or put down other participants. Harassment and other exclusionary behaviour aren’t acceptable. 
21 | 
22 | **Try to understand why we disagree**: Disagreements, both social and technical, happen all the time. It is important that we resolve disagreements and differing views constructively. Remember that we’re different. The strength of our community comes from its diversity, people from a wide range of backgrounds. Different people have different perspectives on issues. Being unable to understand why someone holds a viewpoint doesn’t mean that they’re wrong. Don’t forget that it is human to err and blaming each other doesn’t get us anywhere. Instead, focus on helping to resolve issues and learning from mistakes. 
23 | 
 24 | **What goes around comes around**. We believe in open source, and are excited by what happens when people add value to each other's work in a collaborative way. 
25 | 
26 | **Take care of each other**. Alert a member of the project team if you notice a dangerous situation, someone in distress, or violations of this code of conduct, even if they seem inconsequential.
27 | 
 28 | If any participant engages in harassing behaviour, the project team may take any lawful action we deem appropriate, including but not limited to warning the offender or asking the offender to leave the project.
29 | 
30 | 
31 |  ## Diversity Statement
32 | 
33 |  We encourage everyone to participate and are committed to building a community for all. Although we will fail at times, we seek to treat everyone both as fairly and equally as possible. Whenever a participant has made a mistake, we expect them to take responsibility for it. If someone has been harmed or offended, it is our responsibility to listen carefully and respectfully, and do our best to right the wrong.
34 | 
35 | ## Reporting Issues
36 | 
37 |  If you experience or witness unacceptable behaviour—or have any other concerns—please report it by contacting us via **${TBC}**<!-- TODO: we need to add an email for contact  eg at textAV we had - textav@bbcnewslabs.co.uk. --> All reports will be handled with discretion. In your report please include:
38 | 
39 | - Your contact information.
40 | - Names (real, nicknames, or pseudonyms) of any individuals involved. If there are additional witnesses, please include them as well. Your account of what occurred, and if you believe the incident is ongoing. If there is a publicly available record (e.g. a mailing list archive or a public slack channel), please include a link.
41 | - Any additional information that may be helpful.
42 | 
43 | After filing a report, a representative will contact you personally, review the incident, follow up with any additional questions, and make a decision as to how to respond. If the person who is harassing you is part of the response team, they will recuse themselves from handling your incident. If the complaint originates from a member of the response team, it will be handled by a different member of the response team. We will respect confidentiality requests for the purpose of protecting victims of abuse.
44 | 
45 | ## Feedback 
46 | We welcome your feedback on this and every other aspect of this project and we thank you for working with us to make it a safe, enjoyable, and friendly experience for everyone who participates.
47 | 
48 | ## Attribution & Acknowledgements
49 | 
 50 | We all stand on the shoulders of giants across many open source communities. We’d like to thank the communities and projects that established codes of conduct and diversity statements as our inspiration:
51 | 
52 | - [textAV](https://sites.google.com/view/textav/conduct-london-18)
53 | - [SRCCON](https://srccon.org/conduct/)
54 | - [Django](https://www.djangoproject.com/conduct/reporting/)
55 | - [Python](https://www.python.org/community/diversity/)
56 | - [Ubuntu](https://www.ubuntu.com/community/code-of-conduct)
57 | - [Contributor Covenant](https://www.contributor-covenant.org/)
58 | - [Geek Feminism](https://geekfeminism.org/about/code-of-conduct/)
59 | - [Citizen Code of Conduct](http://citizencodeofconduct.org/)
60 | - [Gulp](https://github.com/gulpjs/gulp/blob/master/CONTRIBUTING.md)
61 | - [Open code of Conduct](https://todogroup.org/opencodeofconduct/)
62 | - ["Why You Want a Code of Conduct & How We Made One"](http://incisive.nu/2014/codes-of-conduct/)
63 | - [Facebook Open Source Code of Conduct](https://code.fb.com/codeofconduct/)


--------------------------------------------------------------------------------
/src/util/export-adapters/subtitles-generator/presegment-text/steps.md:
--------------------------------------------------------------------------------
  1 | # Steps
  2 | 
  3 | <!-- - 0.Punctuation - _work in progress_ -->
  4 | - _remove line breaks_
  5 | - 1.Text Segmentation +
   6 | - 2.Line break between sentences +
  7 | - 3.Fold char limit per line +
  8 | - 4.Divide into two lines +
   9 | - 5.Aeneas `-->` subtitle file +
 10 | 
 11 | 
 12 | ## components
 13 | 
 14 | <!-- ### ~ 0.Punctuation 
 15 | 
 16 | Add punctuation  -->
 17 | 
 18 | <!-- Punctuator 2 library  -->
 19 | 
 20 | 
 21 | ### ~ 1.Text Segmentation 
 22 | 
 23 | <!-- See module readme for more details -->
 24 | 
 25 | #### Input
 26 | 
 27 | Plain text, **with punctuation** all on one line 
 28 | 
 29 | ```
 30 | Hi there, my name is Ian police - are recording this video to talk about mercury for the folks at a tech daily conference in New York. Sorry, I can't be there in person, so we are building a prototype funded in part by Google DNI of a web-based computer, assisted transcription and translation tool with some video editing features. It does speech to text and then automated consistent translation and then text to speech generate synthetic voices at time codes that line up with the original original audio.
 31 | ```
 32 | 
 33 | #### Out 
 34 | 
 35 | Puts each sentence that ends with full stop on new line. `\n`.
 36 |  
 37 | ``` 
 38 | Hi there, my name is Ian police - are recording this video to talk about mercury for the folks at a tech daily conference in New York.
 39 | Sorry, I can't be there in person, so we are building a prototype funded in part by Google DNI of a web-based computer, assisted transcription and translation tool with some video editing features.
 40 | It does speech to text and then automated consistent translation and then text to speech generate synthetic voices at time codes that line up with the original original audio.
 41 | ```
 42 | 
 43 | #### algo 
 44 | 
 45 | [Joseph Polizzotto's perl script identify sentence boundaries sentence-boundary.pl ](https://github.com/polizoto/segment_transcript/blob/master/sentence-boundary.pl)
 46 | 
 47 | ```perl
 48 | # segment transcript into sentences
 49 | perl sentence-boundary.pl -d HONORIFICS -i "$f" -o test.txt
 50 | ```
 51 | 
 52 | list of [`HONORIFICS` here](https://github.com/polizoto/align_transcript/blob/master/HONORIFICS)
 53 | 
 54 | ---
 55 | 
  56 | ### ~  2.Line break between sentences
 57 | 
 58 | <!-- See module readme for more details -->
 59 | separates each line (a sentence) with an empty line.
 60 | <!-- Adds a line break `\n\n` in between in each stence.  -->
 61 | 
 62 | #### Input
 63 | is output of previous section 
 64 | 
 65 | 
 66 | ```
 67 | Hi there, my name is Ian police - are recording this video to talk about mercury for the folks at a tech daily conference in New York.
 68 | Sorry, I can't be there in person, so we are building a prototype funded in part by Google DNI of a web-based computer, assisted transcription and translation tool with some video editing features.
 69 | It does speech to text and then automated consistent translation and then text to speech generate synthetic voices at time codes that line up with the original original audio.
 70 | ```
 71 | 
 72 | #### Output
 73 | 
 74 | ```
 75 | Hi there, my name is Ian police - are recording this video to talk about mercury for the folks at a tech daily conference in New York.
 76 | 
 77 | Sorry, I can't be there in person, so we are building a prototype funded in part by Google DNI of a web-based computer, assisted transcription and translation tool with some video editing features.
 78 | 
 79 | It does speech to text and then automated consistent translation and then text to speech generate synthetic voices at time codes that line up with the original original audio.
 80 | ```
 81 | 
 82 | #### algo 
 83 | 
 84 | ```bash
 85 | # Add blank line after every new line
 86 | sed -e 'G' test.txt > test2.txt
 87 | ```
 88 | 
 89 | Equivalent to 
 90 | 
 91 | ```js
 92 | test.replace(/\n/g,"\n\n")
 93 | ```
 94 | 
 95 | ---
 96 | 
 97 | ### ~  3.Fold char limit per line
 98 | 
 99 | folds each line at char limit. eg 35 char. 
100 | 
 101 | The 2nd line (pictured) takes each of the sentences (now separated by an empty line) and, if the sentence is longer than 35 characters, places a new line mark at the end of the last word that fits within the 35 character limit.
102 | 
103 | #### Input
104 | is output of previous section 
105 | 
106 | ```
107 | Hi there, my name is Ian police - are recording this video to talk about mercury for the folks at a tech daily conference in New York.
108 | 
109 | Sorry, I can't be there in person, so we are building a prototype funded in part by Google DNI of a web-based computer, assisted transcription and translation tool with some video editing features.
110 | 
111 | It does speech to text and then automated consistent translation and then text to speech generate synthetic voices at time codes that line up with the original original audio.
112 | ```
113 | 
114 | #### Output
115 | 
116 | ```
117 | 
118 | Hi there, my name is Ian police -
119 | are recording this video to talk
120 | about mercury for the folks at a
121 | tech daily conference in New York.
122 | 
123 | Sorry, I can’t be there in person,
124 | so we are building a prototype
125 | funded in part by Google DNI of a
126 | web-based computer, assisted
127 | transcription and translation tool
128 | with some video editing features.
129 | 
130 | It does speech to text and then
131 | automated consistent translation
132 | and then text to speech generate
133 | synthetic voices at time codes that
134 | line up with the original original
135 | audio.
136 | ```
137 | 
138 | #### algo
139 | 
140 | ```bash
141 | # Break each line at 35 characters
142 | fold -w 35 -s test2.txt > test3.txt
143 | ```
144 | 
145 | 
146 | <!-- See module readme for more details -->
147 | 
148 | ---
149 | 
150 | ### ~  4.Divide into two lines
151 | 
152 | Take these new chunks and separate them further so that there are no more than two consecutive lines before an empty line.
153 | 
154 | Creating block of text, with one or two consecutive lines.
155 | 
156 | Groups “paragraphs” by `\n`.
157 | 
 158 | For “paragraphs” that are more than 1 line long, 
 159 | add a line break `\n` after every two lines.
160 | 
161 | 
162 | #### Input
163 | is output of previous section 
164 | 
165 | ```
166 | 
167 | Hi there, my name is Ian police -
168 | are recording this video to talk
169 | about mercury for the folks at a
170 | tech daily conference in New York.
171 | 
172 | Sorry, I can’t be there in person,
173 | so we are building a prototype
174 | funded in part by Google DNI of a
175 | web-based computer, assisted
176 | transcription and translation tool
177 | with some video editing features.
178 | 
179 | It does speech to text and then
180 | automated consistent translation
181 | and then text to speech generate
182 | synthetic voices at time codes that
183 | line up with the original original
184 | audio.
185 | ```
186 | 
187 | #### output
188 | 
189 | ```
190 | Hi there, my name is Ian police -
191 | are recording this video to talk
192 | 
193 | about mercury for the folks at a
194 | tech daily conference in New York.
195 | 
196 | Sorry, I can’t be there in person,
197 | so we are building a prototype
198 | 
199 | funded in part by Google DNI of a
200 | web-based computer, assisted
201 | 
202 | transcription and translation tool
203 | with some video editing features.
204 | 
205 | It does speech to text and then
206 | automated consistent translation
207 | 
208 | and then text to speech generate
209 | synthetic voices at time codes that
210 | 
211 | line up with the original original
212 | audio.
213 | ```
214 | 
215 | #### algo  
216 | 
217 | ```perl
218 | # Insert new line for every two lines, preserving paragraphs
219 | perl -00 -ple 's/.*\n.*\n/$&\n/mg' test3.txt > "$f"
220 | ```
221 | 
222 | 
223 | ---
224 | 
225 | 
226 | ### ~  5.Aeneas Node
227 | 
228 | <!-- See module readme for more details -->
229 | 
 230 | Takes a plain text file (the output of the step above) and a media source (audio or video), and creates an srt captions file. 
231 | 
232 | #### example 
233 | 
234 | ```bash
235 | /usr/local/bin/aeneas_execute_task "./data/2017_07_19_11_26_13-Cd56vF3lZ_Q.mp4" "./examples/blaine.srt" "task_language=eng|os_task_file_format=srt|is_text_type=subtitles|is_audio_file_head_length=0|is_audio_file_tail_length=0|task_adjust_boundary_nonspeech_min=1.000|task_adjust_boundary_nonspeech_string=REMOVE|task_adjust_boundary_algorithm=percent|task_adjust_boundary_percent_value=75|is_text_file_ignore_regex=[*]" ./examples/2017_07_19_11_26_13-Cd56vF3lZ_Q.mp4.srt
236 | ```
237 | 


--------------------------------------------------------------------------------
/docs/notes/alternative-alignment-approaches.md:
--------------------------------------------------------------------------------
  1 | # alternative alignment approaches using computation
  2 | 
   3 | Not sure if this will be useful, but thought I'd gather some approaches that use computation below
  4 | 
  5 | ## weighted average of paragraph start and end times
  6 | 
   7 | This is quite interesting, although I don’t fully understand all of it [/src/util/export-adapters/slate-to-dpe/index.js#L43-L100](https://github.com/clowdr-app/slate-transcript-editor/blob/master/src/util/export-adapters/slate-to-dpe/index.js#L43-L100)
  8 | 
  9 | in a fork of slate-transcript-editor
 10 | 
 11 | they commented out
 12 | 
 13 | ```js
 14 | //const res = alignDiraizedText(linesWithSpeaker, sttJson);
 15 | ```
 16 | 
 17 | and added an alternative way of doing it.
 18 | 
 19 | Is this line trying to do some kind of interpolation/math to re-calculate the times of the words without “alignment”? [/src/util/export-adapters/slate-to-dpe/index.js#L57](https://github.com/clowdr-app/slate-transcript-editor/blob/master/src/util/export-adapters/slate-to-dpe/index.js#L57)
 20 | 
  21 | counting the number of words, and then spacing them evenly in time between the start and end of the range
 22 | 
 23 | ```js
 24 |    start: (startTime * (nodeWords.length - idx) + endTime * idx) / nodeWords.length, // weighted average of paragraph start and end times
 25 | ```
 26 | 
 27 | in context
 28 | 
 29 | ```js
 30 | // import slateToText from '../txt';
 31 | import { Node } from 'slate';
 32 | // importing this way, coz it runs as client side code, and the module, align-diarized-text index.js contains and import to a
 33 | // helper module to generate html view, that contains and fs, and it breaks storybook webpack.
 34 | // TODO: refactor in ` align-diarized-text` so that it can work outside node only, but also in browser, without workaround
 35 | // const alignDiraizedText = require('../../../../node_modules/align-diarized-text/src/add-timecodes-to-quotes');
 36 | // const alignDiraizedText = require('align-diarized-text');
 37 | import alignDiraizedText from 'align-diarized-text';
 38 | 
 39 | // TODO: this function needs to be brough into alignDiraizedText
 40 | // and applied to paragraphs - to avoid boundaries overlapp
 41 | function adjustTimecodesBoundaries(words) {
 42 |   return words.map((word, index, arr) => {
 43 |     // excluding first element
 44 |     if (index != 0) {
 45 |       const previousWord = arr[index - 1];
 46 |       const currentWord = word;
 47 |       if (previousWord.end > currentWord.start) {
 48 |         word.start = previousWord.end;
 49 |       }
 50 | 
 51 |       return word;
 52 |     }
 53 | 
 54 |     return word;
 55 |   });
 56 | }
 57 | 
 58 | const prepSlateParagraphForAlignement = (slateData) => {
 59 |   const result = [];
 60 |   slateData.forEach((el, index) => {
 61 |     const newEl = {
 62 |       text: Node.string(el),
 63 |       // start: `${el.start}`,// workaround
 64 |       start: `${el.start}`, // workaround
 65 |       speaker: el.speaker,
 66 |       id: `${index}`,
 67 |     };
 68 |     result.push(newEl);
 69 |   });
 70 |   return result;
 71 | };
 72 | const converSlateToDpe = (data, sttJson) => {
 73 |   const linesWithSpeaker = prepSlateParagraphForAlignement(data);
 74 |   console.log('linesWithSpeaker', linesWithSpeaker);
 75 |   console.log('sttJson', sttJson);
 76 |   //const res = alignDiraizedText(linesWithSpeaker, sttJson);
 77 | 
 78 |   const res = linesWithSpeaker.map((line, idx) => {
 79 |     const startTime = parseFloat(line.start);
 80 |     const endTime = linesWithSpeaker.length > idx + 1 ? parseFloat(linesWithSpeaker[idx + 1].start) : startTime + 1;
 81 | 
 82 |     const nodeWords = line.text.split(/\s+/);
 83 |     const words = nodeWords.map((nodeWord, idx) => {
 84 |       const word = {
 85 |         start: (startTime * (nodeWords.length - idx) + endTime * idx) / nodeWords.length, // weighted average of paragraph start and end times
 86 |         end: (startTime * (nodeWords.length - (idx + 1)) + endTime * (idx + 1)) / nodeWords.length,
 87 |         text: nodeWord,
 88 |       };
 89 |       return word;
 90 |     });
 91 | 
 92 |     return {
 93 |       end: endTime,
 94 |       start: startTime,
 95 |       id: line.id,
 96 |       speaker: line.speaker,
 97 |       text: line.text,
 98 |       words,
 99 |     };
100 |   });
101 | 
102 |   console.log('res', res);
103 |   const words = res
104 |     .map((paragraph) => {
105 |       if (paragraph) {
106 |         return paragraph.words;
107 |       }
108 |     })
109 |     .flat();
110 |   const paragraphs = res
111 |     .map((paragraph) => {
112 |       if (paragraph) {
113 |         return {
114 |           speaker: paragraph.speaker,
115 |           start: parseFloat(paragraph.start),
116 |           end: parseFloat(paragraph.end),
117 |           id: paragraph.id,
118 |         };
119 |       }
120 |     })
121 |     .flat();
122 |   // without adjusting the paragraph boundaries, can't go round trip
123 |   // back to slate, coz it's not able to reliably interpolate
124 |   // words and speaker again
125 |   const paragraphsWithAdjustedBoundaries = adjustTimecodesBoundaries(paragraphs);
126 |   return { words, paragraphs: paragraphsWithAdjustedBoundaries };
127 |   //    return {words, paragraphs};
128 | };
129 | 
130 | export default converSlateToDpe;
131 | ```
132 | 
133 | That could be extracted
134 | 
135 | ```js
136 | const text = `Call me Ishmael. Some years ago—never mind how long precisely—having
137 | little or no money in my purse, and nothing particular to interest me
138 | on shore, I thought I would sail about a little and see the watery part
139 | of the world. It is a way I have of driving off the spleen and
140 | regulating the circulation. Whenever I find myself growing grim about
141 | the mouth; whenever it is a damp, drizzly November in my soul; whenever
142 | I find myself involuntarily pausing before coffin warehouses, and
143 | bringing up the rear of every funeral I meet; and especially whenever
144 | my hypos get such an upper hand of me, that it requires a strong moral
145 | principle to prevent me from deliberately stepping into the street, and
146 | methodically knocking people’s hats off—then, I account it high time to
147 | get to sea as soon as I can. This is my substitute for pistol and ball.
148 | With a philosophical flourish Cato throws himself upon his sword; I
149 | quietly take to the ship. There is nothing surprising in this. If they
150 | but knew it, almost all men in their degree, some time or other,
151 | cherish very nearly the same feelings towards the ocean with me.`;
152 | const startTime = 1.2;
153 | const endTime = 3.5;
154 | 
155 | function round(number) {
156 |   return Math.round(number * 100) / 100;
157 | }
158 | 
159 | function computeStartTime({ startTime, wordCount, index, endTime }) {
160 |   return round((startTime * (wordCount - index) + endTime * index) / wordCount);
161 | }
162 | 
/**
 * End time of the word at `index`: the same weighted average used for the
 * start time, evaluated one position further along, rounded via `round`.
 *
 * @param {Object} params
 * @param {number} params.startTime - paragraph start time
 * @param {number} params.wordCount - number of words in the paragraph
 * @param {number} params.index - zero-based position of the word
 * @param {number} params.endTime - paragraph end time
 * @returns {number} interpolated end time of the word
 */
function computeEndTime({ startTime, wordCount, index, endTime }) {
  const startShare = startTime * (wordCount - (index + 1));
  const endShare = endTime * (index + 1);
  return round((startShare + endShare) / wordCount);
}
166 | 
/**
 * Splits a paragraph's text into words and gives each word a start and end
 * time interpolated between the paragraph's start and end times
 * (see computeStartTime / computeEndTime).
 *
 * @param {Object} params
 * @param {string} params.text - paragraph text; words are separated by runs of whitespace
 * @param {number} params.startTime - paragraph start time
 * @param {number} params.endTime - paragraph end time
 * @returns {Array<{start: number, end: number, text: string}>} one entry per word, in
 *   reading order; an empty array when the text is empty or whitespace-only
 */
function computeWordsTimings({ text, startTime, endTime }) {
  const trimmedText = text.trim();
  // ''.split(/\s+/) yields [''], which would otherwise produce a phantom word
  // with empty text spanning the whole paragraph.
  if (trimmedText === '') {
    return [];
  }
  const nodeWords = trimmedText.split(/\s+/);
  const wordCount = nodeWords.length;
  return nodeWords.map((nodeWord, index) => ({
    start: computeStartTime({ startTime, wordCount, index, endTime }),
    end: computeEndTime({ startTime, wordCount, index, endTime }),
    text: nodeWord,
  }));
}
177 | 
// Demo run: compute a timing entry for every word of the sample text and print the resulting list.
const wordsList = computeWordsTimings({ text, startTime, endTime });
console.log(wordsList);
180 | ```
181 | 
182 | ## PopcornJs srt paragraphs/lines to word timings
183 | 
184 | Reminds me of this code, originally from PopcornJs and used by @maboa in the hyperaud.io converter, to convert from srt to word-level timed text
185 | [interpolateWordsTimesFromSentence.js](https://gist.github.com/pietrop/fdac1672d757ae09de5ef5abac7f8bf5) from [srtParserComposer](https://github.com/pietrop/srtParserComposer) [originally from "srt to word accurate time"](https://github.com/pietrop/srtParserComposer#srt-to-word-accurate-time)
186 | 
187 | Also in Ruby: [srt_to_json_hypertranscript_converter.rb](https://gist.github.com/pietrop/c385b528915fc81d9cb8)
188 | 
189 | ## Word time heuristic based on char count
190 | 
191 | [wordDuration.js](https://gist.github.com/pietrop/94da62c00b477c5768fb57da52395e62)
192 | 
193 | ```js
194 | // Chris Baume BBC R&D heuristic to estimate duration of a word, based on looking across a number of transcripts.
195 | // from https://github.com/chrisbaume/webaligner/blob/9458df57d854e9df64a54bc23a7f0856de49730f/webaligner.js#L7
196 | // estimates the duration of a word, in seconds
197 | 
/**
 * Estimates the duration of a word, in seconds, as a fixed base duration
 * plus a fixed amount per character (`word.length`).
 *
 * @param {string} word - the word to estimate
 * @returns {number} estimated duration in seconds
 */
function wordDuration(word) {
  const baseDuration = 0.08475;
  const durationPerCharacter = 0.05379;
  return baseDuration + durationPerCharacter * word.length;
}
201 | ```
202 | 


--------------------------------------------------------------------------------
/src/components/1-SlateTranscriptEditor.stories.js:
--------------------------------------------------------------------------------
  1 | import React from 'react';
  2 | import { action } from '@storybook/addon-actions';
  3 | import { withKnobs, text, boolean, number, object, select } from '@storybook/addon-knobs';
  4 | import { withInfo } from '@storybook/addon-info';
  5 | import { version } from '../../package.json';
  6 | 
  7 | import Button from '@material-ui/core/Button';
  8 | import SlateTranscriptEditor from './index.js';
  9 | import 'fontsource-roboto';
 10 | 
// Storybook default export: metadata shared by every story exported from this file.
export default {
  title: 'SlateTranscriptEditor',
  component: SlateTranscriptEditor,
  // withKnobs backs the text/boolean/select knobs used by the stories; withInfo comes from @storybook/addon-info.
  decorators: [withKnobs, withInfo],
  parameters: {
    // Options read by the addon-info decorator.
    info: {
      maxPropArrayLength: 3,
      maxPropsIntoLine: 3,
      maxPropObjectKeys: 1,
      // NOTE(review): presumably keeps the large transcript JSON out of the info panel — confirm against addon-info docs.
      excludedPropTypes: ['transcriptData'],
      source: false,
    },
  },
};
 25 | 
// Audio-only media used by the `Audio` story.
const AUDIO_URL = 'https://www.w3schools.com/tags/horse.ogg';

// Kate Darling TED talk demo assets. None of the three KATE bindings is referenced by a story in this file.
const DEMO_MEDIA_URL_KATE = 'https://download.ted.com/talks/KateDarling_2018S-950k.mp4';
const DEMO_TITLE_KATE = 'TED Talk | Kate Darling - Why we have an emotional connection to robots';
import DEMO_TRANSCRIPT_KATE from '../sample-data/KateDarling-dpe.json';

// Soleio interview demo assets: the media, title and transcript used by the stories below.
const DEMO_MEDIA_URL_SOLEIO =
  'https://digital-paper-edit-demo.s3.eu-west-2.amazonaws.com/PBS-Frontline/The+Facebook+Dilemma+-+interviews/The+Facebook+Dilemma+-+Soleio+Cuervo-OIAUfZBd_7w.mp4';
const DEMO_TITLE_SOLEIO = 'Soleio Interview, PBS Frontline';
import DEMO_SOLEIO from '../sample-data/soleio-dpe.json';
 36 | export const demo = () => {
 37 |   return (
 38 |     <>
 39 |       <p>
 40 |         Slate Transcript Editor version: <code>{version}</code>
 41 |       </p>
 42 |       <SlateTranscriptEditor
 43 |         mediaUrl={text('mediaUrl', DEMO_MEDIA_URL_SOLEIO)}
 44 |         handleSaveEditor={action('handleSaveEditor')}
 45 |         // handleAutoSaveChanges={action('handleAutoSaveChanges')}
 46 |         // https://www.npmjs.com/package/@storybook/addon-knobs#select
 47 |         // autoSaveContentType={select('autoSaveContentType', ['digitalpaperedit', 'slate'], 'digitalpaperedit')} // digitalpaperedit or slate - digitalpaperedit, runs alignement before exporting, slate, is just the raw data.
 48 |         // transcriptData={object('transcriptData', DEMO_SOLEIO)}
 49 |         transcriptData={DEMO_SOLEIO}
 50 |       />
 51 |     </>
 52 |   );
 53 | };
 54 | 
 55 | export const MinimamlInitialization = () => {
 56 |   return (
 57 |     <SlateTranscriptEditor
 58 |       mediaUrl={text('mediaUrl', DEMO_MEDIA_URL_SOLEIO)}
 59 |       transcriptData={DEMO_SOLEIO}
 60 |       handleSaveEditor={action('handleSaveEditor')} // optional
 61 |     />
 62 |   );
 63 | };
 64 | 
 65 | MinimamlInitialization.story = {
 66 |   parameters: {
 67 |     info: {}, // mediaUrl: true, transcriptData:true,handleSaveEditor:true
 68 |   },
 69 | };
 70 | 
 71 | export const OptionalTitle = () => {
 72 |   return (
 73 |     <SlateTranscriptEditor
 74 |       showTitle={boolean('showTitle', true)} // optional - defaults to false
 75 |       mediaUrl={text('mediaUrl', DEMO_MEDIA_URL_SOLEIO)}
 76 |       title={text('title', DEMO_TITLE_SOLEIO)}
 77 |       transcriptData={DEMO_SOLEIO}
 78 |       handleSaveEditor={action('handleSaveEditor')}
 79 |       handleAutoSaveChanges={action('handleAutoSaveChanges')}
 80 |       autoSaveContentType={select('autoSaveContentType', ['digitalpaperedit', 'slate'], 'digitalpaperedit')} // digitalpaperedit or slate - digitalpaperedit, runs alignement before exporting, slate, is just the raw data.
 81 |       showTimecodes={boolean('timecodes', true)}
 82 |       showSpeakers={boolean('speakers', true)}
 83 |     />
 84 |   );
 85 | };
 86 | 
 87 | export const NoSpeakers = () => {
 88 |   return (
 89 |     <SlateTranscriptEditor
 90 |       showTitle={boolean('showTitle', false)}
 91 |       mediaUrl={text('mediaUrl', DEMO_MEDIA_URL_SOLEIO)}
 92 |       title={text('title', DEMO_TITLE_SOLEIO)}
 93 |       transcriptData={DEMO_SOLEIO}
 94 |       handleSaveEditor={action('handleSaveEditor')}
 95 |       handleAutoSaveChanges={action('handleAutoSaveChanges')}
 96 |       autoSaveContentType={select('autoSaveContentType', ['digitalpaperedit', 'slate'], 'digitalpaperedit')} // digitalpaperedit or slate - digitalpaperedit, runs alignement before exporting, slate, is just the raw data
 97 |       showTimecodes={boolean('timecodes', true)}
 98 |       showSpeakers={boolean('speakers', false)}
 99 |     />
100 |   );
101 | };
102 | 
103 | export const NoTimecodes = () => {
104 |   return (
105 |     <SlateTranscriptEditor
106 |       mediaUrl={text('mediaUrl', DEMO_MEDIA_URL_SOLEIO)}
107 |       title={text('title', DEMO_TITLE_SOLEIO)}
108 |       transcriptData={DEMO_SOLEIO}
109 |       handleSaveEditor={action('handleSaveEditor')}
110 |       handleAutoSaveChanges={action('handleAutoSaveChanges')}
111 |       autoSaveContentType={select('autoSaveContentType', ['digitalpaperedit', 'slate'], 'digitalpaperedit')} // digitalpaperedit or slate - digitalpaperedit, runs alignement before exporting, slate, is just the raw data.
112 |       showTimecodes={boolean('timecodes', false)}
113 |       showSpeakers={boolean('speakers', true)}
114 |     />
115 |   );
116 | };
117 | 
118 | export const NoSpeakersAndTimecodes = () => {
119 |   return (
120 |     <SlateTranscriptEditor
121 |       mediaUrl={text('mediaUrl', DEMO_MEDIA_URL_SOLEIO)}
122 |       title={DEMO_TITLE_SOLEIO}
123 |       transcriptData={DEMO_SOLEIO}
124 |       handleSaveEditor={action('handleSaveEditor')}
125 |       handleAutoSaveChanges={action('handleAutoSaveChanges')}
126 |       autoSaveContentType={select('autoSaveContentType', ['digitalpaperedit', 'slate'], 'digitalpaperedit')} // digitalpaperedit or slate - digitalpaperedit, runs alignement before exporting, slate, is just the raw data.
127 |       showTimecodes={boolean('timecodes', false)}
128 |       showSpeakers={boolean('speakers', false)}
129 |     />
130 |   );
131 | };
132 | export const ReadOnly = () => {
133 |   return (
134 |     <SlateTranscriptEditor
135 |       mediaUrl={text('mediaUrl', DEMO_MEDIA_URL_SOLEIO)}
136 |       title={DEMO_TITLE_SOLEIO}
137 |       transcriptData={DEMO_SOLEIO}
138 |       handleSaveEditor={action('handleSaveEditor')}
139 |       handleAutoSaveChanges={action('handleAutoSaveChanges')}
140 |       autoSaveContentType={select('autoSaveContentType', ['digitalpaperedit', 'slate'], 'digitalpaperedit')} // digitalpaperedit or slate - digitalpaperedit, runs alignement before exporting, slate, is just the raw data.
141 |       isEditable={false}
142 |     />
143 |   );
144 | };
145 | 
146 | export const Audio = () => {
147 |   return (
148 |     <SlateTranscriptEditor
149 |       mediaUrl={text('mediaUrl', AUDIO_URL)}
150 |       transcriptData={DEMO_SOLEIO}
151 |       handleSaveEditor={action('handleSaveEditor')}
152 |       handleAutoSaveChanges={action('handleAutoSaveChanges')}
153 |       autoSaveContentType={select('autoSaveContentType', ['digitalpaperedit', 'slate'], 'digitalpaperedit')} // digitalpaperedit or slate - digitalpaperedit, runs alignement before exporting, slate, is just the raw data.
154 |       isEditable={true}
155 |       mediaType={select('mediaType', ['audio', 'video'], 'audio')}
156 |     />
157 |   );
158 | };
159 | 
160 | export const optionalAnalytics = () => {
161 |   return (
162 |     <>
163 |       <p>
164 |         Slate Transcript Editor version: <code>{version}</code>
165 |       </p>
166 |       <SlateTranscriptEditor
167 |         mediaUrl={text('mediaUrl', DEMO_MEDIA_URL_SOLEIO)}
168 |         handleSaveEditor={action('handleSaveEditor')}
169 |         // handleAutoSaveChanges={action('handleAutoSaveChanges')}
170 |         // https://www.npmjs.com/package/@storybook/addon-knobs#select
171 |         // autoSaveContentType={select('autoSaveContentType', ['digitalpaperedit', 'slate'], 'digitalpaperedit')} // digitalpaperedit or slate - digitalpaperedit, runs alignement before exporting, slate, is just the raw data.
172 |         // transcriptData={object('transcriptData', DEMO_SOLEIO)}
173 |         transcriptData={DEMO_SOLEIO}
174 |         handleAnalyticsEvents={action('handleAnalyticsEvents')}
175 |       />
176 |     </>
177 |   );
178 | };
179 | 
180 | export const optionaChildComponents = () => {
181 |   return (
182 |     <>
183 |       <p>
184 |         Slate Transcript Editor version: <code>{version}</code>
185 |       </p>
186 |       <SlateTranscriptEditor
187 |         mediaUrl={text('mediaUrl', DEMO_MEDIA_URL_SOLEIO)}
188 |         handleSaveEditor={action('handleSaveEditor')}
189 |         // handleAutoSaveChanges={action('handleAutoSaveChanges')}
190 |         // https://www.npmjs.com/package/@storybook/addon-knobs#select
191 |         // autoSaveContentType={select('autoSaveContentType', ['digitalpaperedit', 'slate'], 'digitalpaperedit')} // digitalpaperedit or slate - digitalpaperedit, runs alignement before exporting, slate, is just the raw data.
192 |         // transcriptData={object('transcriptData', DEMO_SOLEIO)}
193 |         transcriptData={DEMO_SOLEIO}
194 |         handleAnalyticsEvents={action('handleAnalyticsEvents')}
195 |         optionalBtns={
196 |           <>
197 |             <Button
198 |               title="optional button"
199 |               color="primary"
200 |               onClick={() => {
201 |                 alert('optional componet added from outside STE');
202 |               }}
203 |             >
204 |               B
205 |             </Button>
206 |             <Button
207 |               title="optional button"
208 |               color="primary"
209 |               onClick={() => {
210 |                 alert('and yes you can add more then one optional componet added from outside STE');
211 |               }}
212 |             >
213 |               O
214 |             </Button>
215 |           </>
216 |         }
217 |       >
218 |         <h1>Optional child component</h1>
219 |       </SlateTranscriptEditor>
220 |     </>
221 |   );
222 | };
223 | 


--------------------------------------------------------------------------------
/src/util/export-adapters/subtitles-generator/sample/test.sample.txt:
--------------------------------------------------------------------------------
  1 | There is a day.
  2 | 
  3 | About ten years ago when I asked a
  4 | 
  5 | friend to hold a baby dinosaur
  6 | robot upside down.
  7 | 
  8 | It was a toy called plea.
  9 | 
 10 | All It's a super courts are
 11 | 
 12 | showing off to my friend and I
 13 | said to hold it, but he'll see
 14 | 
 15 | what debts.
 16 | 
 17 | We were watching the theatrics of
 18 | 
 19 | this robe that struggle and cry
 20 | out and and after a few The first.
 21 | 
 22 | After my little and I said o.k.
 23 | 
 24 | That's enough.
 25 | 
 26 | Now, let's put him back down and
 27 | pepper, about to make it.
 28 | 
 29 | Stop crying and I was kind of a
 30 | weird experience for me one thing,
 31 | 
 32 | wasn't the most maternal person at
 33 | the time.
 34 | 
 35 | Although, since then I've become a
 36 | mother and nine months ago.
 37 | 
 38 | And that is a score when hold them
 39 | up to now, but my response to this
 40 | 
 41 | robot was also interesting because
 42 | I knew exactly how this machine
 43 | 
 44 | work it.
 45 | 
 46 | And yet.
 47 | 
 48 | I still felt compelled to be kind
 49 | to it.
 50 | 
 51 | And that observation sparked that
 52 | curiosity that I spent the decade
 53 | 
 54 | pursuing it.
 55 | 
 56 | Why did they comfort this robe.
 57 | 
 58 | One of the things I discovered was
 59 | my treatment of this machine was
 60 | 
 61 | more than just an awkward moment
 62 | in my living room that in a world
 63 | 
 64 | were increasingly integrating
 65 | robots into our lives and things
 66 | 
 67 | like that might actually have
 68 | consequences because the first
 69 | 
 70 | thing that I discovered is that.
 71 | 
 72 | It's not just me in two thousand
 73 | 
 74 | seven.
 75 | 
 76 | The Washington Post reported that
 77 | 
 78 | the United States military was
 79 | testing this robot diffused
 80 | 
 81 | landmines.
 82 | 
 83 | We workers were shaped like a
 84 | 
 85 | stick insect would walk around a
 86 | minefield on its legs and every
 87 | 
 88 | time he stepped on a mine.
 89 | 
 90 | One of the legs would blow up
 91 | 
 92 | would continue on the other legs
 93 | to block your minds in the colonel
 94 | 
 95 | was in charge of this testing
 96 | exercise for calling it off
 97 | 
 98 | because he says it's too inhumane
 99 | to watch this damage robot drag
100 | 
101 | itself along What would cause a
102 | hardened military officer and
103 | 
104 | someone like myself to have this
105 | response to row.
106 | 
107 | But what.
108 | 
109 | Of course for prime for science
110 | 
111 | fiction, pop culture really want
112 | to personify these things, but it
113 | 
114 | goes a little bit deeper than that
115 | it turns out that we are
116 | 
117 | biologically hard wired to project
118 | intent and life onto any movement
119 | 
120 | in a physical space.
121 | 
122 | It seems I promised us people
123 | 
124 | treat all sort of robots like
125 | their life.
126 | 
127 | These bomb disposal units get
128 | names.
129 | 
130 | They get medals of honour had
131 | funeral for them with gun salutes.
132 | 
133 | Research shows that we do this.
134 | 
135 | Even with very simple household
136 | 
137 | robots like the room.
138 | 
139 | A vacuum cleaner.
140 | 
141 | Just a desk that runs around the
142 | floor and clean it just the fact
143 | 
144 | that it's moving around on his own
145 | will cause people to name the
146 | 
147 | marimba and feel bad for the room.
148 | 
149 | But when he gets stuck under the
150 | 
151 | couch.
152 | 
153 | We can design about specifically
154 | 
155 | to invoke this response using eyes
156 | and faces were movement.
157 | 
158 | People are magically
159 | subconsciously associate with
160 | 
161 | state of mind.
162 | 
163 | There's an entire body of research
164 | 
165 | called Human robot interaction
166 | that really shows how all this
167 | 
168 | works so.
169 | 
170 | For example.
171 | 
172 | Researchers at Stamford University
173 | found out that makes people really
174 | 
175 | uncomfortable and asked them to
176 | touch her about his private parts
177 | 
178 | from this from any other studies.
179 | 
180 | We know.
181 | 
182 | We know that people respond to the
183 | cues given to them by the lifelike
184 | 
185 | machines.
186 | 
187 | Even if they know that they're not
188 | 
189 | real.
190 | 
191 | We're heading towards a world
192 | 
193 | where robots are everywhere about
194 | the technology is moving out from
195 | 
196 | behind factory was entering
197 | workplaces households and as these
198 | 
199 | machines.
200 | 
201 | They can sense and make a ton of
202 | 
203 | my decisions and learn enter into
204 | the shared spaces.
205 | 
206 | I think that maybe the best
207 | analogy.
208 | 
209 | We have for this is our
210 | relationship with animals.
211 | 
212 | Thousands of years ago, we started
213 | to domesticate animals and we
214 | 
215 | train them for work and weaponry
216 | and companionship.
217 | 
218 | Throughout history.
219 | 
220 | We've treated.
221 | 
222 | Some animals like tools are the
223 | products and other animals.
224 | 
225 | We treated with kindness and given
226 | a place in society as our
227 | 
228 | companions.
229 | 
230 | I think it's possible.
231 | 
232 | We might start to integrate
233 | Robartes, but similar weights
234 | 
235 | animals are alive.
236 | 
237 | Robert and that.
238 | 
239 | And I can tell you from working.
240 | 
241 | What about the sister were pretty
242 | 
243 | far away from developing robots.
244 | 
245 | They can feel anything there, but
246 | 
247 | we feel for And that matters
248 | because if we're trying to
249 | 
250 | integrate robots into the shared
251 | spaces need to understand that
252 | 
253 | people treat them differently than
254 | other devices that in some cases.
255 | 
256 | For example, the case of a soldier
257 | who becomes emotionally attached
258 | 
259 | to the robot.
260 | 
261 | They work.
262 | 
263 | Well, if that can be anything from
264 | inefficient to dangerous.
265 | 
266 | But in other cases.
267 | 
268 | It can actually be used for the
269 | 
270 | faster this emotional connection
271 | to, but we're really seeing some
272 | 
273 | great use cases.
274 | 
275 | For example, robots working with
276 | 
277 | autistic children to engage them
278 | in ways that we haven't seen
279 | 
280 | previously robot's working with
281 | teachers to engage kids and
282 | 
283 | learning with new and it's not
284 | just for kids early studies show
285 | 
286 | that we can help doctors and
287 | patients and health care settings
288 | 
289 | and this is the pirate b. b. c.
290 | 
291 | But it's used in nursing homes
292 | 
293 | with dementia patients has been
294 | around for a while I remember
295 | 
296 | years ago.
297 | 
298 | Being a party and telling someone
299 | 
300 | about this throwback and her
301 | response was I can't believe we're
302 | 
303 | giving people robots instead of
304 | human care.
305 | 
306 | is a really common response and I
307 | think it's absolutely correct
308 | 
309 | because that would be terrible.
310 | 
311 | And in this case.
312 | 
313 | It's not with this robot replace
314 | it with this robot replaces his
315 | 
316 | animal therapy in context which he
317 | was real animals.
318 | 
319 | We can use robots because people
320 | consistently treat them like more.
321 | 
322 | More like an animal and have it
323 | acknowledging this emotional
324 | 
325 | connection.
326 | 
327 | Robert, can also help us
328 | 
329 | anticipate challenges as these
330 | devices.
331 | 
332 | Move into more intimate areas of
333 | people's lives and for example is
334 | 
335 | it.
336 | 
337 | o.k.
338 | 
339 | If your child's teddy bear robot
340 | records private conversations.
341 | 
342 | Is it.
343 | 
344 | o.k.
345 | 
346 | If your sex robot has compelling
347 | in our purchasers because rope.
348 | 
349 | That's plus capitalism equals
350 | questions around consumer
351 | 
352 | protection and privacy and those
353 | aren't the only reason, said her
354 | 
355 | behaviour around these machines
356 | could, madam.
357 | 
358 | A few years after that first
359 | initial experience.
360 | 
361 | I had with this baby dinosaur
362 | robot do workshop with her friend
363 | 
364 | Hannah Scott.
365 | 
366 | Scott, then we took five of these
367 | 
368 | baby dinosaur about we give them.
369 | 
370 | The five teams of people.
371 | 
372 | We had the name them and play with
373 | them and them for about an hour.
374 | 
375 | Then we unveiled a him or a
376 | hatchet and we told them to
377 | 
378 | torture and kill the row and then
379 | this turned out to be a little
380 | 
381 | more dramatic than we expected it
382 | to be because none of the
383 | 
384 | participants wouldn't even so much
385 | as straight.
386 | 
387 | A robot.
388 | 
389 | So we had to improvise.
390 | 
391 | End at some point.
392 | 
393 | He said o.k.
394 | 
395 | You can save your team's robot.
396 | 
397 | If you destroy another team throw.
398 | 
399 | I And anyone that didn't work.
400 | 
401 | They couldn't do it.
402 | 
403 | So finally said, We're gonna
404 | destroy all the robots are someone
405 | 
406 | takes a hatchet to one of them.
407 | 
408 | This guy stood up and he took the
409 | 
410 | hatchet and the whole room,
411 | Winston.
412 | 
413 | See brother had to down on the
414 | robot's neck and there was this
415 | 
416 | half joking.
417 | 
418 | Is there reason to.
419 | 
420 | For example, prevent the child
421 | from kicking about Doc That just
422 | 
423 | out of respect for property
424 | because the child may be more
425 | 
426 | likely to take a real dark and
427 | again.
428 | 
429 | It's not just kids and this is the
430 | violent video games question, but
431 | 
432 | it's a completely new level
433 | because of this visceral
434 | 
435 | physicality that we respond more
436 | intensely.
437 | 
438 | Two images on a screen, we behave
439 | violently towards Robarts
440 | 
441 | specifically robots that are
442 | designed to mimic life is is that
443 | 
444 | training cruelty muscles.
445 | 
446 | The answer to this question has
447 | 
448 | the potential impact human
449 | behaviour has the potential impact
450 | 
451 | social norms.
452 | 
453 | It has the potential to inspire
454 | 
455 | rules around.
456 | 
457 | What we can and can't do certain
458 | 
459 | Robarts animal cruelty, because
460 | even if robots can't fuel our
461 | 
462 | behaviour towards a matter for us
463 | and regardless of whether we end
464 | 
465 | up changing ovals robots might be
466 | able to help us come to a new
467 | 
468 | understanding of ourselves.
469 | 
470 | Most of what learned over the past
471 | 
472 | ten years have not been about
473 | technology.
474 | 
475 | A It's been about human psychology
476 | and empathy and how we relate to
477 | 
478 | others.
479 | 
480 | And because when a child is kind
481 | 
482 | to her room.
483 | 
484 | But when a soldier tries to save a
485 | 
486 | robot on the battlefield.
487 | 
488 | When a group of people refuses to
489 | 
490 | harm her about a baby dinosaur.
491 | 
492 | Those robots aren't just motors in
493 | 
494 | years and a groom's.


--------------------------------------------------------------------------------