├── pdf2video ├── __main__.py ├── __init__.py ├── parser.py └── pdf2video.py ├── sample.pdf ├── setup.cfg ├── sample.css ├── sample.html ├── setup.py ├── LICENSE ├── sample.txt └── README.md /pdf2video/__main__.py: -------------------------------------------------------------------------------- 1 | from .pdf2video import main 2 | main() 3 | -------------------------------------------------------------------------------- /sample.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tjunttila/pdf2video/HEAD/sample.pdf -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [options.entry_points] 2 | console_scripts = 3 | pdf2video = pdf2video.pdf2video:main 4 | [build-system] 5 | requires = ["setuptools", "wheel"] 6 | [metadata] 7 | license_files = LICENSE 8 | -------------------------------------------------------------------------------- /pdf2video/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | A tool for converting PDF presentations into narrated videos. 3 | Should only be called from the command line. 4 | Please see https://github.com/tjunttila/pdf2video/ for more details. 5 | """ 6 | -------------------------------------------------------------------------------- /sample.css: -------------------------------------------------------------------------------- 1 | video { 2 | border-style: solid; 3 | border-width: 2pt; 4 | border-color: #002F6C; 5 | border-radius: 5pt; 6 | } 7 | video::cue { 8 | /*font-size: 20pt;*/ 9 | color: white; 10 | background-color: #002F6C; 11 | opacity: 0.8; 12 | } 13 | -------------------------------------------------------------------------------- /sample.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | pdf2video - an example 6 | 7 | 8 | 9 |

10 | This HTML page shows how one can embed videos 11 | made with the pdf2video tool in web pages. 12 | WebVTT subtitles are supported by the tool. 13 |

14 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | with open("README.md", "r", encoding="utf-8") as f: 4 | long_description = f.read() 5 | 6 | setuptools.setup( 7 | name="pdf2video", # Replace with your own username 8 | version="0.2.1", 9 | author="T. Junttila", 10 | author_email="Tommi.Junttila@aalto.fi", 11 | description="A tool for making narrated videos from PDF presentations.", 12 | long_description=long_description, 13 | long_description_content_type="text/markdown", 14 | url="https://github.com/tjunttila/pdf2video", 15 | packages=setuptools.find_packages(), 16 | license = "MIT", 17 | classifiers=[ 18 | "Programming Language :: Python :: 3", 19 | "License :: OSI Approved :: MIT License", 20 | "Operating System :: OS Independent", 21 | ], 22 | python_requires='>=3.6', 23 | setup_requires=['wheel'] 24 | ) 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020-2021 T. Junttila 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /sample.txt: -------------------------------------------------------------------------------- 1 | #page 2 | Welcome to a short sample presentation about the #sub#pdf-to-video#pdf2video# tool. 3 | This video is produced automatically with the tool. 4 | You can find more details in the GitHub page of the project. 5 | #20 6 | 7 | #page motivation 8 | Need to make videos of your PDF presentations? 9 | Tired in spending *hours* in recording and editing the audio tracks? 10 | #10 11 | The #sub#pdf-to-video#pdf2video# tool can help you! 12 | It is a small tool, written in the Python programming language, 13 | for making videos from PDF presentations. 14 | #8 15 | As input, it takes a PDF presentation and a textual script file. 16 | The presentation is then turned into a video, 17 | narrated by the Amazon Polly text-to-speech engine. 18 | #10 19 | In order to use the tool, 20 | you should have some common PDF and video tools installed in your computer. 21 | In addition, you should have access to Amazon Web Services. 22 | #20 23 | 24 | #page usage 25 | The use of the tool should be rather simple. 26 | One just provides the names of the inputs, 27 | the PDF file and the script file, 28 | as well as the name of the output video file. 29 | #10 30 | The tool also provides a number of options. 31 | For instance, one can select only some of the PDF pages 32 | to be included in the video. 
33 | This makes it easier to split a long presentation into a set of shorter videos. 34 | #10 35 | In addition, the narration voice can be changed. 36 | For instance, this sample video is produced 37 | with the command shown in the slide. 38 | #10 39 | One can find the sample PDF and script files 40 | in the GitHub repository of the project. 41 | #30 42 | 43 | #page scripts_1 44 | The script files are rather simple text files. 45 | They should be easy to produce with *any* text editor. 46 | #10 47 | For each PDF page to be included in the video, 48 | the file contains a special header line, 49 | followed by the actual script text. 50 | #10 51 | In the text, some simple formatting commands can be used. 52 | For instance, 53 | one can make some text to be *#ph#read#red# in an emphasized style*. 54 | #10 55 | Similarly, one can make breaks of arbitrary lengths. 56 | #40 57 | 58 | #page scripts_2 59 | Subtitles are automatically generated from the script file. 60 | #10 61 | They can be customized with the #sub!hash-sub!#sub! modifier, 62 | which can be combined with the reading style modifiers. 63 | #20 64 | For instance, 65 | consider the example shown here. 66 | #30 67 | It is read as "#slow!big-#ph#Theta#Ti:t@# of n squared!" 68 | #8 69 | but the subtitles show #sub#the same in a mathematical form#Θ(n^2)#. 70 | #40 71 | That's all for this sample presentation! 72 | Please find more details in the GitHub page of the tool. 73 | #20 74 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Description 2 | 3 | `pdf2video` is a Python script that combines 4 | 5 | * (selected pages of) a [PDF](https://en.wikipedia.org/wiki/PDF) presentation, and 6 | * a text script 7 | 8 | into a video narrated by the [Amazon Polly](https://aws.amazon.com/polly/) text-to-speech engine. 9 | It can be used to generate, for instance, educational videos. 
10 | 11 | Please see this [sample video](https://users.aalto.fi/tjunttil/pdf2video.mp4), 12 | produced with the tool, for a short introduction. 13 | Observe that some browsers don't show the subtitles embedded in MP4 videos; 14 | please see this [sample video with WebVTT subtitles](https://users.aalto.fi/tjunttil/pdf2video.html) in such a case. 15 | 16 | # Requirements 17 | 18 | Using `pdf2video` requires the following external tools and services: 19 | 20 | * [Python](https://www.python.org/) version 3.6 or later. 21 | * The `pdfinfo` and `pdftoppm` command line tools provided in the [poppler PDF rendering library](https://poppler.freedesktop.org/). 22 | 23 | In Ubuntu Linux, you can install these with `sudo apt install poppler-utils`. 24 | 25 | For macOS, they are available at least from [Homebrew](https://brew.sh/) with `brew install poppler`. 26 | * The `ffmpeg` command line tool from the [`FFmpeg`](https://ffmpeg.org/) framework. 27 | 28 | In Ubuntu Linux, you can install it with `sudo apt install ffmpeg`. 29 | 30 | For macOS, it is available at least from [Homebrew](https://brew.sh/) with `brew install ffmpeg`. 31 | * Access to [Amazon Web Services](https://aws.amazon.com/). 32 | * The [AWS Command Line Interface](https://aws.amazon.com/cli/) configured with a [profile](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-profiles.html) that can access the Polly service. To use the [neural voices](https://docs.aws.amazon.com/polly/latest/dg/ntts-voices-main.html) (recommended for the best quality), remember to select [a region in which they are supported](https://docs.aws.amazon.com/polly/latest/dg/NTTS-main.html). 
33 | 34 | # Installation 35 | 36 | One can use `pip` to install `pdf2video` directly from GitHub: 37 | ``` 38 | python3 -m pip install git+https://github.com/tjunttila/pdf2video.git 39 | ``` 40 | See the [PyPA Installing Packages tutorial](https://packaging.python.org/tutorials/installing-packages/) for information on installing Python packages and on Python virtual environments. 41 | 42 | # Usage 43 | 44 | In the simplest case, 45 | ``` 46 | pdf2video presentation.pdf script.txt video.mp4 47 | ``` 48 | converts the PDF file `presentation.pdf` and 49 | the UTF-8 encoded script file `script.txt` 50 | into the video `video.mp4` narrated by the default voice (Amazon Polly standard voice Joanna in the current version). 51 | The video includes SRT subtitles that can be displayed by most video players. 52 | In addition, for HTML use, [WebVTT subtitles](https://www.w3schools.com/tags/tag_track.asp) are produced in a separate file as well. 53 | 54 | The selected PDF pages as well as the narration voice can be changed easily. 55 | For instance, the [sample video](https://users.aalto.fi/tjunttil/pdf2video.mp4) was produced with the command 56 | ``` 57 | pdf2video sample.pdf sample.txt --pages "1,2,4-6" --voice Matthew --neural --conversational sample.mp4 58 | ``` 59 | All the options can be printed with `pdf2video --help`. 60 | 61 | The script file is formatted as follows. 62 | The script for each presentation page starts with a line `#page [name]` and 63 | the following text then contains the script. The optional `[name]` parameter, that can be used in the `--only` option of the tool, is a string of ascii letters and underscores, possibly followed by a non-negative number. For instance `defs` and `example_3` are valid names. 64 | 65 | A line starting with `%` is a comment and thus ignored. 
66 | 67 | In the script text, one can use the following modifiers: 68 | 69 | * `*text*` to read `text` in an emphasized style, 70 | * `@xyz@` to spell `xyz` as characters, 71 | * `#slow/text/` to read `text` at a slower rate, 72 | * `#high/text/` to use higher pitch for `text`, 73 | * `#low/text/` to use lower pitch for `text`, 74 | * `#n`, where `n` is a positive integer, to have a pause of length `n`*100ms, 75 | * `#ph/word/pronunciation/` to pronounce the `word` with the [X-SAMPA](https://en.wikipedia.org/wiki/X-SAMPA) `pronunciation`, and 76 | * `#sub/text/subtitle/` to use `subtitle` as the subtitle instead of the spoken `text`. 77 | 78 | Above, the `/` delimiter can be any other symbol not occurring in the "arguments" of the modifier. 79 | This allows one to nest modifiers. 80 | For instance, 81 | `#sub/big-#ph!Theta!Ti:.t@! of n/Θ(n)/` 82 | reads as "big-theta of n" but shows as `Θ(n)` in the subtitles. 83 | 84 | Please see the [sample.txt](sample.txt) file for examples. 85 | 86 | 87 | # Some good practices and hints 88 | 89 | * Converting a script with many pages to video can take some time. For developing and debugging the script text, it is recommended to name the script pages with `#page pagename`, and then use the `--only` option of the tool to convert only the page under development. 90 | * For pronunciations, one can find [IPA](https://en.wikipedia.org/wiki/International_Phonetic_Alphabet) pronunciations in many online dictionaries, and then convert them to X-SAMPA by using the table in the [X-SAMPA Wikipedia page](https://en.wikipedia.org/wiki/X-SAMPA). 91 | * Whenever possible, avoid using the `@xyz@` construct as it seems to change the pitch of the whole sentence. 92 | 93 | 94 | # License 95 | 96 | The `pdf2video` tool is released under the [MIT License](https://opensource.org/licenses/MIT). 
97 | -------------------------------------------------------------------------------- /pdf2video/parser.py: -------------------------------------------------------------------------------- 1 | """ 2 | Parser for pdf2video script file syntax. 3 | Author: T. Junttila 4 | License: The MIT License 5 | """ 6 | 7 | from abc import ABC, abstractmethod 8 | import re 9 | import sys 10 | 11 | class AST(ABC): 12 | """Base class for abstract syntax tree nodes.""" 13 | 14 | @abstractmethod 15 | def to_ssml(self, neural): 16 | """Get the SSML representation of the sub-tree.""" 17 | 18 | @abstractmethod 19 | def to_words(self): 20 | """Get the plain words representation of the sub-tree.""" 21 | 22 | @abstractmethod 23 | def to_sub(self): 24 | """Get the sub-titles representation of the sub-tree.""" 25 | 26 | class ASTWord(AST): 27 | """An AST node for a word.""" 28 | def __init__(self, text): 29 | super().__init__() 30 | self.text = text 31 | def to_ssml(self, neural): 32 | return self.text 33 | def to_words(self): 34 | return [self.text] 35 | def to_sub(self): 36 | return self.text 37 | 38 | class ASTBreak(AST): 39 | """An AST node for a break.""" 40 | def __init__(self, time): 41 | self.time = time 42 | def to_ssml(self, neural): 43 | return '' 44 | def to_words(self): 45 | return [] 46 | def to_sub(self): 47 | return '' 48 | 49 | class ASTDelim(AST): 50 | """An AST node for a delimiter.""" 51 | def __init__(self, text): 52 | self.text = text 53 | def to_ssml(self, neural): 54 | return self.text 55 | def to_words(self): 56 | return [] 57 | def to_sub(self): 58 | return self.text 59 | 60 | class ASTSpace(AST): 61 | """An AST node for a white space.""" 62 | def __init__(self): 63 | pass 64 | def to_ssml(self, neural): 65 | return ' ' 66 | def to_words(self): 67 | return [] 68 | def to_sub(self): 69 | return ' ' 70 | 71 | class ASTEmph(AST): 72 | """An AST node for emphasized text.""" 73 | def __init__(self, children): 74 | self.children = children 75 | def to_ssml(self, neural): 
76 | children_ssml = "".join([child.to_ssml(neural) for child in self.children]) 77 | if neural: 78 | return ''+children_ssml+'' 79 | return ''+children_ssml+'' 80 | def to_words(self): 81 | result = [] 82 | for child in self.children: 83 | result += child.to_words() 84 | return result 85 | def to_sub(self): 86 | return "".join([child.to_sub() for child in self.children]) 87 | 88 | class ASTPhoneme(AST): 89 | """An AST node for text read with phonemes.""" 90 | def __init__(self, text, xsampa): 91 | self.text = text 92 | self.xsampa = xsampa 93 | def to_ssml(self, neural): 94 | return f'{self.text}' 95 | def to_words(self): 96 | return re.split(r'\s+', self.text.strip()) 97 | def to_sub(self): 98 | return self.text 99 | 100 | class ASTSub(AST): 101 | """An AST node for text with different sub-title representation.""" 102 | def __init__(self, children, subtitles): 103 | self.children = children 104 | self.subtitles = subtitles 105 | def to_ssml(self, neural): 106 | children_ssml = [child.to_ssml(neural) for child in self.children] 107 | return "".join(children_ssml) 108 | def to_words(self): 109 | result = [] 110 | for child in self.children: 111 | result += child.to_words() 112 | return result 113 | def to_sub(self): 114 | return self.subtitles 115 | 116 | class ASTSlow(AST): 117 | """An AST node for text read slowly.""" 118 | def __init__(self, children): 119 | self.children = children 120 | def to_ssml(self, neural): 121 | children_ssml = "".join([child.to_ssml(neural) for child in self.children]) 122 | return ''+children_ssml+'' 123 | def to_words(self): 124 | result = [] 125 | for child in self.children: 126 | result += child.to_words() 127 | return result 128 | def to_sub(self): 129 | return "".join([child.to_sub() for child in self.children]) 130 | 131 | class ASTLow(AST): 132 | """An AST node for text read in low pitch.""" 133 | def __init__(self, children): 134 | self.children = children 135 | def to_ssml(self, neural): 136 | children_ssml = 
"".join([child.to_ssml(neural) for child in self.children]) 137 | if neural: 138 | # prosody pitch not yet in neural TTS, make it slightly slower 139 | return ''+children_ssml+'' 140 | return ''+children_ssml+'' 141 | def to_words(self): 142 | result = [] 143 | for child in self.children: 144 | result += child.to_words() 145 | return result 146 | def to_sub(self): 147 | return "".join([child.to_sub() for child in self.children]) 148 | 149 | class ASTHigh(AST): 150 | """An AST node for text read in high pitch.""" 151 | def __init__(self, children): 152 | self.children = children 153 | def to_ssml(self, neural): 154 | children_ssml = "".join([child.to_ssml(neural) for child in self.children]) 155 | if neural: 156 | # prosody pitch not yet in neural TTS, make it slightly faster 157 | return ''+children_ssml+'' 158 | return ''+children_ssml+'' 159 | def to_words(self): 160 | result = [] 161 | for child in self.children: 162 | result += child.to_words() 163 | return result 164 | def to_sub(self): 165 | return "".join([child.to_sub() for child in self.children]) 166 | 167 | class ASTSayAs(AST): 168 | """An AST node for text read as letters.""" 169 | def __init__(self, letters): 170 | self.letters = letters 171 | def to_ssml(self, neural): 172 | return ''+self.letters+'' 173 | def to_words(self): 174 | return re.split(r'\s+', self.letters.strip()) 175 | def to_sub(self): 176 | return self.letters 177 | 178 | 179 | def parse_to_ast(string, err_linenum = None): 180 | """Parse the script text string into a sequence of AST nodes.""" 181 | i = 0 182 | string_length = len(string) 183 | def read_until(chars): 184 | nonlocal i 185 | tmp = i 186 | while i < string_length and string[i] not in chars: 187 | i += 1 188 | return string[tmp:i] 189 | def err(msg): 190 | linenum_text = '' if err_linenum is None else f'On line {err_linenum}: ' 191 | print(linenum_text+msg) 192 | sys.exit(1) 193 | #assert False, msg 194 | result = [] 195 | while i < string_length: 196 | if string[i] == '#': 
197 | if string[i:i+4] == '#sub': 198 | match = re.match( 199 | '^#sub(.)(?P((?!\1).)*?)\\1(?P((?!\1).)+?)\\1', 200 | string[i:]) 201 | if match is None: 202 | err(f'Malformed #sub "{string[i:]}"') 203 | result.append(ASTSub(parse_to_ast(match['text']), match['sub'])) 204 | i += len(match.group(0)) 205 | continue 206 | if string[i:i+5] == '#slow': 207 | match = re.match('^#slow(.)(?P((?!\1).)+?)\\1', string[i:]) 208 | if match is None: 209 | err(f'Malformed #slow "{string[i:]}"') 210 | result.append(ASTSlow(parse_to_ast(match['text']))) 211 | i += len(match.group(0)) 212 | continue 213 | if string[i:i+4] == '#low': 214 | match = re.match('^#low(.)(?P((?!\1).)+?)\\1', string[i:]) 215 | if match is None: 216 | err(f'Malformed #low "{string[i:]}"') 217 | result.append(ASTLow(parse_to_ast(match['text']))) 218 | i += len(match.group(0)) 219 | continue 220 | if string[i:i+5] == '#high': 221 | match = re.match('^#high(.)(?P((?!\1).)+?)\\1', string[i:]) 222 | if match is None: 223 | err(f'Malformed #high "{string[i:]}"') 224 | result.append(ASTHigh(parse_to_ast(match['text']))) 225 | i += len(match.group(0)) 226 | continue 227 | if string[i:i+3] == '#ph': 228 | match = re.match( 229 | '^#ph(.)(?P((?!\1).)+?)\\1(?P((?!\1).)+?)\\1', 230 | string[i:]) 231 | if match is None: 232 | err(f'Malformed #ph "{string[i:]}"') 233 | result.append(ASTPhoneme(match['text'], match['ph'])) 234 | i += len(match.group(0)) 235 | continue 236 | # Break #10 237 | match = re.match(r'^#(?P