10 | This HTML page shows how one can embed videos
11 | made with the pdf2video tool in web pages.
12 | WebVTT subtitles are supported by the tool.
13 |
14 |
19 |
20 |
21 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import setuptools
2 |
3 | with open("README.md", "r", encoding="utf-8") as f:
4 | long_description = f.read()
5 |
6 | setuptools.setup(
7 | name="pdf2video",
8 | version="0.2.1",
9 | author="T. Junttila",
10 | author_email="Tommi.Junttila@aalto.fi",
11 | description="A tool for making narrated videos from PDF presentations.",
12 | long_description=long_description,
13 | long_description_content_type="text/markdown",
14 | url="https://github.com/tjunttila/pdf2video",
15 | packages=setuptools.find_packages(),
16 | license="MIT",
17 | classifiers=[
18 | "Programming Language :: Python :: 3",
19 | "License :: OSI Approved :: MIT License",
20 | "Operating System :: OS Independent",
21 | ],
22 | python_requires='>=3.6',
23 | setup_requires=['wheel']
24 | )
25 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020-2021 T. Junttila
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/sample.txt:
--------------------------------------------------------------------------------
1 | #page
2 | Welcome to a short sample presentation about the #sub#pdf-to-video#pdf2video# tool.
3 | This video is produced automatically with the tool.
4 | You can find more details on the GitHub page of the project.
5 | #20
6 |
7 | #page motivation
8 | Need to make videos of your PDF presentations?
9 | Tired of spending *hours* recording and editing the audio tracks?
10 | #10
11 | The #sub#pdf-to-video#pdf2video# tool can help you!
12 | It is a small tool, written in the Python programming language,
13 | for making videos from PDF presentations.
14 | #8
15 | As input, it takes a PDF presentation and a textual script file.
16 | The presentation is then turned into a video,
17 | narrated by the Amazon Polly text-to-speech engine.
18 | #10
19 | In order to use the tool,
20 | you should have some common PDF and video tools installed on your computer.
21 | In addition, you should have access to Amazon Web Services.
22 | #20
23 |
24 | #page usage
25 | The use of the tool should be rather simple.
26 | One just provides the names of the inputs,
27 | the PDF file and the script file,
28 | as well as the name of the output video file.
29 | #10
30 | The tool also provides a number of options.
31 | For instance, one can select only some of the PDF pages
32 | to be included in the video.
33 | This makes it easier to split a long presentation into a set of shorter videos.
34 | #10
35 | In addition, the narration voice can be changed.
36 | For instance, this sample video is produced
37 | with the command shown in the slide.
38 | #10
39 | One can find the sample PDF and script files
40 | in the GitHub repository of the project.
41 | #30
42 |
43 | #page scripts_1
44 | The script files are rather simple text files.
45 | They should be easy to produce with *any* text editor.
46 | #10
47 | For each PDF page to be included in the video,
48 | the file contains a special header line,
49 | followed by the actual script text.
50 | #10
51 | In the text, some simple formatting commands can be used.
52 | For instance,
53 | one can have some text *#ph#read#red# in an emphasized style*.
54 | #10
55 | Similarly, one can make breaks of arbitrary lengths.
56 | #40
57 |
58 | #page scripts_2
59 | Subtitles are automatically generated from the script file.
60 | #10
61 | They can be customized with the #sub!hash-sub!#sub! modifier,
62 | which can be combined with the reading style modifiers.
63 | #20
64 | For instance,
65 | consider the example shown here.
66 | #30
67 | It is read as "#slow!big-#ph#Theta#Ti:t@# of n squared!"
68 | #8
69 | but the subtitles show #sub#the same in a mathematical form#Θ(n^2)#.
70 | #40
71 | That's all for this sample presentation!
72 | Please find more details on the GitHub page of the tool.
73 | #20
74 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Description
2 |
3 | `pdf2video` is a Python script that combines
4 |
5 | * (selected pages of) a [PDF](https://en.wikipedia.org/wiki/PDF) presentation, and
6 | * a text script
7 |
8 | into a video narrated by the [Amazon Polly](https://aws.amazon.com/polly/) text-to-speech engine.
9 | It can be used to generate, for instance, educational videos.
10 |
11 | Please see this [sample video](https://users.aalto.fi/tjunttil/pdf2video.mp4),
12 | produced with the tool, for a short introduction.
13 | Note that some browsers don't show the subtitles embedded in MP4 videos;
14 | in such a case, please see this [sample video with WebVTT subtitles](https://users.aalto.fi/tjunttil/pdf2video.html).
15 |
16 | # Requirements
17 |
18 | Using `pdf2video` requires the following external tools and services:
19 |
20 | * [Python](https://www.python.org/) version 3.6 or later.
21 | * The `pdfinfo` and `pdftoppm` command line tools provided in the [poppler PDF rendering library](https://poppler.freedesktop.org/).
22 |
23 | In Ubuntu Linux, you can install these with `sudo apt install poppler-utils`.
24 |
25 | For macOS, they are available at least from [Homebrew](https://brew.sh/) with `brew install poppler`.
26 | * The `ffmpeg` command line tool from the [`FFmpeg`](https://ffmpeg.org/) framework.
27 |
28 | In Ubuntu Linux, you can install it with `sudo apt install ffmpeg`.
29 |
30 | For macOS, it is available at least from [Homebrew](https://brew.sh/) with `brew install ffmpeg`.
31 | * Access to [Amazon Web Services](https://aws.amazon.com/).
32 | * The [AWS Command Line Interface](https://aws.amazon.com/cli/) configured with a [profile](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-profiles.html) that can access the Polly service. To use the [neural voices](https://docs.aws.amazon.com/polly/latest/dg/ntts-voices-main.html) (recommended for the best quality), remember to select [a region in which they are supported](https://docs.aws.amazon.com/polly/latest/dg/NTTS-main.html).
33 |
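To check that the profile works (a sketch only; the profile name `polly` and the region `eu-west-1` are example values, not anything required by `pdf2video`), you can configure it and ask Polly to list its neural voices with the AWS CLI:
```
aws configure --profile polly
aws polly describe-voices --engine neural --profile polly --region eu-west-1
```
If the second command prints a list of voices instead of an error, the profile can reach the Polly service.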
34 | # Installation
35 |
36 | One can use `pip` to install `pdf2video` directly from GitHub:
37 | ```
38 | python3 -m pip install git+https://github.com/tjunttila/pdf2video.git
39 | ```
40 | See the [PyPA Installing Packages tutorial](https://packaging.python.org/tutorials/installing-packages/) for information on installing Python packages and on Python virtual environments.
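For example, installing into a fresh virtual environment could look like this (a sketch only; the directory name `venv` is arbitrary):
```
python3 -m venv venv
source venv/bin/activate
python3 -m pip install git+https://github.com/tjunttila/pdf2video.git
```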
41 |
42 | # Usage
43 |
44 | In the simplest case,
45 | ```
46 | pdf2video presentation.pdf script.txt video.mp4
47 | ```
48 | converts the PDF file `presentation.pdf` and
49 | the UTF-8 encoded script file `script.txt`
50 | into the video `video.mp4` narrated by the default voice (Amazon Polly standard voice Joanna in the current version).
51 | The video includes SRT subtitles that can be displayed by most video players.
52 | In addition, for HTML use, [WebVTT subtitles](https://www.w3schools.com/tags/tag_track.asp) are produced in a separate file.
53 |
54 | The selected PDF pages as well as the narration voice can be changed easily.
55 | For instance, the [sample video](https://users.aalto.fi/tjunttil/pdf2video.mp4) was produced with the command
56 | ```
57 | pdf2video sample.pdf sample.txt --pages "1,2,4-6" --voice Matthew --neural --conversational sample.mp4
58 | ```
59 | All the options can be printed with `pdf2video --help`.
60 |
61 | The script file is formatted as follows.
62 | The script for each presentation page starts with a line `#page [name]` and
63 | the following text then contains the script. The optional `[name]` parameter, which can be used with the `--only` option of the tool, is a string of ASCII letters and underscores, possibly followed by a non-negative number. For instance, `defs` and `example_3` are valid names.
64 |
65 | A line starting with `%` is a comment and thus ignored.
66 |
67 | In the script text, one can use the following modifiers:
68 |
69 | * `*text*` to read `text` in an emphasized style,
70 | * `@xyz@` to spell `xyz` as characters,
71 | * `#slow/text/` to read `text` at a slower rate,
72 | * `#high/text/` to use a higher pitch for `text`,
73 | * `#low/text/` to use a lower pitch for `text`,
74 | * `#n`, where `n` is a positive integer, to insert a pause of `n`*100 ms,
75 | * `#ph/word/pronunciation/` to read `word` with the [X-SAMPA](https://en.wikipedia.org/wiki/X-SAMPA) `pronunciation`, and
76 | * `#sub/text/subtitle/` to use `subtitle` as the subtitle instead of the spoken `text`.
77 |
78 | Above, the `/` delimiter can be replaced by any symbol that does not occur in the "arguments" of the modifier.
79 | This allows one to nest modifiers.
80 | For instance,
81 | `#sub/big-#ph!Theta!Ti:.t@! of n/Θ(n)/`
82 | reads as "big-theta of n" but shows as `Θ(n)` in the subtitles.
83 |
84 | Please see the [sample.txt](sample.txt) file for examples.
85 |
86 |
87 | # Some good practices and hints
88 |
89 | * Converting a script with many pages to video can take some time. For developing and debugging the script text, it is recommended to name the script pages with `#page pagename` and then use the `--only` option of the tool to convert only the page under development (see the example after this list).
90 | * For pronunciations, one can find [IPA](https://en.wikipedia.org/wiki/International_Phonetic_Alphabet) pronunciations in many online dictionaries, and then convert them to X-SAMPA by using the table in the [X-SAMPA Wikipedia page](https://en.wikipedia.org/wiki/X-SAMPA).
91 | * Whenever possible, avoid using the `@xyz@` construct as it seems to change the pitch of the whole sentence.
92 |
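For instance (a sketch only; this assumes `--only` accepts the page name directly as its value, so check `pdf2video --help` for the exact syntax, and the output file name is arbitrary), iterating on the `motivation` page of the sample script could look like this:
```
pdf2video sample.pdf sample.txt --only motivation --voice Matthew --neural motivation_only.mp4
```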
93 |
94 | # License
95 |
96 | The `pdf2video` tool is released under the [MIT License](https://opensource.org/licenses/MIT).
97 |
--------------------------------------------------------------------------------
/pdf2video/parser.py:
--------------------------------------------------------------------------------
1 | """
2 | Parser for pdf2video script file syntax.
3 | Author: T. Junttila
4 | License: The MIT License
5 | """
6 |
7 | from abc import ABC, abstractmethod
8 | import re
9 | import sys
10 |
11 | class AST(ABC):
12 | """Base class for abstract syntax tree nodes."""
13 |
14 | @abstractmethod
15 | def to_ssml(self, neural):
16 | """Get the SSML representation of the sub-tree."""
17 |
18 | @abstractmethod
19 | def to_words(self):
20 | """Get the plain words representation of the sub-tree."""
21 |
22 | @abstractmethod
23 | def to_sub(self):
24 | """Get the sub-titles representation of the sub-tree."""
25 |
26 | class ASTWord(AST):
27 | """An AST node for a word."""
28 | def __init__(self, text):
29 | super().__init__()
30 | self.text = text
31 | def to_ssml(self, neural):
32 | return self.text
33 | def to_words(self):
34 | return [self.text]
35 | def to_sub(self):
36 | return self.text
37 |
38 | class ASTBreak(AST):
39 | """An AST node for a break."""
40 | def __init__(self, time):
41 | self.time = time
42 | def to_ssml(self, neural):
43 | return ''
44 | def to_words(self):
45 | return []
46 | def to_sub(self):
47 | return ''
48 |
49 | class ASTDelim(AST):
50 | """An AST node for a delimiter."""
51 | def __init__(self, text):
52 | self.text = text
53 | def to_ssml(self, neural):
54 | return self.text
55 | def to_words(self):
56 | return []
57 | def to_sub(self):
58 | return self.text
59 |
60 | class ASTSpace(AST):
61 | """An AST node for a white space."""
62 | def __init__(self):
63 | pass
64 | def to_ssml(self, neural):
65 | return ' '
66 | def to_words(self):
67 | return []
68 | def to_sub(self):
69 | return ' '
70 |
71 | class ASTEmph(AST):
72 | """An AST node for emphasized text."""
73 | def __init__(self, children):
74 | self.children = children
75 | def to_ssml(self, neural):
76 | children_ssml = "".join([child.to_ssml(neural) for child in self.children])
77 | if neural:
78 | return '<prosody volume="loud">'+children_ssml+'</prosody>' # emphasis tag not supported in neural TTS
79 | return '<emphasis level="strong">'+children_ssml+'</emphasis>'
80 | def to_words(self):
81 | result = []
82 | for child in self.children:
83 | result += child.to_words()
84 | return result
85 | def to_sub(self):
86 | return "".join([child.to_sub() for child in self.children])
87 |
88 | class ASTPhoneme(AST):
89 | """An AST node for text read with phonemes."""
90 | def __init__(self, text, xsampa):
91 | self.text = text
92 | self.xsampa = xsampa
93 | def to_ssml(self, neural):
94 | return f'<phoneme alphabet="x-sampa" ph="{self.xsampa}">{self.text}</phoneme>'
95 | def to_words(self):
96 | return re.split(r'\s+', self.text.strip())
97 | def to_sub(self):
98 | return self.text
99 |
100 | class ASTSub(AST):
101 | """An AST node for text with different sub-title representation."""
102 | def __init__(self, children, subtitles):
103 | self.children = children
104 | self.subtitles = subtitles
105 | def to_ssml(self, neural):
106 | children_ssml = [child.to_ssml(neural) for child in self.children]
107 | return "".join(children_ssml)
108 | def to_words(self):
109 | result = []
110 | for child in self.children:
111 | result += child.to_words()
112 | return result
113 | def to_sub(self):
114 | return self.subtitles
115 |
116 | class ASTSlow(AST):
117 | """An AST node for text read slowly."""
118 | def __init__(self, children):
119 | self.children = children
120 | def to_ssml(self, neural):
121 | children_ssml = "".join([child.to_ssml(neural) for child in self.children])
122 | return '<prosody rate="slow">'+children_ssml+'</prosody>'
123 | def to_words(self):
124 | result = []
125 | for child in self.children:
126 | result += child.to_words()
127 | return result
128 | def to_sub(self):
129 | return "".join([child.to_sub() for child in self.children])
130 |
131 | class ASTLow(AST):
132 | """An AST node for text read in low pitch."""
133 | def __init__(self, children):
134 | self.children = children
135 | def to_ssml(self, neural):
136 | children_ssml = "".join([child.to_ssml(neural) for child in self.children])
137 | if neural:
138 | # prosody pitch not yet in neural TTS, make it slightly slower
139 | return '<prosody rate="90%">'+children_ssml+'</prosody>'
140 | return '<prosody pitch="low">'+children_ssml+'</prosody>'
141 | def to_words(self):
142 | result = []
143 | for child in self.children:
144 | result += child.to_words()
145 | return result
146 | def to_sub(self):
147 | return "".join([child.to_sub() for child in self.children])
148 |
149 | class ASTHigh(AST):
150 | """An AST node for text read in high pitch."""
151 | def __init__(self, children):
152 | self.children = children
153 | def to_ssml(self, neural):
154 | children_ssml = "".join([child.to_ssml(neural) for child in self.children])
155 | if neural:
156 | # prosody pitch not yet in neural TTS, make it slightly faster
157 | return '<prosody rate="110%">'+children_ssml+'</prosody>'
158 | return '<prosody pitch="high">'+children_ssml+'</prosody>'
159 | def to_words(self):
160 | result = []
161 | for child in self.children:
162 | result += child.to_words()
163 | return result
164 | def to_sub(self):
165 | return "".join([child.to_sub() for child in self.children])
166 |
167 | class ASTSayAs(AST):
168 | """An AST node for text read as letters."""
169 | def __init__(self, letters):
170 | self.letters = letters
171 | def to_ssml(self, neural):
172 | return '<say-as interpret-as="characters">'+self.letters+'</say-as>'
173 | def to_words(self):
174 | return re.split(r'\s+', self.letters.strip())
175 | def to_sub(self):
176 | return self.letters
177 |
178 |
179 | def parse_to_ast(string, err_linenum = None):
180 | """Parse the script text string into a sequence of AST nodes."""
181 | i = 0
182 | string_length = len(string)
183 | def read_until(chars):
184 | nonlocal i
185 | tmp = i
186 | while i < string_length and string[i] not in chars:
187 | i += 1
188 | return string[tmp:i]
189 | def err(msg):
190 | linenum_text = '' if err_linenum is None else f'On line {err_linenum}: '
191 | print(linenum_text+msg)
192 | sys.exit(1)
193 | #assert False, msg
194 | result = []
195 | while i < string_length:
196 | if string[i] == '#':
197 | if string[i:i+4] == '#sub':
198 | match = re.match(
199 | '^#sub(.)(?P<text>((?!\1).)*?)\\1(?P<sub>((?!\1).)+?)\\1',
200 | string[i:])
201 | if match is None:
202 | err(f'Malformed #sub "{string[i:]}"')
203 | result.append(ASTSub(parse_to_ast(match['text']), match['sub']))
204 | i += len(match.group(0))
205 | continue
206 | if string[i:i+5] == '#slow':
207 | match = re.match('^#slow(.)(?P<text>((?!\1).)+?)\\1', string[i:])
208 | if match is None:
209 | err(f'Malformed #slow "{string[i:]}"')
210 | result.append(ASTSlow(parse_to_ast(match['text'])))
211 | i += len(match.group(0))
212 | continue
213 | if string[i:i+4] == '#low':
214 | match = re.match('^#low(.)(?P<text>((?!\1).)+?)\\1', string[i:])
215 | if match is None:
216 | err(f'Malformed #low "{string[i:]}"')
217 | result.append(ASTLow(parse_to_ast(match['text'])))
218 | i += len(match.group(0))
219 | continue
220 | if string[i:i+5] == '#high':
221 | match = re.match('^#high(.)(?P<text>((?!\1).)+?)\\1', string[i:])
222 | if match is None:
223 | err(f'Malformed #high "{string[i:]}"')
224 | result.append(ASTHigh(parse_to_ast(match['text'])))
225 | i += len(match.group(0))
226 | continue
227 | if string[i:i+3] == '#ph':
228 | match = re.match(
229 | '^#ph(.)(?P<text>((?!\1).)+?)\\1(?P<ph>((?!\1).)+?)\\1',
230 | string[i:])
231 | if match is None:
232 | err(f'Malformed #ph "{string[i:]}"')
233 | result.append(ASTPhoneme(match['text'], match['ph']))
234 | i += len(match.group(0))
235 | continue
236 | # Break #10
237 | match = re.match(r'^#(?P