├── pyshaka
├── __init__.py
├── logs
│ └── placeholder
├── text
│ ├── TextEngine.py
│ ├── Mp4TtmlParser.py
│ ├── VttTextParser.py
│ ├── Cue.py
│ ├── Mp4VttParser.py
│ └── TtmlTextParser.py
├── test
│ └── assets
│ │ ├── vtt-init.mp4
│ │ └── vtt-segment.mp4
├── util
│ ├── Functional.py
│ ├── exceptions.py
│ ├── TextParser.py
│ ├── DataViewReader.py
│ ├── Mp4BoxParsers.py
│ └── Mp4Parser.py
├── log.py
└── main.py
├── output.png
├── test
├── new.zip
├── id_wvtt.zip
├── ttml_test.zip
├── assets
│ ├── vtt-init.mp4
│ ├── vtt-segment.mp4
│ └── sintel-audio-init.mp4
├── ismttml_text_TTML_pol.zip
└── dashvtt_subtitle_WVTT_zh-TW.zip
├── sample
└── sample.mp4
├── images
├── Snipaste_2021-09-05_20-04-25.png
├── Snipaste_2021-09-05_20-30-08.png
├── Snipaste_2021-09-06_23-44-22.png
├── Snipaste_2021-09-06_23-46-00.png
├── Snipaste_2021-09-06_23-49-58.png
├── Snipaste_2021-09-06_23-50-20.png
└── Snipaste_2022-02-19_11-36-50.png
├── .gitignore
├── package.json
├── parsertest.js
├── .github
└── workflows
│ └── dev_release.yml
├── README.md
├── 移植shaka-player字幕解析部分为本地程序.md
└── parser.js
/pyshaka/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/pyshaka/logs/placeholder:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/pyshaka/text/TextEngine.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/output.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xhlove/dash-subtitle-extractor/HEAD/output.png
--------------------------------------------------------------------------------
/test/new.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xhlove/dash-subtitle-extractor/HEAD/test/new.zip
--------------------------------------------------------------------------------
/test/id_wvtt.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xhlove/dash-subtitle-extractor/HEAD/test/id_wvtt.zip
--------------------------------------------------------------------------------
/sample/sample.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xhlove/dash-subtitle-extractor/HEAD/sample/sample.mp4
--------------------------------------------------------------------------------
/test/ttml_test.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xhlove/dash-subtitle-extractor/HEAD/test/ttml_test.zip
--------------------------------------------------------------------------------
/test/assets/vtt-init.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xhlove/dash-subtitle-extractor/HEAD/test/assets/vtt-init.mp4
--------------------------------------------------------------------------------
/test/assets/vtt-segment.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xhlove/dash-subtitle-extractor/HEAD/test/assets/vtt-segment.mp4
--------------------------------------------------------------------------------
/test/ismttml_text_TTML_pol.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xhlove/dash-subtitle-extractor/HEAD/test/ismttml_text_TTML_pol.zip
--------------------------------------------------------------------------------
/pyshaka/test/assets/vtt-init.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xhlove/dash-subtitle-extractor/HEAD/pyshaka/test/assets/vtt-init.mp4
--------------------------------------------------------------------------------
/test/assets/sintel-audio-init.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xhlove/dash-subtitle-extractor/HEAD/test/assets/sintel-audio-init.mp4
--------------------------------------------------------------------------------
/pyshaka/test/assets/vtt-segment.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xhlove/dash-subtitle-extractor/HEAD/pyshaka/test/assets/vtt-segment.mp4
--------------------------------------------------------------------------------
/test/dashvtt_subtitle_WVTT_zh-TW.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xhlove/dash-subtitle-extractor/HEAD/test/dashvtt_subtitle_WVTT_zh-TW.zip
--------------------------------------------------------------------------------
/images/Snipaste_2021-09-05_20-04-25.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xhlove/dash-subtitle-extractor/HEAD/images/Snipaste_2021-09-05_20-04-25.png
--------------------------------------------------------------------------------
/images/Snipaste_2021-09-05_20-30-08.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xhlove/dash-subtitle-extractor/HEAD/images/Snipaste_2021-09-05_20-30-08.png
--------------------------------------------------------------------------------
/images/Snipaste_2021-09-06_23-44-22.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xhlove/dash-subtitle-extractor/HEAD/images/Snipaste_2021-09-06_23-44-22.png
--------------------------------------------------------------------------------
/images/Snipaste_2021-09-06_23-46-00.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xhlove/dash-subtitle-extractor/HEAD/images/Snipaste_2021-09-06_23-46-00.png
--------------------------------------------------------------------------------
/images/Snipaste_2021-09-06_23-49-58.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xhlove/dash-subtitle-extractor/HEAD/images/Snipaste_2021-09-06_23-49-58.png
--------------------------------------------------------------------------------
/images/Snipaste_2021-09-06_23-50-20.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xhlove/dash-subtitle-extractor/HEAD/images/Snipaste_2021-09-06_23-50-20.png
--------------------------------------------------------------------------------
/images/Snipaste_2022-02-19_11-36-50.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xhlove/dash-subtitle-extractor/HEAD/images/Snipaste_2022-02-19_11-36-50.png
--------------------------------------------------------------------------------
/pyshaka/util/Functional.py:
--------------------------------------------------------------------------------
class Functional:
    '''Namespace for small stateless predicate helpers.'''

    @staticmethod
    def isNotNull(value) -> bool:
        '''Return ``True`` for every value except ``None``.'''
        is_missing = value is None
        return not is_missing
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .vscode
2 | shaka/**/*.js
3 | node_modules
4 | __pycache__
5 | dist
6 | build
7 | aaavb
8 | *.mp4
9 | *.dash
10 | *.exe
11 | *.log
12 | *.vtt
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "dash-subtitle-extractor",
3 | "version": "1.0.0",
4 | "license": "ISC",
5 | "bin": "./parser.js",
6 | "devDependencies": {
7 | "args-parser": "^1.3.0",
8 | "google-closure-compiler": "^20210808.0.0",
9 | "google-closure-library": "^20210808.0.0",
10 | "pkg": "^5.3.1",
11 | "xmldom": "^0.6.0"
12 | }
13 | }
14 |
--------------------------------------------------------------------------------
/parsertest.js:
--------------------------------------------------------------------------------
// Smoke test for the MP4-embedded WebVTT parser: parse one init segment and
// one media segment from test/assets, then print every cue that comes back.
goog.module('parser');

require("google-closure-library");
goog.require("shaka.text.Mp4VttParser");
const fs = require("fs");

/** Read a whole file from disk into a Uint8Array. */
const loadSegment = (path) => new Uint8Array(fs.readFileSync(path));

try {
  const vttInitSegment = loadSegment("test/assets/vtt-init.mp4");
  const vttSegment = loadSegment("test/assets/vtt-segment.mp4");
  console.log("文件加载完成");

  const parser = new shaka.text.Mp4VttParser();
  console.log("Mp4VttParser初始化析完成");

  parser.parseInit(vttInitSegment);
  console.log("vttInitSegment解析完成");

  // All offsets are zero, so cue times are relative to the segment itself.
  const time = {periodStart: 0, segmentStart: 0, segmentEnd: 0};
  const result = parser.parseMedia(vttSegment, time);
  console.log("vttSegment解析完成");

  for (const cue of result) {
    console.log(cue);
  }
} catch (err) {
  // Print the error together with a stack trace instead of crashing.
  console.trace(err);
}
--------------------------------------------------------------------------------
/pyshaka/log.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import logging
3 | import datetime
4 | from pathlib import Path
5 |
6 |
def setup_logger(name: str, write_to_file: bool = False) -> logging.Logger:
    '''Return the logger called *name*, configured for DEBUG output.

    A console handler is attached the first time a given name is set up;
    repeated calls reuse it instead of stacking duplicates (which would print
    every message several times). When *write_to_file* is true, a UTF-8 file
    handler writing to ``logs/<name>-<timestamp>.log`` is added as well.

    The ``logs`` folder sits next to the executable in a frozen build,
    otherwise one level above this module's directory. It is created on a
    best-effort basis: a failure to create it is only raised when a log file
    was actually requested.

    Raises:
        OSError: if *write_to_file* is true and the log folder or file cannot
            be created.
    '''
    formatter = logging.Formatter('%(asctime)s %(name)s %(filename)s %(lineno)s : %(levelname)s %(message)s')
    log_time = datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S")
    # sys.frozen is set by freezers; __file__ is not a useful anchor there.
    if getattr(sys, 'frozen', False):
        log_folder_path = Path(sys.executable).parent / 'logs'
    else:
        log_folder_path = Path(__file__).parent.parent / 'logs'
    try:
        # parents/exist_ok make this safe against missing parents and races.
        log_folder_path.mkdir(parents=True, exist_ok=True)
    except OSError:
        # Console-only logging must not fail because the folder is unwritable.
        if write_to_file:
            raise

    lt = logging.getLogger(f'{name}')
    lt.setLevel(logging.DEBUG)
    # FileHandler subclasses StreamHandler, so exclude it explicitly.
    has_console_handler = any(
        isinstance(handler, logging.StreamHandler)
        and not isinstance(handler, logging.FileHandler)
        for handler in lt.handlers
    )
    if not has_console_handler:
        ch = logging.StreamHandler()
        ch.setLevel(logging.DEBUG)
        ch.setFormatter(formatter)
        lt.addHandler(ch)
    if write_to_file:
        log_file_path = log_folder_path / f'{name}-{log_time}.log'
        fh = logging.FileHandler(log_file_path.resolve().as_posix(), encoding='utf-8')
        fh.setLevel(logging.DEBUG)
        fh.setFormatter(formatter)
        lt.addHandler(fh)
        lt.info(f'log file -> {log_file_path}')
    return lt


# Package-wide logger: console output only.
log = setup_logger('pyshaka')
--------------------------------------------------------------------------------
/pyshaka/util/exceptions.py:
--------------------------------------------------------------------------------
class Error(Exception):
    '''Base class for shaka errors.'''


class SeverityError(Error):
    '''Severity Error.'''


class CategoryError(Error):
    '''Category Error.'''


class _ReasonError(Error):
    '''Private base for errors that carry a human-readable ``reason``.'''

    def __init__(self, reason: str):
        # Forward to Exception so ``args`` is populated. With empty ``args``
        # the exception cannot be copied or pickled (both rebuild it as
        # ``cls(*args)``) and its repr hides the reason.
        super().__init__(reason)
        self.reason = reason

    def __str__(self):
        return self.reason


class InvalidMp4VTT(_ReasonError):
    '''Code INVALID_MP4_VTT Error.'''


class InvalidMp4TTML(_ReasonError):
    '''Code INVALID_MP4_TTML Error.'''


class InvalidXML(_ReasonError):
    '''Code INVALID_XML Error.'''


class InvalidTextCue(_ReasonError):
    '''Code INVALID_TEXT_CUE Error.'''


class OutOfBoundsError(Error):
    '''Code BUFFER_READ_OUT_OF_BOUNDS Error.'''


class IntOverflowError(Error):
    '''Code JS_INTEGER_OVERFLOW Error.'''
--------------------------------------------------------------------------------
/pyshaka/util/TextParser.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 |
class TimeContext:
    '''Timing values for the segment being parsed.

    Must be built with the keyword arguments ``periodStart``,
    ``segmentStart`` and ``segmentEnd`` (noted as floats by the original
    author); a missing one raises ``KeyError``. Any other keyword is ignored.
    '''

    def __init__(self, **kwargs):
        for field in ('periodStart', 'segmentStart', 'segmentEnd'):
            setattr(self, field, kwargs[field])
9 |
10 |
class TextParser:
    '''Cursor-based reader that consumes a string with regular expressions.

    ``position_`` is the absolute index of the next unread character in
    ``data_``. Every successful read advances it by the length of the match.
    '''

    def __init__(self, data: str):
        self.data_ = data
        self.position_ = 0

    def atEnd(self):
        '''Return True once the cursor has reached the end of the data.'''
        return self.position_ == len(self.data_)

    def readLine(self):
        '''Read up to and including the next newline; return the text without it.

        Returns None at the end of the data.
        '''
        return self.readRegexReturnCapture_('(.*?)(\n|$)', 1)

    def readWord(self):
        '''Read a run of characters other than space, tab and newline.

        The run may be empty. Returns None at the end of the data.
        '''
        return self.readRegexReturnCapture_('[^ \t\n]*', 0)

    def readRegexReturnCapture_(self, regex: str, index: int):
        '''Consume *regex* at the cursor and return capture group *index*.

        Returns None at the end of the data or when *regex* does not match
        exactly at the cursor.
        '''
        if self.atEnd():
            return None
        ret = self.readRegex(regex)
        if not ret:
            return None
        return ret[index]

    def readRegex(self, regex: str):
        '''Consume *regex* if it matches exactly at the cursor.

        Returns the match object and advances the cursor, or returns None
        (leaving the cursor untouched) when there is no match at the cursor.
        '''
        index = self.indexOf_(regex)
        if self.atEnd() or index is None or index.position != self.position_:
            return None

        self.position_ += index.length
        return index.results

    def indexOf_(self, regex: str):
        '''Find the first match of *regex* at or after the cursor.

        Returns an IndexOf whose ``position`` is an absolute index into
        ``data_``, or None when nothing matches.
        '''
        # Search the full string from an offset rather than a slice: a slice
        # would report match positions relative to the cursor, which can
        # never equal the absolute cursor once it has moved past 0.
        results = re.compile(regex).search(self.data_, self.position_)
        if not results:
            return None
        return IndexOf(results)


class IndexOf:
    '''Location of a regex match: start index, length and the match itself.'''

    def __init__(self, results: re.Match):
        self.position = results.start()
        self.length = results.end() - results.start()
        self.results = results
--------------------------------------------------------------------------------
/.github/workflows/dev_release.yml:
--------------------------------------------------------------------------------
1 | # This is a basic workflow to help you get started with Actions
2 |
3 | name: build_dev_pkg
4 |
5 | # Controls when the workflow will run
6 | on:
7 | # Triggers the workflow on push or pull request events but only for the master branch
8 | # push:
9 | # branches: [ master ]
10 | # pull_request:
11 | # branches: [ master ]
12 |
13 | # Allows you to run this workflow manually from the Actions tab
14 | workflow_dispatch:
15 |
16 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel
17 | jobs:
18 | build:
19 | runs-on: windows-latest
20 | steps:
21 | - name: Get current time
22 | uses: josStorer/get-current-time@v2
23 | id: ct
24 | with:
25 | format: YYYYMMDD-HH
26 | utcOffset: "+08:00"
27 | - uses: actions/checkout@v2
28 | - name: build with pyinstaller
29 | uses: actions/setup-python@v2
30 | with:
31 | python-version: 3.7.5
32 | - name: upgrade pip
33 | run: python -m pip install --upgrade pip
34 | - name: install pyinstaller
35 | run: pip install pyinstaller
36 | - name: run pyinstaller command
37 | env:
38 | BUILD_TIME: "${{ steps.ct.outputs.year }}${{ steps.ct.outputs.month }}${{ steps.ct.outputs.day }}_${{ steps.ct.outputs.hour }}${{ steps.ct.outputs.minute }}${{ steps.ct.outputs.second }}"
39 | run: pyinstaller -n pyshaka_dev_${{ env.BUILD_TIME }} -F pyshaka\main.py
40 | - name: Upload Artifact
41 | env:
42 | BUILD_TIME: "${{ steps.ct.outputs.year }}${{ steps.ct.outputs.month }}${{ steps.ct.outputs.day }}_${{ steps.ct.outputs.hour }}${{ steps.ct.outputs.minute }}${{ steps.ct.outputs.second }}"
43 | uses: actions/upload-artifact@v1.0.0
44 | with:
45 | name: pyshaka_dev_${{ env.BUILD_TIME }}
46 | path: dist\pyshaka_dev_${{ env.BUILD_TIME }}.exe
--------------------------------------------------------------------------------
/pyshaka/text/Mp4TtmlParser.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | from pyshaka.text.Cue import Cue
4 | from pyshaka.text.TtmlTextParser import TtmlTextParser
5 | from pyshaka.util.Mp4Parser import Mp4Parser, ParsedBox
6 | from pyshaka.util.exceptions import InvalidMp4TTML
7 | from pyshaka.util.TextParser import TimeContext
8 |
9 |
class Mp4TtmlParser:
    '''Reads TTML cues from MP4 segments by passing every ``mdat`` payload
    to a TtmlTextParser.'''

    def __init__(self):
        self.parser_ = TtmlTextParser()

    def set_timescale(self, timescale: int):
        '''Accept a timescale and ignore it; this method does nothing.'''
        pass

    def parseInit(self, data: memoryview):
        '''
        Check that the init segment contains an ``stpp`` box.

        Skipping this call causes no problems.

        Raises InvalidMp4TTML when no ``stpp`` box is encountered.
        '''
        sawSTPP = False

        def stpp_callback(box: ParsedBox):
            nonlocal sawSTPP
            sawSTPP = True
            # Nothing else is needed once the box has been seen.
            box.parser.stop()

        # Register a handler for every box on the path down to stpp, then
        # parse; the handlers are invoked as callbacks during parsing.
        (
            Mp4Parser()
            .box('moov', Mp4Parser.children)
            .box('trak', Mp4Parser.children)
            .box('mdia', Mp4Parser.children)
            .box('minf', Mp4Parser.children)
            .box('stbl', Mp4Parser.children)
            .fullBox('stsd', Mp4Parser.sampleDescription)
            .box('stpp', stpp_callback)
            .parse(data)
        )

        if sawSTPP:
            return
        raise InvalidMp4TTML(f'is sawSTPP? {sawSTPP}')

    def parseMedia(self, data: memoryview, time: TimeContext, dont_raise: bool = True) -> List[Cue]:
        '''
        Parse a media segment and return the cues from all of its ``mdat`` boxes.

        When no ``mdat`` box is seen, an empty list is returned if
        *dont_raise* is true; otherwise InvalidMp4TTML is raised.
        '''
        sawMDAT = False
        payload = []

        # The parameter shadows the outer ``data`` on purpose: inside the
        # callback it is the content handed over for one mdat box.
        def mdat_callback(data: bytes):
            nonlocal sawMDAT
            sawMDAT = True
            payload.extend(self.parser_.parseMedia(data, time))

        (
            Mp4Parser()
            .box('mdat', Mp4Parser.allData(mdat_callback))
            .parse(data, partialOkay=False)
        )

        if not sawMDAT and not dont_raise:
            raise InvalidMp4TTML(f'is sawMDAT? {sawMDAT}')
        return payload
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # dash subtitle extractor
2 |
3 | Translated from the shaka-player project by xhlove.
4 |
5 | - output sample
6 |
7 | 
8 |
9 | # python移植版
10 |
11 | 单文件需要先使用[mp4split.exe](https://github.com/nilaoda/Mp4SubtitleParser/blob/main/bin/mp4split.exe)分割
12 |
13 | ## usage
14 |
15 | ```bash
16 | pip install argparse
17 | ```
18 |
19 | ```bash
20 | usage: python -m pyshaka.main [OPTION]...
21 |
22 | A tool that to parse subtitle embedded in DASH stream
23 |
24 | optional arguments:
25 | -h, --help show this help message and exit
26 | -debug, --debug debug is needed
27 | -type TYPE, --type TYPE
28 | subtitle codec, only support wvtt and ttml now
29 | -timescale TIMESCALE, --timescale TIMESCALE
30 | set timescale manually if no init segment
31 | -init-path INIT_PATH, --init-path INIT_PATH
32 | init segment path
33 | -segments-path SEGMENTS_PATH, --segments-path SEGMENTS_PATH
34 | segments folder path
35 | -segment-time SEGMENT_TIME, --segment-time SEGMENT_TIME
36 | single segment duration, usually needed for ttml content, calculation method: d / timescale
37 | ```
38 |
39 | e.g.
40 |
41 | ```bash
42 | python -m pyshaka.main --init-path "test/dashvtt_subtitle_WVTT_zh-TW/init.mp4" --segments-path "test/dashvtt_subtitle_WVTT_zh-TW" --type wvtt
43 | python -m pyshaka.main --segments-path "test/ismttml_text_TTML_pol" --segment-time 60 --type ttml
44 | python -m pyshaka.main --segments-path "test/new" --type ttml
45 | ```
46 |
47 | python移植版本只是完成了部分工作,早期是通过移植原版到node执行,如果你有兴趣知道如何移植为node本地执行,请点击下面按钮展开
48 |
49 |
50 | node本地移植版
51 |
52 | # node本地移植版
53 |
54 | 参考[移植shaka-player字幕解析部分为本地程序](移植shaka-player字幕解析部分为本地程序.md)
55 |
56 | 目前完成了demo,适配ing
57 |
58 | 如果自行修改了`parser.js`,那么记得重新编译下,编译前的配置参考上面的移植文档
59 |
60 | ```bash
61 | npx google-closure-compiler --js parser.js --js shaka/**/*.js --js=node_modules/xmldom/**/*.js --js=node_modules/google-closure-library/**/*.js --js=!**/goog/asserts/asserts.js --dependency_mode=PRUNE --entry_point=goog:parser --js_output_file=parser_compiled.js
62 | ```
63 |
64 | 使用命令
65 |
66 | ```bash
67 | node parser_compiled.js --init-segment=test/dashvtt_subtitle_WVTT_zh-TW/init.mp4 --segments-path=test/dashvtt_subtitle_WVTT_zh-TW --type=wvtt
68 |
69 | node parser_compiled.js --init-segment=test/ttml_test/000.mp4 --segments-path=test/ttml_test --type=ttml
70 |
71 | node parser_compiled.js --segments-path=test/ismttml_text_TTML_pol --type=ttml
72 | ```
73 |
74 | - **路径参数请不要使用反斜杠**
75 | - **参数后面必须跟=**
76 | - --init-segment 是init文件的路径 对于TTML该选项不是必要的
77 | - --segments-path 是分段文件所在的路径
78 | - --type 指定字幕类型 wvtt ttml 二选一
79 | - --debug 可以输出一些debug信息
80 |
81 | 如果要用测试命令记得解压`dashvtt_subtitle_WVTT_zh-TW.zip`和`ttml_test.zip`
82 |
--------------------------------------------------------------------------------
/移植shaka-player字幕解析部分为本地程序.md:
--------------------------------------------------------------------------------
1 | # 前言
2 |
3 | 目前[shaka-packager](https://github.com/google/shaka-packager)并不支持`dash mp4内嵌字幕`转回文本字幕
4 |
5 | 之前从[shaka-player](https://github.com/google/shaka-player)翻译了部分代码,实现了dash mp4内嵌vtt的解析,不过不太完善
6 |
7 | 于是决定直接从shaka-player里面把解析部分单独拿出来
8 |
9 | - https://github.com/google/shaka-player
10 |
11 | # 步骤
12 |
13 | 首先下载shaka-player的源代码,github直接下载zip就可以了
14 |
15 | 然后将`lib`文件夹解压,重命名为`shaka`
16 |
17 | 通过简单阅读源代码,可以发现代码中大量使用`goog.provide`和`goog.require`
18 |
19 | 我们需要知道的是该项目使用了`google-closure-library`这个库
20 |
21 | 以及使用`google-closure-compiler`进行编译,即将整个项目编译为单个js文件
22 |
23 | 接下来新建一个`parsertest.js`文件,在这个文件里面,我们通过类似的写法调用解析的方法即可
24 |
25 | 这部分可以参考[mp4_vtt_parser_unit.js](https://github.com/google/shaka-player/blob/master/test/text/mp4_vtt_parser_unit.js)
26 |
27 | 更多官方案例(PS 大多数时候看官方的测试用例可以学到很多东西)
28 |
29 | - https://github.com/google/shaka-player/blob/master/test/text
30 |
31 | 这里简化了下,最终代码如下
32 |
33 | ```javascript
34 | goog.module('parser');
35 |
36 | require("google-closure-library");
37 | goog.require("shaka.text.Mp4VttParser");
38 | const fs = require("fs");
39 |
40 | try {
41 | const vttInitSegment = new Uint8Array(fs.readFileSync("test/assets/vtt-init.mp4"));
42 | const vttSegment = new Uint8Array(fs.readFileSync("test/assets/vtt-segment.mp4"));
43 | console.log("文件加载完成");
44 | const parser = new shaka.text.Mp4VttParser();
45 | console.log("Mp4VttParser初始化析完成");
46 | parser.parseInit(vttInitSegment);
47 | console.log("vttInitSegment解析完成");
48 | const time = {periodStart: 0, segmentStart: 0, segmentEnd: 0};
49 | const result = parser.parseMedia(vttSegment, time);
50 | console.log("vttSegment解析完成");
51 | for (let i = 0; i < result.length; i++){
52 | console.log(result[i]);
53 | }
54 | } catch (err) {
55 | console.trace(err);
56 | }
57 | ```
58 |
59 | 这里读取了本地文件,将`shaka-player`用的测试文件复制过来即可
60 |
61 | 这里直接弄好了,路径是`test/assets/vtt-init.mp4`和`test/assets/vtt-segment.mp4`
62 |
63 | 现在代码是没办法直接运行的,因为node不认识`goog.xxx`
64 |
65 | 所以要使用`google-closure-compiler`将全部js编译成一个js
66 |
67 | 第一步执行`npm install`命令
68 |
69 | 第二步将`shaka/debug/log.js`里面的`window.console && window.console.log.bind`改为`true`
70 |
71 | 因为node没有`window`
72 |
73 | 第三步在`shaka/util/xml_utils.js`的`class`之前,也就是`shaka.util.XmlUtils = class`前面加一句
74 |
75 | - `var { Node, DOMParser, Element } = require('xmldom');`
76 |
77 | 
78 |
79 | 第四步在`node_modules/xmldom/lib/dom-parser.js`末尾导出的地方加三句
80 |
81 | 
82 |
83 | ```javascript
84 | exports.Node = require('./dom').Node;
85 | exports.Element = require('./dom').Element;
86 | exports.Document = require('./dom').Document;
87 | ```
88 |
89 | 第五步在`node_modules/xmldom/lib/dom.js`末尾导出的地方加两句
90 |
91 | 
92 |
93 | ```javascript
94 | exports.Element = Element;
95 | exports.Document = Document;
96 | ```
97 |
98 | 第六步在`shaka/text/ttml_text_parser.js`开头加两句
99 |
100 | 
101 |
102 | ```javascript
103 | var { Document } = require('xmldom');
104 | var doc = new Document();
105 | ```
106 |
107 | 然后把`const span = document.createElement('span');`的`document`改为`doc`
108 |
109 | 
110 |
111 | 这样才能正常解析ttml
112 |
113 | 现在可以执行下面的命令编译为一个单独的js了
114 |
115 | 有兴趣可以参考`closure-library`官方文档,看看`hello world`的整体过程
116 |
117 | - `https://google.github.io/closure-library/develop/get-started`
118 |
119 | 命令如下
120 |
121 | ```bash
122 | npx google-closure-compiler --js parsertest.js --js shaka/**/*.js --js=node_modules/xmldom/**/*.js --js=node_modules/google-closure-library/**/*.js --js=!**/goog/asserts/asserts.js --dependency_mode=PRUNE --entry_point=goog:parser --js_output_file=parsertest_compiled.js
123 | ```
124 |
125 | 具体可以通过`npx google-closure-compiler --help`查看详细说明
126 |
127 | 简单解释一下
128 |
129 | - `--js` 后面接要一起编译的js文件
130 | - 可以是单个文件
131 | - 可以是`xx/*.js`或者`xx/**/*.js`这样的通配符表示
132 | - 最前面加一个`!`表示排除
133 | - `--dependency_mode` 表示编译的模式
134 | - `PRUNE` 表示根据`--entry_point`设定的文件,然后只导入需要的依赖,这很重要,不然会把很多用不到的也编译了,然后出现很多找不到`window`的报错(被坑了很久)
135 | - `--entry_point` 入口点,姑且理解为主程序入口吧
136 | - `--js_output_file` 最终输出文件
137 |
138 | 注意命令中的`=`不是必须的
139 |
140 | 现在可以执行`node parsertest_compiled.js`命令测试了
141 |
142 | 
143 |
144 | 解析正常~
--------------------------------------------------------------------------------
/pyshaka/util/DataViewReader.py:
--------------------------------------------------------------------------------
1 | import struct
2 | from enum import Enum
3 |
4 | from pyshaka.util.exceptions import OutOfBoundsError
5 | from pyshaka.util.exceptions import IntOverflowError
6 |
7 |
class Endianness(Enum):
    '''Byte order selector for multi-byte integer reads.'''
    BIG_ENDIAN, LITTLE_ENDIAN = 0, 1
11 |
12 |
class DataView:
    '''
    shaka/util/buffer_utils.js

    Fixed-width integer reads over a private, mutable copy of the input.
    Only the 32/64-bit getters and ``toUint8`` are implemented; the remaining
    methods are placeholders that return None.
    '''
    def __init__(self, data: bytes):
        # bytearray() copies, so the view never aliases the caller's buffer.
        self.buffer = memoryview(bytearray(data))
        self.byteLength = len(self.buffer)  # type: int

    def _unpack(self, position: int, code: str, littleEndian: bool) -> int:
        '''Decode one integer of struct type *code* starting at *position*.

        The byte order is always explicit (``<`` or ``>``), never the host's.
        A read that runs past the end of the buffer is left-padded with zero
        bytes to the full width, because ``struct.unpack`` rejects short input.
        '''
        fmt = ('<' if littleEndian else '>') + code
        size = struct.calcsize(fmt)
        buf = self.buffer[position:position + size].tobytes()
        if len(buf) < size:
            buf = b'\x00' * (size - len(buf)) + buf
        return struct.unpack(fmt, buf)[0]

    def getUint8(self):
        pass

    def getUint16(self):
        pass

    def getUint32(self, position: int, littleEndian: bool = False):
        '''Return the unsigned 32-bit integer at *position* (big-endian by default).'''
        return self._unpack(position, 'I', littleEndian)

    def getUint64(self, position: int, littleEndian: bool = False):
        '''Return the unsigned 64-bit integer at *position* (big-endian by default).'''
        return self._unpack(position, 'Q', littleEndian)

    def getInt8(self):
        pass

    def getInt16(self):
        pass

    def getInt32(self, position: int, littleEndian: bool = False):
        '''Return the signed 32-bit integer at *position* (big-endian by default).'''
        return self._unpack(position, 'i', littleEndian)

    def getInt64(self):
        pass

    def readUint8(self):
        pass

    def readUint16(self):
        pass

    def readUint32(self):
        pass

    def readInt8(self):
        pass

    def readInt16(self):
        pass

    def readInt32(self):
        pass

    def readInt64(self):
        pass

    @staticmethod
    def toUint8(data: 'DataView', offset: int = 0, length: int = None):
        '''Return *length* bytes of *data* starting at *offset* as ``bytes``.

        float('inf') cannot be used as a slice bound in Python, so an omitted
        *length* is treated as the view's full ``byteLength``.
        '''
        if length is None:
            length = data.byteLength
        return data.buffer[offset:offset + length].tobytes()
94 |
95 |
class DataViewReader(DataView):
    '''
    shaka/util/data_view_reader.js

    Sequential reader over a DataView: each read decodes a value at the
    current position and then moves the position forward.
    '''

    def __init__(self, data: bytes, endianness: Endianness):
        self.position_ = 0  # type: int
        self.littleEndian_ = endianness == Endianness.LITTLE_ENDIAN  # type: bool
        self.dataView_ = DataView(data)  # type: DataView

    def getDataView(self) -> DataView:
        '''Return the wrapped DataView.'''
        return self.dataView_

    def hasMoreData(self) -> bool:
        '''Return True while the position is before the end of the data.'''
        return self.position_ < self.dataView_.byteLength

    def getPosition(self) -> int:
        '''Return the current read position.'''
        return self.position_

    def getLength(self) -> int:
        '''Return the total number of bytes in the wrapped view.'''
        return self.dataView_.byteLength

    def readUint8(self):
        pass

    def readUint16(self):
        pass

    def readUint32(self) -> int:
        '''Read an unsigned 32-bit integer and advance by 4 bytes.'''
        start = self.position_
        value = self.dataView_.getUint32(start, self.littleEndian_)
        self.position_ = start + 4
        return value

    def readInt32(self):
        '''Read a signed 32-bit integer and advance by 4 bytes.'''
        start = self.position_
        value = self.dataView_.getInt32(start, self.littleEndian_)
        self.position_ = start + 4
        return value

    def readUint64(self) -> int:
        '''Read an unsigned 64-bit integer as two 32-bit halves and advance by 8 bytes.

        Raises IntOverflowError, without advancing, when the high half is
        greater than 0x1FFFFF.
        '''
        start = self.position_
        first = self.dataView_.getUint32(start, self.littleEndian_)
        second = self.dataView_.getUint32(start + 4, self.littleEndian_)
        # Little-endian data stores the low word first, big-endian the high word.
        high, low = (second, first) if self.littleEndian_ else (first, second)

        if high > 0x1FFFFF:
            raise IntOverflowError

        self.position_ = start + 8
        return (high << 32) + low

    def readBytes(self, length: int):
        '''Return the next *length* bytes and advance past them.

        Raises OutOfBoundsError when fewer than *length* bytes remain.
        '''
        assert length >= 0, 'Bad call to DataViewReader.readBytes'
        start = self.position_
        end = start + length
        if end > self.dataView_.byteLength:
            raise OutOfBoundsError
        chunk = DataView.toUint8(self.dataView_, start, length)
        self.position_ = end
        return chunk

    def skip(self, length: int):
        '''Advance by *length* bytes.

        Raises OutOfBoundsError when fewer than *length* bytes remain.
        '''
        assert length >= 0, 'Bad call to DataViewReader.skip'
        end = self.position_ + length
        if end > self.dataView_.byteLength:
            raise OutOfBoundsError
        self.position_ = end

    def rewind(self, length: int):
        pass

    def seek(self, position: int):
        pass

    def readTerminatedString(self):
        pass

    def outOfBounds_(self):
        pass
--------------------------------------------------------------------------------
/pyshaka/text/VttTextParser.py:
--------------------------------------------------------------------------------
1 | import re
2 | from typing import Dict, List, Union
3 | from xml.dom.minidom import parseString, Node, Element, Text
4 | from xml.sax.saxutils import escape
5 | from pyshaka.text.Cue import Cue, defaultTextColor, fontStyle, fontWeight, textDecoration
6 | from pyshaka.log import log
7 |
8 |
9 | class VttTextParser:
10 |
11 | def __init__(self):
12 | pass
13 |
14 | def parseInit(self, data: bytes):
15 | assert False, 'VTT does not have init segments'
16 |
17 | def parseMedia(self, data: bytes, time: int):
18 | pass
19 |
20 | @staticmethod
21 | def parseCueStyles(payload: str, rootCue: Cue, styles: Dict[str, Cue]):
22 | if len(styles) == 0:
23 | VttTextParser.addDefaultTextColor_(styles)
24 | payload = VttTextParser.replaceColorPayload_(payload)
25 | xmlPayload = '' + escape(payload) + ''
26 | elements = parseString(xmlPayload).getElementsByTagName('span') # type: List[Element]
27 | if len(elements) > 0 and elements[0]:
28 | element = elements[0]
29 | cues = [] # type: List[Cue]
30 | childNodes = element.childNodes # type: List[Element]
31 | if len(childNodes) == 1:
32 | childNode = childNodes[0]
33 | if childNode.nodeType == Node.TEXT_NODE or childNode.nodeType == Node.CDATA_SECTION_NODE:
34 | rootCue.payload = payload
35 | return
36 | for childNode in childNodes:
37 | if childNode.nodeValue and childNode.nodeValue.startswith('i>'):
38 | continue
39 | VttTextParser.generateCueFromElement_(childNode, rootCue, cues, styles)
40 | rootCue.nestedCues = cues
41 | else:
42 | log.warning(f'The cue\'s markup could not be parsed: {payload}')
43 | rootCue.payload = payload
44 |
45 | @staticmethod
def generateCueFromElement_(element: Union[Element, Text], rootCue: Cue, cues: List[Cue], styles: Dict[str, Cue]):
    """Append to ``cues`` the cues described by one DOM node and its children.

    Args:
        element: DOM element or text node taken from the parsed payload.
        rootCue: cue whose attributes every generated cue starts from.
        cues: output list, extended in place.
        styles: style cues keyed by tag/class name.
    """
    nestedCue = rootCue.clone()
    if element.nodeType == Node.ELEMENT_NODE and element.nodeName:
        # A node name such as "c.red.bold" carries several style names.
        for tag in re.split('[ .]+', element.nodeName):
            tagStyle = styles.get(tag)
            if tagStyle:
                VttTextParser.mergeStyle_(nestedCue, tagStyle)
            if tag == 'b':
                nestedCue.fontWeight = fontWeight.BOLD
            elif tag == 'i':
                nestedCue.fontStyle = fontStyle.ITALIC
            elif tag == 'u':
                nestedCue.textDecoration.append(textDecoration.UNDERLINE)

    if element.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
        # ``element`` is a Text node here; its ``data`` is the JS textContent.
        for position, text in enumerate(element.data.split('\n')):
            if position > 0:
                # Every newline inside the text becomes a line-break cue.
                lineBreakCue = rootCue.clone()
                lineBreakCue.lineBreak = True
                cues.append(lineBreakCue)
            if len(text) > 0:
                textCue = nestedCue.clone()
                textCue.payload = text
                cues.append(textCue)
        return

    for childNode in element.childNodes:
        VttTextParser.generateCueFromElement_(childNode, nestedCue, cues, styles)
80 |
81 | @staticmethod
def replaceColorPayload_(payload: str):
    """Rewrite closing tags so each one repeats the name of its opening tag.

    A class span opened as ``<c.red>`` is closed by a bare ``</c>``; the
    closing tag is rewritten to ``</c.red>`` so that opening and closing
    names match. Tags whose names already match are kept as they are.

    Args:
        payload: raw cue text.

    Returns:
        The rewritten payload, or ``payload`` unchanged when a closing tag
        cannot be paired with an opening tag.
    """
    names = []  # names of the opening tags that are still unclosed
    nameStart = -1
    pieces = []
    i = 0
    length = len(payload)
    # An explicit index is required: the closing-tag branch jumps ahead,
    # which a ``for ... in range`` loop cannot do.
    while i < length:
        char = payload[i]
        if char == '/':
            end = payload.find('>', i)
            if end <= i:
                return payload
            tagEnd = payload[i + 1:end]
            tagStart = names.pop() if names else None
            if not tagEnd or not tagStart:
                return payload
            if tagStart != tagEnd and (not tagStart.startswith('c.') or tagEnd != 'c'):
                return payload
            # Emit the opening tag's name (identical to tagEnd when they match).
            pieces.append('/' + tagStart + '>')
            # Continue right after the '>' that ends this closing tag.
            i = end
        else:
            if char == '<':
                nameStart = i + 1
            elif char == '>':
                if nameStart > 0:
                    names.append(payload[nameStart:i])
                    nameStart = -1
            pieces.append(char)
        i += 1
    return ''.join(pieces)
118 |
119 | @staticmethod
def addDefaultTextColor_(styles: Dict[str, Cue]):
    """Add one style cue per entry of ``defaultTextColor`` to ``styles``.

    Each entry is keyed by the colour name (``white``, ``lime``, ...).
    """
    for colorName, colorMember in defaultTextColor.__members__.items():
        styleCue = Cue(0, 0, '')
        # NOTE(review): this stores the Enum member itself, not its hex string
        # value; confirm that consumers of ``Cue.color`` expect that.
        styleCue.color = colorMember
        styles[colorName] = styleCue
--------------------------------------------------------------------------------
/pyshaka/util/Mp4BoxParsers.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 | from pyshaka.util.DataViewReader import DataViewReader
3 |
4 |
class ParsedTFHDBox:
    """Values extracted from a ``tfhd`` box.

    All three keyword arguments are mandatory; a missing one raises KeyError.
    """

    def __init__(self, **kwargs):
        self.trackId: int = kwargs['trackId']
        self.defaultSampleDuration: int = kwargs['defaultSampleDuration']
        self.defaultSampleSize: int = kwargs['defaultSampleSize']
11 |
12 |
class ParsedTFDTBox:
    """Value extracted from a ``tfdt`` box; ``baseMediaDecodeTime`` is mandatory."""

    def __init__(self, **kwargs):
        self.baseMediaDecodeTime: int = kwargs['baseMediaDecodeTime']
17 |
18 |
class ParsedMDHDBox:
    """Value extracted from an ``mdhd`` box; ``timescale`` is mandatory."""

    def __init__(self, **kwargs):
        self.timescale: int = kwargs['timescale']
23 |
24 |
class ParsedTREXBox:
    """Values extracted from a ``trex`` box; both keyword arguments are mandatory."""

    def __init__(self, **kwargs):
        self.defaultSampleDuration: int = kwargs['defaultSampleDuration']
        self.defaultSampleSize: int = kwargs['defaultSampleSize']
30 |
31 |
class ParsedTRUNBox:
    """Values extracted from a ``trun`` box; both keyword arguments are mandatory."""

    def __init__(self, **kwargs):
        self.sampleCount: int = kwargs['sampleCount']
        # One entry per sample described by the box.
        self.sampleData: List['ParsedTRUNSample'] = kwargs['sampleData']
37 |
38 |
class ParsedTRUNSample:
    """Per-sample values of a ``trun`` box; all three keyword arguments are mandatory."""

    def __init__(self, **kwargs):
        self.sampleDuration: int = kwargs['sampleDuration']
        self.sampleSize: int = kwargs['sampleSize']
        self.sampleCompositionTimeOffset: int = kwargs['sampleCompositionTimeOffset']
45 |
46 |
class ParsedTKHDBox:
    """Value extracted from a ``tkhd`` box; ``trackId`` is mandatory."""

    def __init__(self, **kwargs):
        self.trackId: int = kwargs['trackId']
51 |
52 |
class Mp4BoxParsers:
    """Stateless field readers for individual MP4 (ISO BMFF) boxes.

    Each method receives a reader positioned at the first field after the
    box header (after version/flags for full boxes) and returns the matching
    ``Parsed*Box`` value object.
    """

    @staticmethod
    def parseTFHD(reader: DataViewReader, flags: int) -> ParsedTFHDBox:
        """Read a ``tfhd`` box.

        Args:
            reader: reader over the box payload.
            flags: full-box flags; they tell which optional fields are present.

        Returns:
            The track id plus the default sample duration/size (``None``
            when the corresponding field is absent).
        """
        defaultSampleDuration = None
        defaultSampleSize = None

        # Read "track_ID"
        trackId = reader.readUint32()

        # Skip "base_data_offset" if present.
        if flags & 0x000001:
            reader.skip(8)

        # Skip "sample_description_index" if present.
        if flags & 0x000002:
            reader.skip(4)

        # Read "default_sample_duration" if present.
        if flags & 0x000008:
            defaultSampleDuration = reader.readUint32()

        # Read "default_sample_size" if present.
        if flags & 0x000010:
            defaultSampleSize = reader.readUint32()

        return ParsedTFHDBox(
            trackId=trackId,
            defaultSampleDuration=defaultSampleDuration,
            defaultSampleSize=defaultSampleSize,
        )

    @staticmethod
    def parseTFDT(reader: DataViewReader, version: int) -> ParsedTFDTBox:
        """Read a ``tfdt`` box; version 1 stores a 64-bit time, otherwise 32-bit."""
        if version == 1:
            baseMediaDecodeTime = reader.readUint64()
        else:
            baseMediaDecodeTime = reader.readUint32()
        return ParsedTFDTBox(baseMediaDecodeTime=baseMediaDecodeTime)

    @staticmethod
    def parseMDHD(reader: DataViewReader, version: int) -> ParsedMDHDBox:
        """Read the timescale of an ``mdhd`` box.

        Version 1 uses 64-bit creation/modification times, other versions
        32-bit ones; both are skipped.
        """
        # Skip "creation_time" and "modification_time".
        timeFieldSize = 8 if version == 1 else 4
        reader.skip(timeFieldSize)
        reader.skip(timeFieldSize)
        timescale = reader.readUint32()
        return ParsedMDHDBox(timescale=timescale)

    @staticmethod
    def parseTREX(reader: DataViewReader) -> ParsedTREXBox:
        """Read the default sample duration and size of a ``trex`` box."""
        # Skip "track_ID" and "default_sample_description_index".
        reader.skip(8)
        defaultSampleDuration = reader.readUint32()
        defaultSampleSize = reader.readUint32()
        return ParsedTREXBox(
            defaultSampleDuration=defaultSampleDuration,
            defaultSampleSize=defaultSampleSize,
        )

    @staticmethod
    def parseTRUN(reader: DataViewReader, version: int, flags: int) -> ParsedTRUNBox:
        """Read a ``trun`` box.

        Args:
            reader: reader over the box payload.
            version: full-box version; selects signed/unsigned time offsets.
            flags: full-box flags; they tell which optional fields are present.

        Returns:
            The sample count and one ``ParsedTRUNSample`` per sample; sample
            fields that are absent from the box stay ``None``.
        """
        sampleCount = reader.readUint32()
        sampleData = []

        # Skip "data_offset" if present.
        if flags & 0x000001:
            reader.skip(4)

        # Skip "first_sample_flags" if present.
        if flags & 0x000004:
            reader.skip(4)

        for _ in range(sampleCount):
            sample = ParsedTRUNSample(
                sampleDuration=None,
                sampleSize=None,
                sampleCompositionTimeOffset=None,
            )

            # Read "sample duration" if present.
            if flags & 0x000100:
                sample.sampleDuration = reader.readUint32()

            # Read "sample_size" if present.
            if flags & 0x000200:
                sample.sampleSize = reader.readUint32()

            # Skip "sample_flags" if present.
            if flags & 0x000400:
                reader.skip(4)

            # Read "sample_time_offset" if present.
            if flags & 0x000800:
                if version == 0:
                    sample.sampleCompositionTimeOffset = reader.readUint32()
                else:
                    sample.sampleCompositionTimeOffset = reader.readInt32()
            sampleData.append(sample)

        return ParsedTRUNBox(sampleCount=sampleCount, sampleData=sampleData)

    @staticmethod
    def parseTKHD(reader: DataViewReader, version: int) -> ParsedTKHDBox:
        """Read the track id of a ``tkhd`` box.

        Version 1 uses 64-bit creation/modification times, other versions
        32-bit ones; both are skipped.
        """
        # Skip "creation_time" and "modification_time".
        timeFieldSize = 8 if version == 1 else 4
        reader.skip(timeFieldSize)
        reader.skip(timeFieldSize)
        trackId = reader.readUint32()
        return ParsedTKHDBox(trackId=trackId)
--------------------------------------------------------------------------------
/pyshaka/text/Cue.py:
--------------------------------------------------------------------------------
1 | from enum import Enum
2 |
3 |
class positionAlign(Enum):
    """Allowed values of ``Cue.positionAlign``."""

    LEFT = 'line-left'
    RIGHT = 'line-right'
    CENTER = 'center'
    AUTO = 'auto'
9 |
10 |
class textAlign(Enum):
    """Allowed values of ``Cue.textAlign``."""

    LEFT = 'left'
    RIGHT = 'right'
    CENTER = 'center'
    START = 'start'
    END = 'end'
17 |
18 |
class displayAlign(Enum):
    """Allowed values of ``Cue.displayAlign``."""

    BEFORE = 'before'
    CENTER = 'center'
    AFTER = 'after'
23 |
24 |
class direction(Enum):
    """Allowed values of ``Cue.direction``."""

    HORIZONTAL_LEFT_TO_RIGHT = 'ltr'
    HORIZONTAL_RIGHT_TO_LEFT = 'rtl'
28 |
29 |
class writingMode(Enum):
    """Allowed values of ``Cue.writingMode``."""

    HORIZONTAL_TOP_TO_BOTTOM = 'horizontal-tb'
    VERTICAL_LEFT_TO_RIGHT = 'vertical-lr'
    VERTICAL_RIGHT_TO_LEFT = 'vertical-rl'
34 |
35 |
class lineInterpretation(Enum):
    """Allowed values of ``Cue.lineInterpretation``."""

    LINE_NUMBER = 0
    PERCENTAGE = 1
39 |
40 |
class lineAlign(Enum):
    """Allowed values of ``Cue.lineAlign``."""

    CENTER = 'center'
    START = 'start'
    END = 'end'
45 |
46 |
class defaultTextColor(Enum):
    """Named text colours and their hex values.

    Member names are lower-case on purpose: they are used as lookup keys.
    """

    white = '#FFF'
    lime = '#0F0'
    cyan = '#0FF'
    red = '#F00'
    yellow = '#FF0'
    magenta = '#F0F'
    blue = '#00F'
    black = '#000'
56 |
57 |
class defaultTextBackgroundColor(Enum):
    """Named background colours (``bg_`` prefix) and their hex values."""

    bg_white = '#FFF'
    bg_lime = '#0F0'
    bg_cyan = '#0FF'
    bg_red = '#F00'
    bg_yellow = '#FF0'
    bg_magenta = '#F0F'
    bg_blue = '#00F'
    bg_black = '#000'
67 |
68 |
class fontWeight(Enum):
    """Allowed values of ``Cue.fontWeight`` (numeric weights)."""

    NORMAL = 400
    BOLD = 700
72 |
73 |
class fontStyle(Enum):
    """Allowed values of ``Cue.fontStyle``."""

    NORMAL = 'normal'
    ITALIC = 'italic'
    OBLIQUE = 'oblique'
78 |
79 |
class textDecoration(Enum):
    """Values that may appear in the ``Cue.textDecoration`` list."""

    UNDERLINE = 'underline'
    LINE_THROUGH = 'lineThrough'
    OVERLINE = 'overline'
84 |
85 |
class Cue:
    """One subtitle cue: timing, text payload and presentation attributes."""

    def __init__(self, startTime: float, endTime: float, payload: str, _settings: str = ''):
        """Create a cue with default presentation attributes.

        Args:
            startTime: start of the cue.
            endTime: end of the cue.
            payload: text of the cue.
            _settings: raw cue settings string, kept as given.
        """
        self.startTime = startTime
        self.direction = direction.HORIZONTAL_LEFT_TO_RIGHT
        self.endTime = endTime
        self.payload = payload
        self.region = CueRegion()
        self.position = None
        self.positionAlign = positionAlign.AUTO
        self.size = 0
        self.textAlign = textAlign.CENTER
        self.writingMode = writingMode.HORIZONTAL_TOP_TO_BOTTOM
        self.lineInterpretation = lineInterpretation.LINE_NUMBER
        self.line = None
        self.lineHeight = ''
        self.lineAlign = lineAlign.START
        self.displayAlign = displayAlign.AFTER
        self.color = ''
        self.backgroundColor = ''
        self.backgroundImage = ''
        self.border = ''
        self.fontSize = ''
        self.fontWeight = fontWeight.NORMAL
        self.fontStyle = fontStyle.NORMAL
        self.fontFamily = ''
        self.letterSpacing = ''
        self.linePadding = ''
        self.opacity = 1
        self.textDecoration = []
        self.wrapLine = True
        self.id = ''
        self.nestedCues = []
        # On instances this flag shadows the ``lineBreak`` factory below;
        # the factory stays reachable as ``Cue.lineBreak(start, end)``.
        self.lineBreak = False
        self.spacer = False
        self.cellResolution = {'columns': 32, 'rows': 15}
        self._settings = _settings

    @staticmethod
    def lineBreak(start: float, end: float) -> 'Cue':
        """Build an empty cue flagged as a line break."""
        cue = Cue(start, end, '')
        cue.lineBreak = True
        return cue

    def clone(self):
        """Return a copy of this cue.

        List attributes are copied (one level deep); every other attribute
        value is shared with the original.
        """
        cue = Cue(0, 0, '')
        for k, v in self.__dict__.items():
            if isinstance(v, list):
                v = v.copy()
            setattr(cue, k, v)
        return cue

    @staticmethod
    def _comparable(value):
        """Return the field dict of a region-like object (dicts pass through)."""
        if isinstance(value, dict):
            return value
        return getattr(value, '__dict__', value)

    @staticmethod
    def equal(cue1: 'Cue', cue2: 'Cue') -> bool:
        """Tell whether two cues carry the same timing, payload and attributes.

        Nested cues are compared pairwise and recursively; ``region`` and
        ``cellResolution`` are compared field by field.
        """
        if cue1.startTime != cue2.startTime or cue1.endTime != cue2.endTime or cue1.payload != cue2.payload:
            return False
        others = cue2.__dict__
        for k, v1 in cue1.__dict__.items():
            if k in ('startTime', 'endTime', 'payload'):
                continue
            if k not in others:
                # An attribute present on one side only means the cues differ.
                return False
            v2 = others[k]
            if k == 'nestedCues':
                if len(v1) != len(v2):
                    return False
                if not all(Cue.equal(a, b) for a, b in zip(v1, v2)):
                    return False
            elif k in ('region', 'cellResolution'):
                if Cue._comparable(v1) != Cue._comparable(v2):
                    return False
            elif v1 != v2:
                return False
        return True
159 |
160 |
class units(Enum):
    """Measurement units used by ``CueRegion`` sizes and anchors."""

    PX = 0
    PERCENTAGE = 1
    LINES = 2
165 |
166 |
class scrollMode(Enum):
    """Allowed values of ``CueRegion.scroll``."""

    NONE = ''
    UP = 'up'
170 |
171 |
class CueRegion:
    """Region a cue is rendered into; starts as a full-size percentage box."""

    def __init__(self, **kwargs):
        # ``kwargs`` is accepted but not read.
        self.id = ''
        # Anchors default to the origin.
        self.viewportAnchorX = self.viewportAnchorY = 0
        self.regionAnchorX = self.regionAnchorY = 0
        # 100 x 100, interpreted through the percentage units set below.
        self.width = self.height = 100
        self.heightUnits = self.widthUnits = self.viewportAnchorUnits = units.PERCENTAGE
        self.scroll = scrollMode.NONE
--------------------------------------------------------------------------------
/pyshaka/util/Mp4Parser.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, Callable
2 | from enum import Enum
3 |
4 | # from pyshaka.log import log
5 | from pyshaka.util.DataViewReader import DataViewReader, Endianness
6 |
7 |
class ParsedBox:
    '''
    Description of one parsed box, handed to the box callbacks.

    In the JS code this type lives in shaka.extern.ParsedBox (declared in
    externs/shaka/mp4_parser.js). Keeping it in a separate Python module
    would cause a circular import, so it is defined here.

    All eight keyword arguments are mandatory; a missing one raises KeyError.
    '''
    def __init__(self, **kwargs):
        self.parser: 'Mp4Parser' = kwargs['parser']
        self.partialOkay: bool = kwargs['partialOkay']
        self.start: int = kwargs['start']
        self.size: int = kwargs['size']
        self.version: int = kwargs['version']
        self.flags: int = kwargs['flags']
        self.reader: 'DataViewReader' = kwargs['reader']
        self.has64BitSize: bool = kwargs['has64BitSize']
24 |
25 |
class Mp4Parser:
    """Callback-driven walker over MP4 (ISO BMFF) boxes.

    Register a callback per box type with ``box``/``fullBox`` and then call
    ``parse``. Boxes without a registered callback are skipped.
    """

    class BoxType_(Enum):
        BASIC_BOX = 0
        FULL_BOX = 1

    def __init__(self):
        self.headers_ = {}  # type: Dict[int, Mp4Parser.BoxType_]
        self.boxDefinitions_ = {}  # type: Dict[int, Callable]
        self.done_ = False  # type: bool

    def box(self, _type: str, definition: Callable) -> 'Mp4Parser':
        """Register ``definition`` for a basic box; returns ``self`` for chaining."""
        typeCode = Mp4Parser.typeFromString_(_type)
        self.headers_[typeCode] = Mp4Parser.BoxType_.BASIC_BOX
        self.boxDefinitions_[typeCode] = definition
        return self

    def fullBox(self, _type: str, definition: Callable) -> 'Mp4Parser':
        """Register ``definition`` for a full box (one with version and flags)."""
        typeCode = Mp4Parser.typeFromString_(_type)
        self.headers_[typeCode] = Mp4Parser.BoxType_.FULL_BOX
        self.boxDefinitions_[typeCode] = definition
        return self

    def stop(self):
        """Ask the parser to stop before reading the next box."""
        self.done_ = True

    def parse(self, data, partialOkay: bool = False, stopOnPartial: bool = False):
        """Walk every top-level box of ``data`` and run the registered callbacks.

        Args:
            data: bytes-like MP4 data.
            partialOkay: truncate a box that extends past the end of the data
                instead of reading beyond it.
            stopOnPartial: stop parsing when a box is incomplete.
        """
        reader = DataViewReader(data, Endianness.BIG_ENDIAN)
        self.done_ = False
        while reader.hasMoreData() and not self.done_:
            self.parseNext(0, reader, partialOkay, stopOnPartial)

    def parseNext(self, absStart: int, reader: 'DataViewReader', partialOkay: bool, stopOnPartial: bool = False):
        """Read one box from ``reader`` and dispatch it to its callback.

        Args:
            absStart: offset of ``reader``'s data, added to the box start
                that is reported to the callback.
            reader: reader positioned at the start of a box.
            partialOkay: see ``parse``.
            stopOnPartial: see ``parse``.
        """
        start = reader.getPosition()

        # size(4 bytes) + type(4 bytes) = 8 bytes
        if stopOnPartial and start + 8 > reader.getLength():
            self.done_ = True
            return

        size = reader.readUint32()
        _type = reader.readUint32()
        has64BitSize = False

        if size == 0:
            # Size 0: the box runs to the end of the data.
            size = reader.getLength() - start
        elif size == 1:
            # Size 1: the real size follows as a 64-bit integer.
            if stopOnPartial and reader.getPosition() + 8 > reader.getLength():
                self.done_ = True
                return
            size = reader.readUint64()
            has64BitSize = True
        # Unlike JS, indexing a missing key raises in Python, hence ``get``.
        boxDefinition = self.boxDefinitions_.get(_type)

        if boxDefinition:
            version = None
            flags = None

            if self.headers_[_type] == Mp4Parser.BoxType_.FULL_BOX:
                if stopOnPartial and reader.getPosition() + 4 > reader.getLength():
                    self.done_ = True
                    return
                # One byte of version followed by three bytes of flags.
                versionAndFlags = reader.readUint32()
                version = versionAndFlags >> 24
                flags = versionAndFlags & 0xFFFFFF

            end = start + size
            if partialOkay and end > reader.getLength():
                end = reader.getLength()

            if stopOnPartial and end > reader.getLength():
                self.done_ = True
                return
            payloadSize = end - reader.getPosition()
            payload = reader.readBytes(payloadSize) if payloadSize > 0 else b''

            # The callback gets its own reader, limited to this box's payload.
            payloadReader = DataViewReader(payload, Endianness.BIG_ENDIAN)

            box = ParsedBox(
                parser=self,
                partialOkay=partialOkay or False,
                version=version,
                flags=flags,
                reader=payloadReader,
                size=size,
                start=start + absStart,
                has64BitSize=has64BitSize,
            )

            boxDefinition(box)
        else:
            remainingInBox = start + size - reader.getPosition()
            remainingInData = reader.getLength() - reader.getPosition()
            # A declared size smaller than the header would give a negative
            # value; clamp it so a malformed box can never move the reader
            # backwards.
            reader.skip(max(min(remainingInBox, remainingInData), 0))

    @staticmethod
    def children(box: 'ParsedBox'):
        """Box callback: parse every child box contained in ``box``."""
        headerSize = Mp4Parser.headerSize(box)
        while box.reader.hasMoreData() and not box.parser.done_:
            box.parser.parseNext(box.start + headerSize, box.reader, box.partialOkay)

    @staticmethod
    def sampleDescription(box: 'ParsedBox'):
        """Box callback: read an entry count, then parse that many child boxes."""
        headerSize = Mp4Parser.headerSize(box)
        count = box.reader.readUint32()
        for _ in range(count):
            box.parser.parseNext(box.start + headerSize, box.reader, box.partialOkay)
            if box.parser.done_:
                break

    @staticmethod
    def allData(callback: Callable):
        """Wrap ``callback`` into a box callback receiving the remaining payload bytes."""
        def alldata_callback(box: 'ParsedBox'):
            _all = box.reader.getLength() - box.reader.getPosition()
            return callback(box.reader.readBytes(_all))
        return alldata_callback

    @staticmethod
    def typeFromString_(name: str):
        """Pack a four-character box name into its 32-bit type code."""
        assert len(name) == 4, 'Mp4 box names must be 4 characters long'

        code = 0
        for char in name:
            code = (code << 8) | ord(char)
        return code

    @staticmethod
    def typeToString(_type: int):
        """Unpack a 32-bit type code into its four-character box name."""
        raw = bytes([
            (_type >> 24) & 0xff,
            (_type >> 16) & 0xff,
            (_type >> 8) & 0xff,
            _type & 0xff
        ])
        # latin-1 maps every byte to one character, so type codes with bytes
        # >= 0x80 (e.g. 0xA9 in '\xa9nam') decode instead of raising.
        return raw.decode('latin-1')

    @staticmethod
    def headerSize(box: 'ParsedBox'):
        """Return the header length of ``box`` in bytes.

        8 bytes of size and type, plus 8 for a 64-bit size, plus 4 for the
        version/flags word of a full box.
        """
        return 8 + (8 if box.has64BitSize else 0) + (4 if box.flags is not None else 0)
--------------------------------------------------------------------------------
/pyshaka/main.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 | from pathlib import Path
3 | from datetime import datetime
4 | from argparse import ArgumentParser
5 |
6 | from pyshaka.util.TextParser import TimeContext
7 | from pyshaka.text.Mp4VttParser import Mp4VttParser
8 | from pyshaka.text.Mp4TtmlParser import Mp4TtmlParser
9 | from pyshaka.text.Cue import Cue
10 | from pyshaka.log import log
11 |
12 |
class CmdArgs:
    """Shape of the parsed command-line options; every field starts as ``None``."""

    def __init__(self):
        self.debug: bool = None
        self.type: str = None
        self.timescale: int = None
        self.init_path: str = None
        self.segments_path: str = None
        self.segment_time: float = None
21 |
22 |
def command_handler(args: 'CmdArgs'):
    '''
    Validate the command-line arguments and normalise them in place.

    ``timescale`` becomes an int, ``segment_time`` a float, and both path
    options are stripped of surrounding whitespace.

    Raises:
        AssertionError: ``args.type`` is not a supported subtitle codec.
        ValueError: ``--segments-path`` was not given.
    '''
    # Explicit raise instead of ``assert`` so the check survives ``python -O``.
    if args.type not in ['wvtt', 'ttml']:
        raise AssertionError(f'not support {args.type} now')
    # Without this check a missing option fails later with an unhelpful
    # AttributeError on None.
    if not args.segments_path:
        raise ValueError('--segments-path option is required')
    args.timescale = int(args.timescale)
    if args.init_path:
        args.init_path = args.init_path.strip()
    args.segments_path = args.segments_path.strip()
    args.segment_time = float(args.segment_time)
33 |
34 |
def loop_nestedCues(lines: 'List[Cue]', nestedCues: 'List[Cue]', index: int, segment_time: float):
    """Flatten one level of nested cues into a single output cue.

    The non-empty payloads of ``nestedCues`` are joined with single spaces
    and stored on the first nested cue, which is shifted by
    ``segment_time * index`` and appended to ``lines``. Deeper levels are
    handled first, recursively, and produce their own entries.

    Args:
        lines: output list, extended in place.
        nestedCues: cues of one nesting level; an empty list is a no-op.
        index: position of the segment the cues came from.
        segment_time: duration of a single segment.
    """
    if not nestedCues:
        return
    parts = []
    for cue in nestedCues:
        if len(cue.nestedCues) > 0:
            loop_nestedCues(lines, cue.nestedCues, index, segment_time)
        if cue.payload != '':
            parts.append(cue.payload)
    payload = ' '.join(parts)
    if payload != '':
        # The first cue of the level is reused as the carrier of the merged text.
        first = nestedCues[0]
        first.payload = payload
        first.startTime += segment_time * index
        first.endTime += segment_time * index
        lines.append(first)
54 |
55 |
def compare(cue: Cue):
    """Sort key for cues: the cue's start time."""
    startTime = cue.startTime
    return startTime
58 |
59 |
60 | # def compare(cue1: Cue, cue2: Cue):
61 | # if cue1.startTime < cue2.startTime:
62 | # return -1
63 | # if cue1.startTime > cue2.startTime:
64 | # return 1
65 | # return 0
66 |
67 |
def gentm(tm: float):
    """Format a time in seconds as a ``HH:MM:SS.mmm`` timestamp.

    The value is rounded to microseconds and then truncated to
    milliseconds. Hours are not wrapped at 24 (90000 seconds gives
    ``25:00:00.000``) and negative values are clamped to zero.

    Args:
        tm: time in seconds.
    """
    totalMicros = max(0, int(round(tm * 1_000_000)))
    totalMillis = totalMicros // 1000
    hours, rest = divmod(totalMillis, 3_600_000)
    minutes, rest = divmod(rest, 60_000)
    seconds, millis = divmod(rest, 1000)
    return f'{hours:02d}:{minutes:02d}:{seconds:02d}.{millis:03d}'
70 |
71 |
def test_parse_mp4vtt():
    """Manual smoke test: parse the bundled VTT init and media segments."""
    assets = Path("test/assets")
    mp4vttparser = Mp4VttParser()
    mp4vttparser.parseInit((assets / "vtt-init.mp4").read_bytes())
    vttSegment = (assets / "vtt-segment.mp4").read_bytes()
    timecontext = TimeContext(periodStart=0, segmentStart=0, segmentEnd=0)
    mp4vttparser.parseMedia(vttSegment, timecontext)
79 |
80 |
def _natural_sort_key(path):
    """Sort key ordering file names with digit runs compared numerically."""
    import re  # local import: ``re`` is not imported at file level
    # With a capturing group, re.split puts the digit runs at odd positions.
    tokens = re.split(r'(\d+)', path.name)
    return [int(tok) if pos % 2 else tok for pos, tok in enumerate(tokens)]


def _merge_adjacent_cues(cues):
    """Merge back-to-back cues with identical text; drop cues with empty text.

    ``cues`` must already be sorted by start time. A cue is merged into the
    previous one when the payloads are equal and it starts exactly where the
    previous one ends.
    """
    merged = []  # type: List[Cue]
    for cue in cues:
        if cue.payload == '':
            continue
        last = merged[-1] if merged else None
        if last is not None and last.payload == cue.payload and last.endTime == cue.startTime:
            last.endTime = cue.endTime
        else:
            merged.append(cue)
    return merged


def parse(args: 'CmdArgs'):
    """Parse every segment of a folder and write the cues to a WebVTT file.

    The output is written next to the segments folder, using the folder
    path with a ``.vtt`` suffix.

    Args:
        args: validated command-line options.

    Raises:
        AssertionError: the codec is unknown, or no cue was found.
    """
    if args.type == 'wvtt':
        parser = Mp4VttParser()
    elif args.type == 'ttml':
        parser = Mp4TtmlParser()
    else:
        raise AssertionError('never should be here')
    if args.init_path:
        init_path = Path(args.init_path)
        parser.parseInit(init_path.read_bytes())
    else:
        parser.set_timescale(args.timescale)
    segments_path = Path(args.segments_path)
    time = TimeContext(periodStart=0, segmentStart=0, segmentEnd=0)
    index = 0
    cues = []
    # iterdir() yields entries in arbitrary order, but ``index`` drives the
    # per-segment time offset below, so the segments are ordered explicitly.
    for segment_path in sorted(segments_path.iterdir(), key=_natural_sort_key):
        if segment_path.is_dir():
            if args.debug:
                log.debug(f'{segment_path} is not a file, skip it')
            continue
        if segment_path.suffix not in ['.mp4', '.m4s', '.dash', '.ts']:
            if args.debug:
                log.debug(f"{segment_path} suffix is not in ['.mp4', '.m4s', '.dash', '.ts'], skip it")
            continue
        if args.init_path and segment_path.name == init_path.name:
            if args.debug:
                log.debug(f"{segment_path} is init_path , skip it")
            continue
        if args.debug:
            log.debug(f'start parseMedia for {segment_path}')
        _cues = parser.parseMedia(segment_path.read_bytes(), time)

        for cue in _cues:
            cue.file = segment_path.name
            if len(cue.nestedCues) > 0:
                loop_nestedCues(cues, cue.nestedCues, index, args.segment_time)
            if cue.payload != '':
                cue.startTime += args.segment_time * index
                cue.endTime += args.segment_time * index
                cues.append(cue)
        index += 1
    # Sort by Cue.startTime, ascending.
    cues.sort(key=compare)
    if args.debug:
        log.debug(f'cues count {len(cues)}')
    if len(cues) == 0:
        raise AssertionError('ohh, it is a bug...')
    # Remove duplicates: a cue that repeats the previous text and starts
    # exactly where the previous cue ends only extends the previous cue.
    cues_fix = _merge_adjacent_cues(cues)
    if args.debug:
        log.debug(f'after reduce duplicated lines, now lines count is {len(cues_fix)}')
    # Collect the blocks in a list and join them once at the end.
    contents = ["WEBVTT"]  # type: List[str]
    for cue in cues_fix:
        settings = cue._settings
        if settings != '':
            settings = ' ' + settings
        contents.append(f'{gentm(cue.startTime)} --> {gentm(cue.endTime)}{settings}\n{cue.payload}')
    content = '\n\n'.join(contents)
    segments_path.with_suffix(".vtt").write_text(content, encoding='utf-8')
    log.info(f'{len(cues_fix)} lines of subtitle was founded. (*^▽^*)')
    log.info(f'write to {segments_path.with_suffix(".vtt").resolve()}')
166 |
167 |
def main():
    """Command-line entry point: read the options, validate them, run ``parse``."""
    parser = ArgumentParser(
        prog='dash-subtitle-extractor',
        usage='python -m pyshaka.main [OPTION]...',
        description='A tool that to parse subtitle embedded in DASH stream',
        add_help=True,
    )
    # (option strings, add_argument keyword arguments), in registration order.
    options = (
        (('-debug', '--debug'), {'action': 'store_true', 'help': 'debug is needed'}),
        (('-type', '--type'), {'choices': ['wvtt', 'ttml'], 'help': 'subtitle codec, only support wvtt and ttml now'}),
        (('-timescale', '--timescale'), {'default': '1000', 'help': 'set timescale manually if no init segment'}),
        (('-init-path', '--init-path'), {'help': 'init segment path'}),
        (('-segments-path', '--segments-path'), {'help': 'segments folder path'}),
        (('-segment-time', '--segment-time'), {'default': '0', 'help': 'single segment duration, usually needed for ttml content, calculation method: d / timescale'}),
    )
    for flags, spec in options:
        parser.add_argument(*flags, **spec)
    args = parser.parse_args()  # type: CmdArgs
    command_handler(args)
    parse(args)
    # Example:
    # python -m pyshaka.main --init-path "test/dashvtt_subtitle_WVTT_zh-TW/init.mp4" --segments-path "test/dashvtt_subtitle_WVTT_zh-TW"
186 |
187 |
188 | if __name__ == '__main__':
189 | main()
--------------------------------------------------------------------------------
/parser.js:
--------------------------------------------------------------------------------
1 | goog.module('parser');
2 |
3 | require("google-closure-library");
4 | goog.require("shaka.text.Mp4VttParser");
5 | goog.require('shaka.text.Mp4TtmlParser');
6 | const fs = require("fs");
7 | const path = require('path');
8 | const args = require('args-parser')(process.argv);
9 |
// ---- Command-line options -------------------------------------------------
// Every validation failure exits with status 1 so that calling scripts can
// detect the error.

let debug = false;
if (args["debug"]){
    debug = true;
    console.info(`[ info] args => ${JSON.stringify(args)}`);
}

// Optional init segment; when given it must exist.
let init_segment = args["init-segment"]
if (init_segment && !fs.existsSync(init_segment)){
    console.log(`[error] init segment file ${init_segment} is not exists`)
    process.exit(1)
}

// Folder that holds the media segments (required).
if (!args["segments-path"]){
    console.log(`[error] --segments-path option is required`)
    process.exit(1)
}
let segments_path = args["segments-path"]
if (!fs.existsSync(segments_path)){
    console.log(`[error] segments folder path ${segments_path} is not exists`)
    process.exit(1)
}

// Subtitle codec (required).
let codecs = ["wvtt", "ttml"];
let subtype = args["type"]
if (!subtype || !codecs.includes(subtype)){
    console.log(`[error] must set --type option which is one of ${codecs}, not ${subtype}`);
    process.exit(1)
}

// Duration of a single segment; ttml falls back to 60 seconds, wvtt to 0.
let segment_time = args["segment-time"]
if (!segment_time){
    if (subtype == "ttml"){
        console.warn(`[ warn] --segment-time has been auto set to 60.0, set if needed. Calculation method: d / timescale`)
        segment_time = 60.0;
    }
    else{
        segment_time = 0;
    }
}
else{
    segment_time = Number.parseFloat(segment_time);
}
49 |
/**
 * Walk `dir` recursively and call `callback(pathname)` for every entry that
 * is not a directory.
 */
function travel(dir, callback) {
    for (const entry of fs.readdirSync(dir)) {
        const pathname = path.join(dir, entry);
        if (fs.statSync(pathname).isDirectory()) {
            travel(pathname, callback);
            continue;
        }
        callback(pathname);
    }
}
60 |
/**
 * Sort comparator: orders cues by ascending `startTime`.
 * Returns -1, 1 or 0.
 */
function compare(a, b) {
    const earlier = a.startTime < b.startTime;
    const later = a.startTime > b.startTime;
    // Booleans coerce to 0/1, giving -1 / 0 / 1.
    return later - earlier;
}
70 |
/**
 * Format a time in seconds as an `HH:MM:SS.mmm` timestamp.
 *
 * Milliseconds are truncated. Hours are not wrapped at 24 (90000 seconds
 * gives "25:00:00.000") and negative values are clamped to zero.
 *
 * @param {number} tm time in seconds
 * @returns {string}
 * @throws {RangeError} when `tm` is not a finite number
 */
function gentm(tm){
    const rawMs = Math.trunc(tm * 1000);
    if (!Number.isFinite(rawMs)) {
        throw new RangeError("Invalid time value");
    }
    const totalMs = Math.max(0, rawMs);
    // Left-pad with zeros without relying on String.prototype.padStart.
    const pad = (value, width) => {
        let text = String(value);
        while (text.length < width) {
            text = "0" + text;
        }
        return text;
    };
    const hours = Math.floor(totalMs / 3600000);
    const minutes = Math.floor(totalMs / 60000) % 60;
    const seconds = Math.floor(totalMs / 1000) % 60;
    const millis = totalMs % 1000;
    return `${pad(hours, 2)}:${pad(minutes, 2)}:${pad(seconds, 2)}.${pad(millis, 3)}`;
}
74 |
/**
 * Flatten one level of nested cues into a single line.
 *
 * The non-empty payloads are joined with single spaces and stored on the
 * first nested cue. When the joined text is not empty, that cue is shifted
 * by `segment_time * index` and pushed to `lines`. Deeper levels are handled
 * first, recursively.
 */
function loop_nestedCues(lines, nestedCues, index){
    let payload = "";
    for (const cue of nestedCues) {
        if (cue.nestedCues && cue.nestedCues.length > 0){
            loop_nestedCues(lines, cue.nestedCues, index);
        }
        if (cue.payload != ""){
            payload = (payload == "") ? cue.payload : `${payload} ${cue.payload}`;
        }
    }
    // The first cue of the level carries the merged text.
    const first = nestedCues[0];
    first.payload = payload;
    if (first.payload != ""){
        const shift = segment_time * index;
        first.startTime += shift;
        first.endTime += shift;
        lines.push(first);
    }
}
100 |
// ---- Main: parse every segment and write <segments folder name>.vtt --------
let parser = null;

try {
    switch (subtype) {
        case "wvtt":
            parser = new shaka.text.Mp4VttParser();
            break;
        case "ttml":
            parser = new shaka.text.Mp4TtmlParser();
            break;
        default:
            // Unreachable after the option checks; still an error exit.
            process.exit(1);
    }
    if (init_segment){
        let InitSegment = new Uint8Array(fs.readFileSync(init_segment));
        parser.parseInit(InitSegment);
    }
    let index = 0;
    let time = { periodStart: 0, segmentStart: 0, segmentEnd: 0 };
    let lines = [];
    let debug_contents = [];
    travel(segments_path, function (pathname) {
        let name = path.basename(pathname);
        // skip init segment
        if (init_segment && name == path.basename(init_segment)) return;
        // now only allow mp4 and m4s file
        if (!name.endsWith(".mp4") && !name.endsWith(".m4s")) return;
        let Segment = new Uint8Array(fs.readFileSync(pathname));
        let results = parser.parseMedia(Segment, time);
        for (let i = 0; i < results.length; i++) {
            let result = results[i];
            result.name = name
            if (debug){
                debug_contents.push(JSON.stringify(result, null, 4));
            }
            if (result.nestedCues && result.nestedCues.length > 0){
                loop_nestedCues(lines, result.nestedCues, index)
            }
            if (result.payload != ""){
                // Shift the cue by the start offset of its segment.
                result.startTime += segment_time * index
                result.endTime += segment_time * index
                lines.push(result);
            }
        }
        index += 1;
    });
    if (debug){
        let content = debug_contents.join("\n-----------------\n");
        fs.writeFileSync(`${path.basename(segments_path)}.log`, content, "utf-8");
        console.log(`[ info] write debug log to ${path.basename(segments_path)}.log`)
    }
    if (lines.length == 0){
        console.warn(`[ warn] no subtitle line was found in ${segments_path}`)
    }
    // Sort by startTime, ascending.
    lines.sort(compare);
    // Remove duplicates: a line that repeats the previous text and starts
    // exactly where the previous line ends only extends the previous line.
    // Lines with an empty payload are dropped.
    let lines_fix = [];
    for (const line of lines) {
        if (line.payload == "") {
            continue;
        }
        const last = lines_fix[lines_fix.length - 1];
        if (last && last.payload == line.payload && last.endTime == line.startTime){
            last.endTime = line.endTime;
        }
        else{
            lines_fix.push(line);
        }
    }
    if (debug){
        console.log(`[ info] after reduce duplicated lines, now lines count is ${lines_fix.length}`)
    }
    // Collect the blocks in a list and join them once at the end.
    let contents = ["WEBVTT"];
    for (let i = 0; i < lines_fix.length; i++){
        let line = lines_fix[i];
        contents.push(`${gentm(line.startTime)} --> ${gentm(line.endTime)}\n${line.payload}`)
    }
    let content = contents.join("\n\n");
    fs.writeFileSync(`${path.basename(segments_path)}.vtt`, content, "utf-8");
    console.log(`[ info] ${lines_fix.length} lines of subtitle was founded. (*^▽^*)`)
    console.log(`[ info] write to ${path.basename(segments_path)}.vtt`)
} catch (err) {
    console.trace(err);
}
207 |
208 | // node parser_compiled.js --init-segment=path/to/init.mp4 --segments-path=path/to/segments/folder
209 | // node parser_compiled.js --init-segment=test/dashvtt_subtitle_WVTT_zh-TW/init.mp4 --segments-path=test/dashvtt_subtitle_WVTT_zh-TW
--------------------------------------------------------------------------------
/pyshaka/text/Mp4VttParser.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 | from pyshaka.text.Cue import Cue
3 | # from pyshaka.text.TextEngine import TextEngine
4 | from pyshaka.text.VttTextParser import VttTextParser
5 | from pyshaka.util.DataViewReader import DataViewReader, Endianness
6 | # from pyshaka.util.Error import Error
7 | # from pyshaka.util.Functional import Functional
8 | from pyshaka.util.Mp4Parser import Mp4Parser, ParsedBox
9 | from pyshaka.util.Mp4BoxParsers import Mp4BoxParsers, ParsedTRUNSample
10 | # from pyshaka.util.StringUtils import StringUtils
11 | # from pyshaka.util.TextParser import TextParser
12 | from pyshaka.util.TextParser import TimeContext
13 | from pyshaka.util.exceptions import InvalidMp4VTT
14 | from pyshaka.log import log
15 |
16 |
17 | class Mp4VttParser:
18 |
19 | def __init__(self):
20 | self.timescale_ = None # type: int
21 |
def set_timescale(self, timescale: int):
    """Store ``timescale`` as the parser's timescale."""
    setattr(self, 'timescale_', timescale)
24 |
def parseInit(self, data: memoryview):
    """Read the timescale from an init segment and check that it describes WVTT.

    Args:
        data: bytes of the init segment.

    Raises:
        InvalidMp4VTT: no timescale is available, or no ``wvtt`` box was seen.
    """
    sawWVTT = False

    def onMdhd(box: ParsedBox):
        assert box.version == 0 or box.version == 1, 'MDHD version can only be 0 or 1'
        self.timescale_ = Mp4BoxParsers.parseMDHD(box.reader, box.version).timescale

    def onWvtt(box: ParsedBox):
        nonlocal sawWVTT
        sawWVTT = True

    # Register a handler for every box on the path down to the sample
    # description, then walk the data; the handlers run as callbacks.
    (
        Mp4Parser()
        .box('moov', Mp4Parser.children)
        .box('trak', Mp4Parser.children)
        .box('mdia', Mp4Parser.children)
        .fullBox('mdhd', onMdhd)
        .box('minf', Mp4Parser.children)
        .box('stbl', Mp4Parser.children)
        .fullBox('stsd', Mp4Parser.sampleDescription)
        .box('wvtt', onWvtt)
        .parse(data)
    )

    if not self.timescale_:
        raise InvalidMp4VTT('Missing timescale for VTT content. It should be located in the MDHD.')

    if not sawWVTT:
        raise InvalidMp4VTT('A WVTT box should have been seen (a valid vtt init segment with no actual subtitles')
57 |
58 | def parseMedia(self, data: memoryview, time: TimeContext) -> List[Cue]:
59 |
60 | def tfdt_callback(box: ParsedBox):
61 | nonlocal baseTime
62 | nonlocal sawTFDT
63 | sawTFDT = True
64 | assert box.version == 0 or box.version == 1, 'TFDT version can only be 0 or 1'
65 | parsedTFDTBox = Mp4BoxParsers.parseTFDT(box.reader, box.version)
66 | baseTime = parsedTFDTBox.baseMediaDecodeTime
67 |
68 | def tfhd_callback(box: ParsedBox):
69 | nonlocal defaultDuration
70 | assert box.flags is not None, 'A TFHD box should have a valid flags value'
71 | parsedTFHDBox = Mp4BoxParsers.parseTFHD(box.reader, box.flags)
72 | defaultDuration = parsedTFHDBox.defaultSampleDuration
73 |
74 | def trun_callback(box: ParsedBox):
75 | nonlocal sawTRUN
76 | nonlocal presentations
77 | sawTRUN = True
78 | assert box.version is not None, 'A TRUN box should have a valid version value'
79 | assert box.version is not None, 'A TRUN box should have a valid flags value'
80 | parsedTRUNBox = Mp4BoxParsers.parseTRUN(box.reader, box.version, box.flags)
81 | presentations = parsedTRUNBox.sampleData
82 |
83 | def mdat_callback(data: bytes):
84 | nonlocal sawMDAT
85 | nonlocal rawPayload
86 | assert not sawMDAT, 'VTT cues in mp4 with multiple MDAT are not currently supported'
87 | sawMDAT = True
88 | rawPayload = data
89 |
90 | if not self.timescale_:
91 | raise InvalidMp4VTT('No init segment for MP4+VTT!')
92 |
93 | baseTime = 0
94 | presentations = [] # type: List[ParsedTRUNSample]
95 | rawPayload = b'' # type: bytes
96 | cues = [] # type: List[Cue]
97 |
98 | sawTFDT = False
99 | sawTRUN = False
100 | sawMDAT = False
101 | defaultDuration = None
102 |
103 | mp4parser = Mp4Parser()
104 | mp4parser = mp4parser.box('moof', Mp4Parser.children)
105 | mp4parser = mp4parser.box('traf', Mp4Parser.children)
106 | mp4parser = mp4parser.fullBox('tfdt', tfdt_callback)
107 | mp4parser = mp4parser.fullBox('tfhd', tfhd_callback)
108 | mp4parser = mp4parser.fullBox('trun', trun_callback)
109 | mp4parser = mp4parser.box('mdat', Mp4Parser.allData(mdat_callback))
110 | mp4parser = mp4parser.parse(data, partialOkay=False)
111 |
112 | if not sawMDAT and not sawTFDT and not sawTRUN:
113 | raise InvalidMp4VTT(f'A required box is missing. Is saw: MDAT {sawMDAT} TFDT {sawTFDT} TRUN {sawTRUN}')
114 |
115 | currentTime = baseTime
116 |
117 | reader = DataViewReader(rawPayload, Endianness.BIG_ENDIAN)
118 | for presentation in presentations:
119 | duration = presentation.sampleDuration or defaultDuration
120 | if presentation.sampleCompositionTimeOffset:
121 | startTime = baseTime + presentation.sampleCompositionTimeOffset
122 | else:
123 | startTime = currentTime
124 | currentTime = startTime + (duration or 0)
125 | totalSize = 0
126 | while True:
127 | # Read the payload size.
128 | payloadSize = reader.readUint32()
129 | totalSize += payloadSize
130 | # Skip the type.
131 | payloadType = reader.readUint32()
132 | payloadName = Mp4Parser.typeToString(payloadType)
133 |
134 | # Read the data payload.
135 | payload = None
136 | if payloadName == 'vttc':
137 | if payloadSize > 8:
138 | payload = reader.readBytes(payloadSize - 8)
139 | elif payloadName == 'vtte':
140 | # It's a vtte, which is a vtt cue that is empty. Ignore any data that does exist.
141 | reader.skip(payloadSize - 8)
142 | else:
143 | log.error(f'Unknown box {payloadName}! Skipping!')
144 | reader.skip(payloadSize - 8)
145 |
146 | if duration:
147 | if payload:
148 | assert self.timescale_ is not None, 'Timescale should not be null!'
149 | cue = Mp4VttParser.parseVTTC_(
150 | payload,
151 | time.periodStart + startTime / self.timescale_,
152 | time.periodStart + currentTime / self.timescale_
153 | )
154 | cues.append(cue)
155 | else:
156 | log.error('WVTT sample duration unknown, and no default found!')
157 | assert not presentation.sampleSize or totalSize <= presentation.sampleSize, 'The samples do not fit evenly into the sample sizes given in the TRUN box!'
158 |
159 | # 检查是不是应该结束循环
160 | if presentation.sampleSize and totalSize < presentation.sampleSize:
161 | continue
162 | else:
163 | break
164 | assert not reader.hasMoreData(), 'MDAT which contain VTT cues and non-VTT data are not currently supported!'
165 | # parseVTTC_ 有可能返回的是 None 这里过滤一下
166 | return [cue for cue in cues if cue]
167 |
168 | @staticmethod
169 | def parseVTTC_(data: bytes, startTime: float, endTime: float):
170 |
171 | def payl_callback(data: bytes):
172 | nonlocal payload
173 | payload = data.decode('utf-8')
174 |
175 | def iden_callback(data: bytes):
176 | nonlocal _id
177 | _id = data.decode('utf-8')
178 |
179 | def sttg_callback(data: bytes):
180 | nonlocal settings
181 | settings = data.decode('utf-8')
182 |
183 | payload = None
184 | _id = None
185 | settings = ''
186 |
187 | mp4parser = Mp4Parser()
188 | mp4parser = mp4parser.box('payl', Mp4Parser.allData(payl_callback))
189 | mp4parser = mp4parser.box('iden', Mp4Parser.allData(iden_callback))
190 | mp4parser = mp4parser.box('sttg', Mp4Parser.allData(sttg_callback))
191 | mp4parser = mp4parser.parse(data)
192 |
193 | if payload:
194 | return Mp4VttParser.assembleCue_(payload, _id, settings, startTime, endTime)
195 | else:
196 | return None
197 |
198 | @staticmethod
199 | def assembleCue_(payload: bytes, _id: str, settings: str, startTime: float, endTime: float):
200 | cue = Cue(startTime, endTime, '', _settings=settings)
201 |
202 | styles = {}
203 | VttTextParser.parseCueStyles(payload, cue, styles)
204 |
205 | if _id:
206 | cue.id = _id
207 |
208 | # if settings:
209 | # # TextParser not fully implemented yet
210 | # parser = TextParser(settings)
211 | # word = parser.readWord()
212 | # while word:
213 | # if not VttTextParser.parseCueSetting(cue, word, VTTRegions=[]):
214 | # log.warning(f'VTT parser encountered an invalid VTT setting: {word}, The setting will be ignored.')
215 |
216 | # parser.skipWhitespace()
217 | # word = parser.readWord()
218 | return cue
--------------------------------------------------------------------------------
/pyshaka/text/TtmlTextParser.py:
--------------------------------------------------------------------------------
1 | import re
2 | from xml.dom.minidom import parseString, Element, Node, Document
3 | from enum import Enum
4 | from typing import List, Union
5 |
6 | from pyshaka.text.Cue import Cue, CueRegion, units, direction, writingMode
7 | from pyshaka.text.Cue import textAlign, lineAlign, positionAlign, displayAlign
8 | from pyshaka.text.Cue import fontStyle, textDecoration
9 | from pyshaka.util.TextParser import TimeContext
10 | from pyshaka.util.exceptions import InvalidXML, InvalidTextCue
11 | from pyshaka.log import log
12 |
# Shared, otherwise empty DOM document; parseCue_ uses it only as a factory
# for the synthetic <span> elements that wrap bare text nodes.
document = Document()
14 |
15 |
class RateInfo_:
    """Frame, sub-frame and tick rates used to evaluate TTML time expressions.

    All arguments are the raw attribute strings; an empty, missing or
    unparsable value (and a value of zero) falls back to a default.

    Attributes set on the instance:
        frameRate: frames per unit of time, default 30, scaled by the multiplier.
        subFrameRate: sub-frames per frame, default 1.
        tickRate: ticks per unit of time; when absent it is
            ``frameRate * subFrameRate`` if a frame rate was given, else 1.
    """

    def __init__(self, frameRate: str, subFrameRate: str, frameRateMultiplier: str, tickRate: str):
        def toNumber(text, default):
            # float() raises TypeError for None and ValueError for '' or junk.
            try:
                value = float(text)
            except (TypeError, ValueError):
                return default
            # A rate of zero is unusable as a divisor, so treat it as absent.
            return value or default

        self.frameRate = toNumber(frameRate, 30)
        self.subFrameRate = toNumber(subFrameRate, 1)
        self.tickRate = toNumber(tickRate, 0)
        if self.tickRate == 0:
            # The tick rate is derived before the multiplier is applied below.
            if frameRate:
                self.tickRate = self.frameRate * self.subFrameRate
            else:
                self.tickRate = 1
        if frameRateMultiplier:
            # Expected form: "<numerator> <denominator>", e.g. "1000 1001".
            multiplier = re.match(r'^(\d+) (\d+)$', frameRateMultiplier)
            if multiplier:
                numerator = float(multiplier.group(1))
                denominator = float(multiplier.group(2))
                if denominator:
                    self.frameRate *= numerator / denominator
42 |
43 |
class TtmlTextParser:
    """Parse a TTML document into a list of Cue objects.

    Every method except ``parseInit`` / ``parseMedia`` is a static helper.
    Names ending in ``_`` mirror the structure of the parser this was ported
    from; names starting with ``_`` are small local utilities.
    """

    def parseInit(self):
        """TTML has no init segments, so calling this is always an error."""
        raise AssertionError('TTML does not have init segments')

    def parseMedia(self, data: bytes, time: 'TimeContext') -> 'List[Cue]':
        """Parse one TTML document.

        Args:
            data: UTF-8 encoded TTML text.
            time: context whose ``periodStart`` is added to every cue time.

        Returns:
            One cue per ``<p>`` inside a ``<div>`` of the body (or per
            ``<div>`` that has no ``<p>``). Empty input, unparsable XML and a
            missing ``<body>`` all yield an empty list.

        Raises:
            InvalidXML: no ``<tt>`` element, a ``parsererror`` element, or an
                invalid ``xml:space`` value.
            InvalidTextCue: a ``<p>`` directly under ``<body>``, or a
                ``<span>`` directly under a ``<div>``.
        """
        ttpNs = parameterNs_
        ttsNs = styleNs_
        text = data.decode('utf-8')
        cues = []  # type: List[Cue]

        if text == '':
            return cues
        try:
            xml = parseString(text)
        except Exception as e:
            # Deliberately best-effort: a broken document yields no cues.
            log.error('xml parseString', exc_info=e)
            return cues
        parsererrors = xml.getElementsByTagName('parsererror')  # type: List[Element]
        if len(parsererrors) > 0 and parsererrors[0]:
            raise InvalidXML('ttml parsererror')
        tts = xml.getElementsByTagName('tt')  # type: List[Element]
        if len(tts) == 0:
            raise InvalidXML('TTML does not contain <tt> tag.')
        tt = tts[0]
        bodys = tt.getElementsByTagName('body')  # type: List[Element]
        if len(bodys) == 0:
            return []
        frameRate = tt.getAttributeNS(ttpNs, 'frameRate')
        subFrameRate = tt.getAttributeNS(ttpNs, 'subFrameRate')
        frameRateMultiplier = tt.getAttributeNS(ttpNs, 'frameRateMultiplier')
        tickRate = tt.getAttributeNS(ttpNs, 'tickRate')
        cellResolution = tt.getAttributeNS(ttpNs, 'cellResolution')
        spaceStyle = tt.getAttribute('xml:space') or 'default'
        extent = tt.getAttributeNS(ttsNs, 'extent')

        if spaceStyle != 'default' and spaceStyle != 'preserve':
            raise InvalidXML(f'Invalid xml:space value: {spaceStyle}')
        whitespaceTrim = spaceStyle == 'default'
        rateInfo = RateInfo_(frameRate, subFrameRate, frameRateMultiplier, tickRate)
        cellResolutionInfo = TtmlTextParser.getCellResolution_(cellResolution)

        metadatas = tt.getElementsByTagName('metadata')  # type: List[Element]
        metadataElements = []
        if len(metadatas) > 0:
            metadataElements = [node for node in metadatas[0].childNodes if isinstance(node, Element)]
        styles = tt.getElementsByTagName('style')  # type: List[Element]
        regionElements = tt.getElementsByTagName('region')  # type: List[Element]
        cueRegions = []

        for region in regionElements:
            cueRegion = TtmlTextParser.parseCueRegion_(region, styles, extent)
            if cueRegion:
                cueRegions.append(cueRegion)

        body = bodys[0]
        if any(isinstance(node, Element) and node.tagName == 'p' for node in body.childNodes):
            raise InvalidTextCue('<p> can only be inside <div> in TTML')
        for divNode in body.childNodes:
            if not isinstance(divNode, Element):
                continue
            if divNode.tagName != 'div':
                continue
            has_p = False
            for pChildren in divNode.childNodes:
                if not isinstance(pChildren, Element):
                    continue
                if pChildren.tagName == 'span':
                    raise InvalidTextCue('<span> can only be inside <p> in TTML')
                if pChildren.tagName == 'p':
                    has_p = True
                    cue = TtmlTextParser.parseCue_(pChildren, time.periodStart, rateInfo, metadataElements, styles, regionElements, cueRegions, whitespaceTrim, False, cellResolutionInfo)
                    if cue:
                        cues.append(cue)
            if not has_p:
                # A <div> without any <p> is treated as a cue itself.
                cue = TtmlTextParser.parseCue_(divNode, time.periodStart, rateInfo, metadataElements, styles, regionElements, cueRegions, whitespaceTrim, False, cellResolutionInfo)
                if cue:
                    cues.append(cue)
        return cues

    @staticmethod
    def _toFloat(text, default=0.0):
        """Return ``float(text)``, or ``default`` for None, '' or unparsable text."""
        try:
            return float(text)
        except (TypeError, ValueError):
            return default

    @staticmethod
    def _toNumber(text: str):
        """Convert a matched numeric string: int for plain digits, float otherwise."""
        return int(text) if text.isdigit() else float(text)

    @staticmethod
    def _textContent(node) -> str:
        """Concatenate the text of every text/CDATA node below ``node``."""
        if node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
            return node.data
        return ''.join(TtmlTextParser._textContent(child) for child in node.childNodes)

    @staticmethod
    def parseCue_(cueNode: Union[Node, Element], offset, rateInfo, metadataElements, styles, regionElements, cueRegions, whitespaceTrim, isNested, cellResolution):
        """Turn one element (or text node) into a Cue, recursing into children.

        Args:
            cueNode: the ``<p>``, ``<div>``, ``<span>``, ``<br>`` element or text node.
            offset: value added to the resolved start and end times.
            rateInfo: RateInfo_ used to evaluate time expressions.
            metadataElements: candidate elements for background images.
            styles: all ``<style>`` elements of the document.
            regionElements: all ``<region>`` elements of the document.
            cueRegions: parsed CueRegion objects, matched by id.
            whitespaceTrim: whitespace handling inherited from the parent.
            isNested: True when called for a child of another cue.
            cellResolution: dict from getCellResolution_, or None.

        Returns:
            The cue, or None when the node contributes nothing.
        """
        if cueNode.nodeType == Node.COMMENT_NODE:
            # Comments carry neither text nor timing.
            return None

        # Captured up front: appendChild below detaches a text node from its parent.
        parentElement = cueNode.parentNode  # type: Element

        if cueNode.nodeType == Node.TEXT_NODE:
            # Wrap bare text in a synthetic <span> so it is handled like an element.
            cueElement = document.createElement('span')
            cueElement.appendChild(cueNode)
        else:
            assert cueNode.nodeType == Node.ELEMENT_NODE, 'nodeType should be ELEMENT_NODE!'
            cueElement = cueNode
        assert cueElement, 'cueElement should be non-None!'

        # The element's own xml:space wins; otherwise inherit from the parent.
        # The parentheses matter: `a or b if c else d` is `(a or b) if c else d`.
        spaceStyle = cueElement.getAttribute('xml:space') or ('default' if whitespaceTrim else 'preserve')
        localWhitespaceTrim = spaceStyle == 'default'

        # Only the first child is inspected, and whitespace-only text does not count.
        firstChild = cueElement.firstChild
        hasTextContent = bool(firstChild and firstChild.nodeValue and firstChild.nodeValue.strip())
        hasTimeAttributes = any(cueElement.hasAttribute(name) for name in ('begin', 'end', 'dur'))
        if not hasTimeAttributes and not hasTextContent and cueElement.tagName != 'br':
            if not isNested or localWhitespaceTrim:
                return None

        start, end = TtmlTextParser.parseTime_(cueElement, rateInfo)
        # Walk up the ancestors (stopping at <tt>) to inherit / offset the times.
        while parentElement and parentElement.nodeType == Node.ELEMENT_NODE and parentElement.tagName != 'tt':
            start, end = TtmlTextParser.resolveTime_(parentElement, rateInfo, start, end)
            parentElement = parentElement.parentNode
        if start is None:
            start = 0
        start += offset
        if end is None:
            # -1 marks a cue without a known end time.
            end = -1
        else:
            end += offset

        if cueElement.tagName == 'br':
            cue = Cue(start, end, '')
            cue.lineBreak = True
            return cue

        payload = ''
        nestedCues = []
        if all(child.nodeType == Node.TEXT_NODE for child in cueElement.childNodes):
            # Text-only (or empty) element: its text is the payload.
            payload = ''.join(child.nodeValue or '' for child in cueElement.childNodes)
            if localWhitespaceTrim:
                # Trim the ends and collapse inner whitespace runs.
                payload = payload.strip()
                payload = re.sub(r'\s+', ' ', payload)
        else:
            # Iterate over a copy: parsing a text child moves it into a new <span>.
            for childNode in list(cueElement.childNodes):
                nestedCue = TtmlTextParser.parseCue_(
                    childNode,
                    offset,
                    rateInfo,
                    metadataElements,
                    styles,
                    regionElements,
                    cueRegions,
                    localWhitespaceTrim,
                    True,
                    cellResolution,
                )
                if nestedCue:
                    nestedCues.append(nestedCue)
        cue = Cue(start, end, payload)
        cue.nestedCues = nestedCues

        if cellResolution:
            cue.cellResolution = cellResolution

        regionElements = TtmlTextParser.getElementsFromCollection_(cueElement, 'region', regionElements, '')
        regionElement = None
        if len(regionElements) > 0 and regionElements[0].getAttribute('xml:id'):
            regionElement = regionElements[0]
            regionId = regionElement.getAttribute('xml:id')
            matchedRegion = next((item for item in cueRegions if item.id == regionId), None)
            if matchedRegion is not None:
                cue.region = matchedRegion

        imageElement = None
        for nameSpace in smpteNsList_:
            imageElements = TtmlTextParser.getElementsFromCollection_(cueElement, 'backgroundImage', metadataElements, '#', nameSpace)
            if len(imageElements) > 0:
                imageElement = imageElements[0]
                break

        isLeaf = len(nestedCues) == 0

        TtmlTextParser.addStyle_(
            cue,
            cueElement,
            regionElement,
            imageElement,
            styles,
            isNested,
            isLeaf
        )

        return cue

    @staticmethod
    def resolveTime_(parentElement, rateInfo: 'RateInfo_', start, end):
        """Combine a child's times with those of one ancestor.

        A missing child time is inherited from the parent; a present one is
        shifted by the parent's start time (both start and end are relative
        to the parent's *start*).

        Returns:
            The ``(start, end)`` tuple; either entry may still be None.
        """
        # NOTE(review): the original author flagged this function as possibly buggy.
        parentStart, parentEnd = TtmlTextParser.parseTime_(parentElement, rateInfo)

        if start is None:
            start = parentStart
        elif parentStart is not None:
            start += parentStart

        if end is None:
            end = parentEnd
        elif parentStart is not None:
            # Not a typo: the end time is also relative to the parent's start.
            end += parentStart

        return start, end

    @staticmethod
    def parseTime_(element: Element, rateInfo: 'RateInfo_'):
        """Read ``begin`` / ``end`` / ``dur`` from an element.

        Returns:
            ``(start, end)``; ``end`` is derived from ``dur`` when only that is
            given, and an entry is None when it cannot be determined.
        """
        start = TtmlTextParser.parseTimeAttribute_(element.getAttribute('begin'), rateInfo)
        end = TtmlTextParser.parseTimeAttribute_(element.getAttribute('end'), rateInfo)
        duration = TtmlTextParser.parseTimeAttribute_(element.getAttribute('dur'), rateInfo)
        if end is None and duration is not None:
            # Without `begin` the duration counts from zero.
            end = (start or 0) + duration
        return start, end

    @staticmethod
    def parseFramesTime_(rateInfo: 'RateInfo_', text):
        """Evaluate a frame count such as ``75f`` or ``75.5f``."""
        results = timeFramesFormat_.findall(text)
        frames = TtmlTextParser._toFloat(results[0])
        return frames / rateInfo.frameRate

    @staticmethod
    def parseTickTime_(rateInfo: 'RateInfo_', text):
        """Evaluate a tick count such as ``50t`` or ``50.5t``."""
        results = timeTickFormat_.findall(text)
        ticks = TtmlTextParser._toFloat(results[0])
        return ticks / rateInfo.tickRate

    @staticmethod
    def parseTimeFromRegex_(regex: re.Pattern, text: str) -> Union[float, None]:
        """Evaluate a time whose parts are captured by ``regex``.

        The capture groups are read in the order hours, minutes, seconds,
        milliseconds; a group that is missing, unmatched or empty counts as 0.

        Returns:
            The combined value, or None when ``regex`` does not match or only
            matches the empty string.
        """
        match = regex.match(text)
        # Test the whole match, not the first group: the hours group is
        # legitimately empty for values such as "02:43.03" or "3m".
        if match is None or match.group(0) == '':
            return None

        # Pad so patterns with fewer than four groups can be unpacked as well.
        parts = (match.groups() + (None,) * 4)[:4]
        hours, minutes, seconds, milliseconds = (TtmlTextParser._toFloat(part) for part in parts)

        return (milliseconds / 1000) + seconds + (minutes * 60) + (hours * 3600)

    @staticmethod
    def parseColonTimeWithFrames_(rateInfo: 'RateInfo_', text: str) -> float:
        """Evaluate ``01:02:43:07`` ('07' is frames) or ``01:02:43:07.1`` (with subframes)."""
        hours, minutes, seconds, frames, subframes = timeColonFormatFrames_.findall(text)[0]

        hours = int(hours)
        minutes = int(minutes)
        seconds = int(seconds)
        frames = int(frames)
        # The subframe group is optional and comes back as '' when absent.
        subframes = int(subframes) if subframes else 0

        frames += subframes / rateInfo.subFrameRate
        seconds += frames / rateInfo.frameRate

        return seconds + (minutes * 60) + (hours * 3600)

    @staticmethod
    def parseTimeAttribute_(text: str, rateInfo: 'RateInfo_'):
        """Evaluate a TTML time expression in any of the supported formats.

        Returns:
            The parsed value, or None for an empty / missing attribute.

        Raises:
            InvalidTextCue: if the text matches none of the known formats.
        """
        if not text:
            return None
        # Order matters: the more specific formats are tried first.
        if timeColonFormatFrames_.match(text):
            return TtmlTextParser.parseColonTimeWithFrames_(rateInfo, text)
        if timeColonFormat_.match(text):
            return TtmlTextParser.parseTimeFromRegex_(timeColonFormat_, text)
        if timeColonFormatMilliseconds_.match(text):
            return TtmlTextParser.parseTimeFromRegex_(timeColonFormatMilliseconds_, text)
        if timeFramesFormat_.match(text):
            return TtmlTextParser.parseFramesTime_(rateInfo, text)
        if timeTickFormat_.match(text):
            return TtmlTextParser.parseTickTime_(rateInfo, text)
        if timeHMSFormat_.match(text):
            return TtmlTextParser.parseTimeFromRegex_(timeHMSFormat_, text)
        raise InvalidTextCue('Could not parse cue time range in TTML')

    @staticmethod
    def addStyle_(cue, cueElement, region, imageElement: Element, styles: List[Element], isNested: bool, isLeaf: bool):
        """Copy the styling attributes that apply to ``cueElement`` onto ``cue``.

        Args:
            cue: the Cue to update in place.
            cueElement: element the cue was built from.
            region: the cue's ``<region>`` element, or None.
            imageElement: background image element, or None.
            styles: all ``<style>`` elements of the document.
            isNested: True for a cue nested in another cue.
            isLeaf: True for a cue without nested cues.
        """
        # Region styles are only inherited by nested or leaf cues.
        shouldInheritRegionStyles = isNested or isLeaf

        def style(attribute):
            return TtmlTextParser.getStyleAttribute_(cueElement, region, styles, attribute, shouldInheritRegionStyles)

        _direction = style('direction')
        if _direction == 'rtl':
            cue.direction = direction.HORIZONTAL_RIGHT_TO_LEFT

        _writingMode = style('writingMode')
        if _writingMode == 'tb' or _writingMode == 'tblr':
            cue.writingMode = writingMode.VERTICAL_LEFT_TO_RIGHT
        elif _writingMode == 'tbrl':
            cue.writingMode = writingMode.VERTICAL_RIGHT_TO_LEFT
        elif _writingMode == 'rltb' or _writingMode == 'rl':
            cue.direction = direction.HORIZONTAL_RIGHT_TO_LEFT
        elif _writingMode:
            cue.direction = direction.HORIZONTAL_LEFT_TO_RIGHT

        align = style('textAlign')
        if align:
            # The lookup tables are Enums, so the mapped constant is `.value`;
            # `start` / `end` have no position alignment and are skipped.
            mappedPosition = textAlignToPositionAlign_.__members__.get(align)
            if mappedPosition is not None:
                cue.positionAlign = mappedPosition.value
            mappedLine = textAlignToLineAlign_.__members__.get(align)
            if mappedLine is not None:
                cue.lineAlign = mappedLine.value

            assert textAlign.__members__.get(align.upper()), f'{align.upper()} Should be in Cue.textAlign values!'
            cue.textAlign = textAlign[align.upper()]
        else:
            cue.textAlign = textAlign.START

        _displayAlign = style('displayAlign')
        if _displayAlign:
            assert displayAlign.__members__.get(_displayAlign.upper()), f'{_displayAlign.upper()} Should be in Cue.displayAlign values!'
            cue.displayAlign = displayAlign[_displayAlign.upper()]

        color = style('color')
        if color:
            cue.color = color

        backgroundColor = style('backgroundColor')
        if backgroundColor:
            cue.backgroundColor = backgroundColor

        border = style('border')
        if border:
            cue.border = border

        fontFamily = style('fontFamily')
        if fontFamily:
            cue.fontFamily = fontFamily

        _fontWeight = style('fontWeight')
        if _fontWeight == 'bold':
            # NOTE(review): assumes pyshaka.text.Cue defines a `fontWeight`
            # enum with a BOLD member, like the other style enums imported
            # by this module - confirm. Imported here to keep the edit local.
            from pyshaka.text.Cue import fontWeight as cueFontWeight
            cue.fontWeight = cueFontWeight.BOLD

        wrapOption = style('wrapOption')
        cue.wrapLine = wrapOption != 'noWrap'

        lineHeight = style('lineHeight')
        if lineHeight and unitValues_.match(lineHeight):
            cue.lineHeight = lineHeight

        fontSize = style('fontSize')
        if fontSize and (unitValues_.match(fontSize) or percentValue_.match(fontSize)):
            cue.fontSize = fontSize

        _fontStyle = style('fontStyle')
        if _fontStyle:
            assert fontStyle.__members__.get(_fontStyle.upper()), f'{_fontStyle.upper()} Should be in Cue.fontStyle values!'
            cue.fontStyle = fontStyle[_fontStyle.upper()]

        if imageElement is not None:
            backgroundImageType = imageElement.getAttribute('imageType') or imageElement.getAttribute('imagetype')
            backgroundImageEncoding = imageElement.getAttribute('encoding')
            # minidom elements have no `textContent`, so gather the text manually.
            backgroundImageData = TtmlTextParser._textContent(imageElement).strip()
            if backgroundImageType == 'PNG' and backgroundImageEncoding == 'Base64' and backgroundImageData:
                cue.backgroundImage = 'data:image/png;base64,' + backgroundImageData

        letterSpacing = style('letterSpacing')
        if letterSpacing and unitValues_.match(letterSpacing):
            cue.letterSpacing = letterSpacing

        linePadding = style('linePadding')
        if linePadding and unitValues_.match(linePadding):
            cue.linePadding = linePadding

        opacity = style('opacity')
        if opacity:
            cue.opacity = float(opacity)

        # Region decorations are applied first so the element can override them.
        textDecorationRegion = TtmlTextParser.getStyleAttributeFromRegion_(region, styles, 'textDecoration')
        if textDecorationRegion:
            TtmlTextParser.addTextDecoration_(cue, textDecorationRegion)

        textDecorationElement = TtmlTextParser.getStyleAttributeFromElement_(cueElement, styles, 'textDecoration')
        if textDecorationElement:
            TtmlTextParser.addTextDecoration_(cue, textDecorationElement)

    @staticmethod
    def addTextDecoration_(cue: 'Cue', decoration):
        """Apply a space-separated ``textDecoration`` value to ``cue.textDecoration``.

        ``underline`` / ``lineThrough`` / ``overline`` add a decoration once;
        the matching ``no...`` keyword removes it; anything else is ignored.
        """
        # NOTE(review): the original author was unsure whether the enum
        # members or their `.value` should be stored here.
        toggles = {
            'underline': (textDecoration.UNDERLINE, True),
            'noUnderline': (textDecoration.UNDERLINE, False),
            'lineThrough': (textDecoration.LINE_THROUGH, True),
            'noLineThrough': (textDecoration.LINE_THROUGH, False),
            'overline': (textDecoration.OVERLINE, True),
            'noOverline': (textDecoration.OVERLINE, False),
        }
        for value in decoration.split(' '):
            if value not in toggles:
                continue
            member, enable = toggles[value]
            if enable:
                if member not in cue.textDecoration:
                    cue.textDecoration.append(member)
            else:
                cue.textDecoration = [item for item in cue.textDecoration if member != item]

    @staticmethod
    def getStyleAttribute_(cueElement, region, styles, attribute, shouldInheritRegionStyles=True):
        """Look a style attribute up on the element, then optionally on its region."""
        attr = TtmlTextParser.getStyleAttributeFromElement_(cueElement, styles, attribute)
        if attr:
            return attr
        if not shouldInheritRegionStyles:
            return None
        return TtmlTextParser.getStyleAttributeFromRegion_(region, styles, attribute)

    @staticmethod
    def parseCueRegion_(regionElement: Element, styles: List[Element], globalExtent: str):
        """Build a CueRegion from a ``<region>`` element.

        Args:
            regionElement: the ``<region>`` element.
            styles: all ``<style>`` elements of the document.
            globalExtent: the ``extent`` attribute read from ``<tt>``; when it
                parses, pixel sizes are converted to percentages of it.

        Returns:
            The region, or None (with a warning) when it has no ``xml:id``.
        """
        _id = regionElement.getAttribute('xml:id')
        if not _id:
            log.warning('TtmlTextParser parser encountered a region with no id. Region will be ignored.')
            return None
        region = CueRegion()
        region.id = _id

        globalWidth = None
        globalHeight = None
        if globalExtent:
            globalResults = percentValues_.findall(globalExtent) or pixelValues_.findall(globalExtent)
            if globalResults:
                # A zero extent cannot be used as a divisor; treat it as absent.
                globalWidth = TtmlTextParser._toNumber(globalResults[0][0]) or None
                globalHeight = TtmlTextParser._toNumber(globalResults[0][1]) or None

        extent = TtmlTextParser.getStyleAttributeFromRegion_(regionElement, styles, 'extent')
        if extent:
            percentage = percentValues_.findall(extent)
            results = percentage or pixelValues_.findall(extent)
            if results:
                region.width = TtmlTextParser._toNumber(results[0][0])
                region.height = TtmlTextParser._toNumber(results[0][1])

                if not percentage:
                    if globalWidth is not None:
                        region.width = region.width * 100 / globalWidth
                    if globalHeight is not None:
                        region.height = region.height * 100 / globalHeight
                if percentage or globalWidth is not None:
                    region.widthUnits = units.PERCENTAGE
                else:
                    region.widthUnits = units.PX
                if percentage or globalHeight is not None:
                    region.heightUnits = units.PERCENTAGE
                else:
                    region.heightUnits = units.PX

        origin = TtmlTextParser.getStyleAttributeFromRegion_(regionElement, styles, 'origin')
        if origin:
            percentage = percentValues_.findall(origin)
            results = percentage or pixelValues_.findall(origin)
            if results:
                region.viewportAnchorX = TtmlTextParser._toNumber(results[0][0])
                region.viewportAnchorY = TtmlTextParser._toNumber(results[0][1])
                if not percentage:
                    if globalHeight is not None:
                        region.viewportAnchorY = region.viewportAnchorY * 100 / globalHeight
                    if globalWidth is not None:
                        # X is horizontal, so it scales with the width.
                        region.viewportAnchorX = region.viewportAnchorX * 100 / globalWidth
                if percentage or globalWidth is not None:
                    region.viewportAnchorUnits = units.PERCENTAGE
                else:
                    region.viewportAnchorUnits = units.PX
        return region

    @staticmethod
    def getInheritedStyleAttribute_(element: Element, styles, attribute):
        """Resolve ``attribute`` through the ``<style>`` elements an element references.

        Returns:
            The value from the last referenced style that provides one, or None.
        """
        ttsNs = styleNs_
        ebuttsNs = styleEbuttsNs_

        inheritedStyles = TtmlTextParser.getElementsFromCollection_(element, 'style', styles, '')  # type: List[Element]

        styleValue = None
        # The last value in our styles stack takes the precedence over the others.
        for inheritedStyle in inheritedStyles:
            styleAttributeValue = (
                # Check the ebu namespace first, then fall back to tts.
                inheritedStyle.getAttributeNS(ebuttsNs, attribute)
                or inheritedStyle.getAttributeNS(ttsNs, attribute)
                # Styles can inherit from other styles, so traverse up that chain.
                or TtmlTextParser.getStyleAttributeFromElement_(inheritedStyle, styles, attribute)
            )
            if styleAttributeValue:
                styleValue = styleAttributeValue

        return styleValue

    @staticmethod
    def getStyleAttributeFromElement_(cueElement: Element, styles, attribute: str):
        """Return the element's own tts attribute, else the value from its styles."""
        elementAttribute = cueElement.getAttributeNS(styleNs_, attribute)
        if elementAttribute:
            return elementAttribute
        return TtmlTextParser.getInheritedStyleAttribute_(cueElement, styles, attribute)

    @staticmethod
    def getInheritedAttribute_(element: Element, attributeName: str, nsName: str):
        """Return the attribute from the element or its nearest ancestor that has it.

        ``nsName`` selects a namespaced lookup when truthy. The result is
        None or '' when no element on the way up carries the attribute.
        """
        ret = None
        while element:
            if nsName:
                ret = element.getAttributeNS(nsName, attributeName)
            else:
                ret = element.getAttribute(attributeName)
            if ret:
                break
            parentNode = element.parentNode
            # Stop at the document node (or a detached element).
            if not isinstance(parentNode, Element):
                break
            element = parentNode
        return ret

    @staticmethod
    def getElementsFromCollection_(element: Element, attributeName: str, collection: list, prefixName: str, nsName: str = None):
        """Resolve an id-list attribute to the matching elements of ``collection``.

        The (possibly inherited) attribute is split on spaces; for every name
        the first collection item whose ``prefixName + xml:id`` equals it is
        returned, in attribute order.
        """
        items = []
        if not element or len(collection) < 1:
            return items
        attributeValue = TtmlTextParser.getInheritedAttribute_(element, attributeName, nsName)
        if not attributeValue:
            return items
        for name in attributeValue.split(' '):
            for item in collection:
                if prefixName + item.getAttribute('xml:id') == name:
                    items.append(item)
                    break
        return items

    @staticmethod
    def getStyleAttributeFromRegion_(region: Element, styles, attribute):
        """Return the region's own tts attribute, else the value from its styles."""
        if not region:
            return None
        attr = region.getAttributeNS(styleNs_, attribute)
        if attr:
            return attr
        return TtmlTextParser.getInheritedStyleAttribute_(region, styles, attribute)

    @staticmethod
    def getCellResolution_(cellResolution: str):
        """Parse a ``"<columns> <rows>"`` string.

        Returns:
            ``{'columns': int, 'rows': int}``, or None for missing or
            malformed input.
        """
        if cellResolution is None or cellResolution == '':
            return None
        matches = re.findall(r'^(\d+) (\d+)$', cellResolution)
        if len(matches) == 0:
            return None
        columns, rows = matches[0]
        return {'columns': int(columns), 'rows': int(rows)}
616 |
617 |
# All patterns are raw strings so that `\d` and `\.` reach the regex engine
# unchanged instead of being treated as (invalid) string escapes.

# Two percentages, e.g. "50.17% 10%"
percentValues_ = re.compile(r'^(\d{1,2}(?:\.\d+)?|100(?:\.0+)?)% (\d{1,2}(?:\.\d+)?|100(?:\.0+)?)%$')

# A single percentage, e.g. "0.6%" or "90%"
percentValue_ = re.compile(r'^(\d{1,2}(?:\.\d+)?|100)%$')

# A length with unit, e.g. "100px", "8em" or "0.80c"
unitValues_ = re.compile(r'^(\d+px|\d+em|\d*\.?\d+c)$')

# Two pixel values, e.g. "1280px 720px"
pixelValues_ = re.compile(r'^(\d+)px (\d+)px$')

# 00:00:40:07 (7 frames) or 00:00:40:07.1 (7 frames, 1 subframe)
timeColonFormatFrames_ = re.compile(r'^(\d{2,}):(\d{2}):(\d{2}):(\d{2})\.?(\d+)?$')

# 00:00:40 or 00:40
timeColonFormat_ = re.compile(r'^(?:(\d{2,}):)?(\d{2}):(\d{2})$')

# 01:02:43.0345555 or 02:43.03
timeColonFormatMilliseconds_ = re.compile(r'^(?:(\d{2,}):)?(\d{2}):(\d{2}\.\d{2,})$')

# 75f or 75.5f
timeFramesFormat_ = re.compile(r'^(\d*(?:\.\d*)?)f$')

# 50t or 50.5t
timeTickFormat_ = re.compile(r'^(\d*(?:\.\d*)?)t$')

# 3.45h, 3m or 4.20s
timeHMSFormat_ = re.compile(r'^(?:(\d*(?:\.\d*)?)h)?(?:(\d*(?:\.\d*)?)m)?(?:(\d*(?:\.\d*)?)s)?(?:(\d*(?:\.\d*)?)ms)?$')
647 |
648 |
class textAlignToLineAlign_(Enum):
    """Lookup table from a textAlign keyword to a lineAlign constant.

    Members are looked up by name; the mapped constant is the member's
    ``.value``. Names that share a value (``start`` with ``left``, ``end``
    with ``right``) become Enum aliases of the first one declared, so the
    declaration order below is significant.
    """
    left = lineAlign.START
    center = lineAlign.CENTER
    right = lineAlign.END
    start = lineAlign.START
    end = lineAlign.END
655 |
656 |
class textAlignToPositionAlign_(Enum):
    """Lookup table from a textAlign keyword to a positionAlign constant.

    Members are looked up by name; the mapped constant is the member's
    ``.value``. Only ``left``, ``center`` and ``right`` are defined here.
    """
    left = positionAlign.LEFT
    center = positionAlign.CENTER
    right = positionAlign.RIGHT
661 |
662 |
# XML namespace URIs passed to getAttributeNS() when reading TTML attributes.
# Namespace of the parameter attributes (frameRate, tickRate, cellResolution, ...).
parameterNs_ = 'http://www.w3.org/ns/ttml#parameter'
# Namespace of the styling attributes (extent, origin, color, ...).
styleNs_ = 'http://www.w3.org/ns/ttml#styling'
# Alternative styling namespace, checked before styleNs_ on <style> elements.
styleEbuttsNs_ = 'urn:ebu:tt:style'
# Namespaces tried, in order, when resolving a cue's backgroundImage attribute.
smpteNsList_ = [
    'http://www.smpte-ra.org/schemas/2052-1/2010/smpte-tt',
    'http://www.smpte-ra.org/schemas/2052-1/2013/smpte-tt',
]
670 |
--------------------------------------------------------------------------------