├── .github
└── workflows
│ └── ci.yml
├── .gitignore
├── Dockerfile
├── Dockerfile.cuda
├── LICENSE
├── README.md
├── autocut
├── __init__.py
├── __main__.py
├── cut.py
├── daemon.py
├── main.py
├── transcribe.py
└── utils.py
├── imgs
└── typora.jpg
├── setup.cfg
├── setup.py
└── test
├── config.py
├── content
├── test.srt
├── test_md.md
└── test_srt.srt
├── media
├── test001.mp4
├── test001_en.mp4
├── test002.mov
├── test003.mkv
├── test004.flv
└── test005.mp3
├── test_cut.py
└── test_transcribe.py
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: Test
2 |
3 | on:
4 | pull_request:
5 | push:
6 | branches:
7 | - main
8 |
9 | jobs:
10 | lint_and_test:
11 | runs-on: ${{ matrix.os }}-latest
12 | strategy:
13 | matrix:
14 | python-version: ['3.9', '3.10']
15 |         # macOS runners do not support M1 for now
16 | os: [ubuntu, windows, macos]
17 | steps:
18 | - uses: actions/checkout@v3
19 | - name: Set up Python ${{ matrix.python-version }}
20 | uses: actions/setup-python@v4
21 | with:
22 | python-version: ${{ matrix.python-version }}
23 | - name: Set Variables
24 | id: set_variables
25 | shell: bash
26 | run: |
27 | echo "PY=$(python -c 'import hashlib, sys;print(hashlib.sha256(sys.version.encode()+sys.executable.encode()).hexdigest())')" >> $GITHUB_OUTPUT
28 | echo "PIP_CACHE=$(pip cache dir)" >> $GITHUB_OUTPUT
29 | - name: Cache PIP
30 | uses: actions/cache@v3
31 | with:
32 | path: ${{ steps.set_variables.outputs.PIP_CACHE }}
33 | key: ${{ runner.os }}-pip-${{ steps.set_variables.outputs.PY }}
34 |
35 |       - name: Setup ffmpeg for different platforms
36 | uses: FedericoCarboni/setup-ffmpeg@master
37 |
38 | - name: Install dependencies
39 | run: |
40 | python -m pip install --upgrade pip
41 | pip install .
42 | pip install pytest black
43 | - name: Run Test
44 | run: pytest test/
45 | - name: Run Lint
46 | run: black . --check
47 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 | log/
131 |
132 | # IDE
133 | .vscode/
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.9-slim as base
2 |
3 | RUN mkdir /autocut
4 | COPY ./ /autocut
5 | WORKDIR /autocut
6 |
7 | RUN apt update && \
8 | apt install -y git && \
9 | apt install -y ffmpeg
10 |
11 | RUN pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu && \
12 | pip install .
--------------------------------------------------------------------------------
/Dockerfile.cuda:
--------------------------------------------------------------------------------
1 | FROM pytorch/pytorch:1.13.0-cuda11.6-cudnn8-runtime
2 |
3 | RUN mkdir /autocut
4 | COPY ./ /autocut
5 | WORKDIR /autocut
6 |
7 | RUN apt update && \
8 | apt install -y git && \
9 | apt install -y ffmpeg
10 |
11 | RUN pip install .
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # AutoCut: 通过字幕来剪切视频
2 |
3 | AutoCut 对你的视频自动生成字幕。然后你选择需要保留的句子,AutoCut 将对你视频中对应的片段裁切并保存。你无需使用视频编辑软件,只需要编辑文本文件即可完成剪切。
4 |
5 | ## 使用例子
6 |
7 | 假如你录制的视频放在 `2022-11-04/` 这个文件夹里。那么运行
8 |
9 | ```bash
10 | autocut -d 2022-11-04
11 | ```
12 |
13 | > 提示:如果你使用 OBS 录屏,可以在 `设置->高级->录像->文件名格式` 中将空格改成 `/`,即 `%CCYY-%MM-%DD/%hh-%mm-%ss`。那么视频文件将放在日期命名的文件夹里。
14 |
15 | AutoCut 将持续对这个文件夹里视频进行字幕抽取和剪切。例如,你刚完成一个视频录制,保存在 `11-28-18.mp4`。AutoCut 将生成 `11-28-18.md`。你在里面选择需要保留的句子后,AutoCut 将剪切出 `11-28-18_cut.mp4`,并生成 `11-28-18_cut.md` 来预览结果。
16 |
17 | 你可以使用任何的 Markdown 编辑器。例如我常用 VS Code 和 Typora。下图是通过 Typora 来对 `11-28-18.md` 编辑。
18 |
19 | 
20 |
21 | 全部完成后在 `autocut.md` 里选择需要拼接的视频后,AutoCut 将输出 `autocut_merged.mp4` 和对应的字幕文件。
22 |
23 | ## 安装
24 |
25 | 首先安装 Python 包
26 |
27 | ```
28 | pip install git+https://github.com/mli/autocut.git
29 | ```
30 |
31 | ## 本地安装测试
32 |
33 |
34 | ```
35 | git clone https://github.com/mli/autocut
36 | cd autocut
37 | pip install .
38 | ```
39 |
40 |
41 | > 上面将安装 [pytorch](https://pytorch.org/)。如果你需要 GPU 运行,且默认安装的版本不匹配的话,你可以先安装 Pytorch。如果安装 Whisper 出现问题,请参考[官方文档](https://github.com/openai/whisper#setup)。
42 |
43 | 另外需要安装 [ffmpeg](https://ffmpeg.org/)
44 |
45 | ```
46 | # on Ubuntu or Debian
47 | sudo apt update && sudo apt install ffmpeg
48 |
49 | # on Arch Linux
50 | sudo pacman -S ffmpeg
51 |
52 | # on MacOS using Homebrew (https://brew.sh/)
53 | brew install ffmpeg
54 |
55 | # on Windows using Scoop (https://scoop.sh/)
56 | scoop install ffmpeg
57 | ```
58 |
59 | ## Docker 安装
60 |
61 | 首先将项目克隆到本地。
62 |
63 | ```bash
64 | git clone https://github.com/mli/autocut.git
65 | ```
66 |
67 | ### 安装 CPU 版本
68 |
69 | 进入项目根目录,然后构建 docker 映像。
70 |
71 | ```bash
72 | docker build -t autocut .
73 | ```
74 |
75 | 运行下面的命令创建 docker 容器,就可以直接使用了。
76 |
77 | ```bash
78 | docker run -it --rm -v E:\autocut:/autocut/video autocut /bin/bash
79 | ```
80 |
81 | 其中 `-v` 是将主机存放视频的文件夹 `E:\autocut` 映射到虚拟机的 `/autocut/video` 目录。`E:\autocut` 是主机存放视频的目录,需修改为自己主机存放视频的目录。
82 |
83 | ### 安装 GPU 版本
84 |
85 | 使用 GPU 加速需要主机有 Nvidia 的显卡并安装好相应驱动。然后在项目根目录,执行下面的命令构建 docker 映像。
86 |
87 | ```bash
88 | docker build -f ./Dockerfile.cuda -t autocut-gpu .
89 | ```
90 |
91 | 使用 GPU 加速时,运行 docker 容器需添加参数 `--gpus all`。
92 |
93 | ```bash
94 | docker run --gpus all -it --rm -v E:\autocut:/autocut/video autocut-gpu
95 | ```
96 |
97 | ## 更多使用选项
98 |
99 | ### 转录某个视频生成 `.srt` 和 `.md` 结果。
100 |
101 | ```bash
102 | autocut -t 22-52-00.mp4
103 | ```
104 |
105 | 1. 如果对转录质量不满意,可以使用更大的模型,例如
106 |
107 | ```bash
108 | autocut -t 22-52-00.mp4 --whisper-model large
109 | ```
110 |
111 | 默认是 `small`。更好的模型是 `medium` 和 `large`,但推荐使用 GPU 获得更好的速度。也可以使用更快的 `tiny` 和 `base`,但转录质量会下降。
112 |
113 |
114 | ### 剪切某个视频
115 |
116 | ```bash
117 | autocut -c 22-52-00.mp4 22-52-00.srt 22-52-00.md
118 | ```
119 |
120 | 1. 默认视频比特率是 `--bitrate 10m`,你可以根据需要调大调小。
121 | 2. 如果不习惯 Markdown 格式文件,你也可以直接在 `srt` 文件里删除不要的句子,在剪切时不传入 `md` 文件名即可。就是 `autocut -c 22-52-00.mp4 22-52-00.srt`
122 | 3. 如果仅有 `srt` 文件,编辑不方便可以使用如下命令生成 `md` 文件,然后编辑 `md` 文件即可,但此时会完全对照 `srt` 生成,不会出现 `no speech` 等提示文本。
123 |
124 | ```bash
125 | autocut -m test.srt test.mp4
126 | autocut -m test.mp4 test.srt # 支持视频和字幕乱序传入
127 | autocut -m test.srt # 也可以只传入字幕文件
128 | ```
129 |
130 |
131 | ### 一些小提示
132 |
133 |
134 | 1. 讲得流利的视频的转录质量会高一些,这因为是 Whisper 训练数据分布的缘故。对一个视频,你可以先粗选一下句子,然后在剪出来的视频上再剪一次。
135 | 2. ~~最终视频生成的字幕通常还需要做一些小编辑。你可以直接编辑`md`文件(比`srt`文件更紧凑,且嵌入了视频)。然后使用 `autocut -s 22-52-00.md 22-52-00.srt` 来生成更新的字幕 `22-52-00_edited.srt`。注意这里会无视句子是不是被选中,而是全部转换成 `srt`。~~
136 | 3. 最终视频生成的字幕通常还需要做一些小编辑。但 `srt` 里面空行太多。你可以使用 `autocut -s 22-52-00.srt` 来生成一个紧凑些的版本 `22-52-00_compact.srt` 方便编辑(这个格式不合法,但编辑器,例如 VS Code,还是会进行语法高亮)。编辑完成后,`autocut -s 22-52-00_compact.srt` 转回正常格式。
137 | 4. 用 Typora 和 VS Code 编辑 Markdown 都很方便。他们都有对应的快捷键 mark 一行或者多行。但 VS Code 视频预览似乎有点问题。
138 | 5. 视频是通过 ffmpeg 导出。在 Apple M1 芯片上它用不了 GPU,导致导出速度不如专业视频软件。
139 |
140 | ### 常见问题
141 |
142 | 1. **输出的是乱码?**
143 |
144 | AutoCut 默认输出编码是 `utf-8`. 确保你的编辑器也使用了 `utf-8` 解码。你可以通过 `--encoding` 指定其他编码格式。但是需要注意生成字幕文件和使用字幕文件剪辑时的编码格式需要一致。例如使用 `gbk`。
145 |
146 | ```bash
147 | autocut -t test.mp4 --encoding=gbk
148 | autocut -c test.mp4 test.srt test.md --encoding=gbk
149 | ```
150 |
151 | 如果使用了其他编码格式(如 `gbk` 等)生成 `md` 文件并用 Typora 打开后,该文件可能会被 Typora 自动转码为其他编码格式,此时再通过生成时指定的编码格式进行剪辑时可能会出现编码不支持等报错。因此可以在使用 Typora 编辑后再通过 VSCode 等修改到你需要的编码格式进行保存后再使用剪辑功能。
152 |
153 | 2. **如何使用 GPU 来转录?**
154 |
155 | 当你有 Nvidia GPU,而且安装了对应版本的 PyTorch 的时候,转录是在 GPU 上进行。你可以通过命令来查看当前是不是支持 GPU。
156 |
157 | ```bash
158 | python -c "import torch; print(torch.cuda.is_available())"
159 | ```
160 |
161 | 否则你可以在安装 AutoCut 前手动安装对应的 GPU 版本 PyTorch。
162 |
163 | 3. **使用 GPU 时报错显存不够。**
164 |
165 | whisper 的大模型需要一定的 GPU 显存。如果你的显存不够,你可以用小一点的模型,例如 `small`。如果你仍然想用大模型,可以通过 `--device` 来强制使用 CPU。例如
166 |
167 | ```bash
168 | autocut -t 11-28-18.mp4 --whisper-model large --device cpu
169 | ```
170 |
171 | 4. **能不能使用 `pip` 安装?**
172 |
173 | 因为 AutoCut 的依赖 whisper 没有在 PyPI 发布包,所以目前只能用 `pip install git+https://github.com/mli/autocut.git` 这种方式发布。有需求的同学可以查看 whisper 模型是不是能直接在 huggingface hub 下载,从而摆脱 whisper 包的依赖。
174 |
175 |
176 | ## 如何参与贡献
177 |
178 | [这里有一些想做的 feature](https://github.com/mli/autocut/issues/22),欢迎贡献。
179 |
180 | ### 代码结构
181 | ```text
182 | autocut
183 | │ .gitignore
184 | │ LICENSE
185 | │ README.md # 一般新增或修改需要让使用者知道就需要对应更新 README.md 内容
186 | │ setup.py
187 | │
188 | └─autocut # 核心代码位于 autocut 文件夹中,新增功能的实现也一般在这里面进行修改或新增
189 | │ cut.py
190 | │ daemon.py
191 | │ main.py
192 | │ transcribe.py
193 | │ utils.py
194 | └─ __init__.py
195 |
196 | ```
197 |
198 | ### 安装依赖
199 | 开始安装这个项目的需要的依赖之前,建议先了解一下 Anaconda 或者 venv 的虚拟环境使用,推荐**使用虚拟环境来搭建该项目的开发环境**。
200 | 具体安装方式为在你搭建的虚拟环境之中按照[上方安装步骤](./README.md#安装)进行安装。
201 |
202 | > 为什么推荐使用虚拟环境开发?
203 | >
204 | > 一方面是保证各种不同的开发环境之间互相不污染。
205 | >
206 | > 更重要的是在于这个项目实际上是一个 Python Package,所以在你安装之后 AutoCut 的代码实际也会变成你的环境依赖。
207 | > **因此在你更新代码之后,你需要将新代码重新安装到环境中,然后才能调用到新的代码。**
208 |
209 | ### 开发
210 |
211 | 1. 代码风格目前遵循 PEP-8,可以使用相关的自动格式化软件完成。
212 | 2. `utils.py` 主要是全局共用的一些工具方法。
213 | 3. `transcribe.py` 是调用模型生成`srt`和`md`的部分。
214 | 4. `cut.py` 提供根据标记后`md`或`srt`进行视频剪切合并的功能。
215 | 5. `daemon.py` 提供的是监听文件夹生成字幕和剪切视频的功能。
216 | 6. `main.py` 声明命令行参数,根据输入参数调用对应功能。
217 |
218 | 开发过程中请尽量保证修改在正确的地方,以及合理地复用代码,
219 | 同时工具函数请尽可能放在`utils.py`中。
220 | 代码格式目前是遵循 PEP-8,变量命名尽量语义化即可。
221 |
222 | 在开发完成之后,最重要的一点是需要进行**测试**,请保证提交之前对所有**与你修改直接相关的部分**以及**你修改会影响到的部分**都进行了测试,并保证功能的正常。
223 | 目前使用 `GitHub Actions` CI,Lint 使用 black,提交前请运行 `black`。
224 |
225 | ### 提交
226 |
227 | 1. commit 信息用英文描述清楚你做了哪些修改即可,小写字母开头。
228 | 2. 最好可以保证一次的 commit 涉及的修改比较小,可以简短地描述清楚,这样也方便之后有修改时的查找。
229 | 3. PR 的时候 title 简述有哪些修改, contents 可以具体写下修改内容。
230 | 4. run test `pip install pytest` then `pytest test`
231 | 5. run lint `pip install black` then `black .`
232 |
--------------------------------------------------------------------------------
/autocut/__init__.py:
--------------------------------------------------------------------------------
# Package version string (single source of truth for releases).
__version__ = "0.0.3"
2 |
--------------------------------------------------------------------------------
/autocut/__main__.py:
--------------------------------------------------------------------------------
from .main import main

# Entry point so the package can be run directly: ``python -m autocut``.
if __name__ == "__main__":
    main()
5 |
--------------------------------------------------------------------------------
/autocut/cut.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 | import re
4 |
5 | import srt
6 | from moviepy import editor
7 |
8 | from . import utils
9 |
10 |
# Merge videos
class Merger:
    """Maintain a folder-level task list and merge the checked videos.

    ``write_md`` (re)generates the markdown checklist of cut videos;
    ``run`` concatenates every checked entry into ``*_merged.mp4``.
    """

    def __init__(self, args):
        self.args = args

    def write_md(self, videos):
        """Write the merge checklist for *videos* unless it is up to date.

        Skipped when the list is already marked as done-editing or when it
        already holds one task per video (header task + one per file).
        """
        md = utils.MD(self.args.inputs[0], self.args.encoding)
        # Not overwrite if already marked as done or no new videos
        if md.done_editing() or len(md.tasks()) == len(videos) + 1:
            return

        md.clear()
        md.add_done_editing(False)
        md.add("\nSelect the files that will be used to generate `autocut_final.mp4`\n")
        for video_fn in videos:
            md_fn = utils.change_ext(video_fn, "md")
            video_md = utils.MD(md_fn, self.args.encoding)
            # Collect a few words from the per-video task list to describe it.
            desc = ""
            tasks = video_md.tasks()
            if len(tasks) > 1:
                for _, sent in tasks[1:]:
                    found = re.findall(r"\] (.*)", sent)
                    if found and "no speech" not in found[0].lower():
                        desc += found[0] + " "
                    # Roughly 50 characters is enough for a preview.
                    if len(desc) > 50:
                        break
            edited_mark = "[Edited]" if video_md.done_editing() else ""
            video_base = os.path.basename(video_fn)
            md_base = os.path.basename(md_fn)
            md.add_task(False, f"[{video_base}]({md_base}) {edited_mark} {desc}")
        md.write()

    def run(self):
        """Concatenate every checked video and write ``*_merged.mp4``."""
        md_fn = self.args.inputs[0]
        md = utils.MD(md_fn, self.args.encoding)
        if not md.done_editing():
            return

        videos = []
        for marked, sent in md.tasks():
            if not marked:
                continue
            # The task text links the video as "[name](name.md) ...".
            found = re.findall(r"\[(.*)\]", sent)
            if not found:
                continue
            fn = os.path.join(os.path.dirname(md_fn), found[0])
            logging.info(f"Loading {fn}")
            videos.append(editor.VideoFileClip(fn))

        dur = sum(v.duration for v in videos)
        logging.info(f"Merging into a video with {dur / 60:.1f} min length")

        merged = editor.concatenate_videoclips(videos)
        out_fn = os.path.splitext(md_fn)[0] + "_merged.mp4"
        merged.write_videofile(
            out_fn, audio_codec="aac", bitrate=self.args.bitrate
        )  # logger=None,
        logging.info(f"Saved merged video to {out_fn}")
71 |
72 |
# Cut media
class Cutter:
    """Cut a media file down to the subtitle segments the user kept.

    Inputs (``args.inputs``) are a media file, a ``.srt`` file, and
    optionally a ``.md`` checklist; only subtitles whose index is checked
    in the markdown survive the cut.
    """

    def __init__(self, args):
        self.args = args

    def run(self):
        """Cut the media according to the srt (and optional md) selection.

        Writes ``<media>_cut.mp4`` for video input or ``<media>_cut.mp3``
        for audio input; returns early if the output already exists (unless
        forced) or the markdown is not yet marked as done-editing.
        """
        # Classify inputs by extension; anything that is not srt/md is the media.
        fns = {"srt": None, "media": None, "md": None}
        for fn in self.args.inputs:
            ext = os.path.splitext(fn)[1][1:]
            fns[ext if ext in fns else "media"] = fn

        assert fns["media"], "must provide a media filename"
        assert fns["srt"], "must provide a srt filename"

        is_video_file = utils.is_video(fns["media"])
        outext = "mp4" if is_video_file else "mp3"
        output_fn = utils.change_ext(utils.add_cut(fns["media"]), outext)
        if utils.check_exists(output_fn, self.args.force):
            return

        with open(fns["srt"], encoding=self.args.encoding) as f:
            subs = list(srt.parse(f.read()))

        if fns["md"]:
            md = utils.MD(fns["md"], self.args.encoding)
            if not md.done_editing():
                return
            # Keep only subtitles whose index is checked in the markdown tasks.
            index = []
            for mark, sent in md.tasks():
                if not mark:
                    continue
                m = re.match(r"\[(\d+)", sent.strip())
                if m:
                    index.append(int(m.groups()[0]))
            subs = [s for s in subs if s.index in index]
            logging.info(f'Cut {fns["media"]} based on {fns["srt"]} and {fns["md"]}')
        else:
            logging.info(f'Cut {fns["media"]} based on {fns["srt"]}')

        # Build (start, end) second ranges, merging subtitles whose gap is
        # under half a second to avoid choppy cuts.
        segments = []
        # Avoid disordered subtitles
        subs.sort(key=lambda x: x.start)
        for x in subs:
            if len(segments) == 0:
                segments.append(
                    {"start": x.start.total_seconds(), "end": x.end.total_seconds()}
                )
            else:
                if x.start.total_seconds() - segments[-1]["end"] < 0.5:
                    segments[-1]["end"] = x.end.total_seconds()
                else:
                    segments.append(
                        {"start": x.start.total_seconds(), "end": x.end.total_seconds()}
                    )

        if is_video_file:
            media = editor.VideoFileClip(fns["media"])
        else:
            media = editor.AudioFileClip(fns["media"])

        # Add a fade between two clips. Not quite necessary. Keep code here for reference.
        # fade = 0
        # segments = _expand_segments(segments, fade, 0, video.duration)
        # clips = [video.subclip(
        #     s['start'], s['end']).crossfadein(fade) for s in segments]
        # final_clip = editor.concatenate_videoclips(clips, padding = -fade)

        clips = [media.subclip(s["start"], s["end"]) for s in segments]
        if is_video_file:
            final_clip: editor.VideoClip = editor.concatenate_videoclips(clips)
            logging.info(
                f"Reduced duration from {media.duration:.1f} to {final_clip.duration:.1f}"
            )

            # Resample the audio track, then re-attach and normalize it.
            aud = final_clip.audio.set_fps(44100)
            final_clip = final_clip.without_audio().set_audio(aud)
            final_clip = final_clip.fx(editor.afx.audio_normalize)

            # an alternative to bitrate is to use crf, e.g. ffmpeg_params=['-crf', '18']
            final_clip.write_videofile(
                output_fn, audio_codec="aac", bitrate=self.args.bitrate
            )
        else:
            final_clip: editor.AudioClip = editor.concatenate_audioclips(clips)
            logging.info(
                f"Reduced duration from {media.duration:.1f} to {final_clip.duration:.1f}"
            )

            final_clip = final_clip.fx(editor.afx.audio_normalize)
            final_clip.write_audiofile(
                output_fn, codec="libmp3lame", fps=44100, bitrate=self.args.bitrate
            )

        media.close()
        logging.info(f"Saved media to {output_fn}")
--------------------------------------------------------------------------------
/autocut/daemon.py:
--------------------------------------------------------------------------------
1 | import copy
2 | import glob
3 | import logging
4 | import os
5 | import time
6 |
7 | from . import cut, transcribe, utils
8 |
9 |
class Daemon:
    """Watch a folder: transcribe new media and cut files marked as done.

    ``args.inputs`` must contain exactly one folder. Each pass transcribes
    media files that lack ``.srt``/``.md`` siblings, cuts media whose
    markdown checklist is marked done, and refreshes the folder-level
    ``autocut.md`` merge list.
    """

    def __init__(self, args):
        self.args = args
        # Poll interval in seconds: reset to 1 on activity, backs off to 60.
        self.sleep = 1

    def run(self):
        """Poll the folder forever, backing off while nothing changes."""
        assert len(self.args.inputs) == 1, "Must provide a single folder"
        while True:
            self._iter()
            time.sleep(self.sleep)
            self.sleep = min(60, self.sleep + 1)

    def _iter(self):
        """One scan: transcribe, cut, then update the merge list."""
        folder = self.args.inputs[0]
        # glob already returns a list; no need to wrap it before sorting.
        files = sorted(glob.glob(os.path.join(folder, "*")))
        media_files = [f for f in files if utils.is_video(f) or utils.is_audio(f)]
        # Work on a deep copy so this daemon's own args are never mutated.
        args = copy.deepcopy(self.args)
        for f in media_files:
            srt_fn = utils.change_ext(f, "srt")
            md_fn = utils.change_ext(f, "md")
            is_video_file = utils.is_video(f)
            if srt_fn not in files or md_fn not in files:
                args.inputs = [f]
                try:
                    transcribe.Transcribe(args).run()
                    self.sleep = 1
                    break
                except RuntimeError:
                    # Best effort: the file may still be being written to disk,
                    # so swallow the error and retry on the next scan.
                    # logging.warn is a deprecated alias; use warning().
                    logging.warning(
                        "Failed, may be due to the video is still on recording"
                    )
            if md_fn in files:
                if utils.add_cut(md_fn) in files:
                    continue
                md = utils.MD(md_fn, self.args.encoding)
                ext = "mp4" if is_video_file else "mp3"
                # Skip until the user marks editing done, and never redo a cut.
                if not md.done_editing() or os.path.exists(
                    utils.change_ext(utils.add_cut(f), ext)
                ):
                    continue
                args.inputs = [f, md_fn, srt_fn]
                cut.Cutter(args).run()
                self.sleep = 1

        # Refresh and (if marked done) execute the folder-level merge list.
        args.inputs = [os.path.join(folder, "autocut.md")]
        merger = cut.Merger(args)
        merger.write_md(media_files)
        merger.run()
59 |
--------------------------------------------------------------------------------
/autocut/main.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import logging
3 | import os
4 |
5 | from . import utils
6 |
7 |
def main():
    """Parse command-line arguments and dispatch to the requested action."""
    parser = argparse.ArgumentParser(
        description="Edit videos based on transcribed subtitles",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    logging.basicConfig(
        format="[autocut:%(filename)s:L%(lineno)d] %(levelname)-6s %(message)s"
    )
    logging.getLogger().setLevel(logging.INFO)

    parser.add_argument("inputs", type=str, nargs="+", help="Inputs filenames/folders")
    parser.add_argument(
        "-t",
        "--transcribe",
        help="Transcribe videos/audio into subtitles",
        action=argparse.BooleanOptionalAction,
    )
    parser.add_argument(
        "-c",
        "--cut",
        help="Cut a video based on subtitles",
        action=argparse.BooleanOptionalAction,
    )
    parser.add_argument(
        "-d",
        "--daemon",
        help="Monitor a folder to transcribe and cut",
        action=argparse.BooleanOptionalAction,
    )
    parser.add_argument(
        "-s",
        help="Convert .srt to a compact format for easier editing",
        action=argparse.BooleanOptionalAction,
    )
    parser.add_argument(
        "-m",
        "--to-md",
        help="Convert .srt to .md for easier editing",
        action=argparse.BooleanOptionalAction,
    )
    parser.add_argument(
        "--lang",
        type=str,
        default="zh",
        choices=["zh", "en"],
        help="The output language of transcription",
    )
    parser.add_argument(
        "--prompt", type=str, default="", help="initial prompt feed into whisper"
    )
    parser.add_argument(
        "--whisper-model",
        type=str,
        default="small",
        choices=["tiny", "base", "small", "medium", "large", "large-v2"],
        help="The whisper model used to transcribe.",
    )
    parser.add_argument(
        "--bitrate",
        type=str,
        default="10m",
        help="The bitrate to export the cutted video, such as 10m, 1m, or 500k",
    )
    parser.add_argument(
        "--vad", help="If or not use VAD", choices=["1", "0", "auto"], default="auto"
    )
    parser.add_argument(
        "--force",
        help="Force write even if files exist",
        action=argparse.BooleanOptionalAction,
    )
    parser.add_argument(
        "--encoding", type=str, default="utf-8", help="Document encoding format"
    )
    parser.add_argument(
        "--device",
        type=str,
        default="cpu",
        choices=["cpu", "cuda"],
        help="Force to CPU or GPU for transcribing. In default automatically use GPU if available.",
    )

    args = parser.parse_args()

    # Heavy modules (whisper/torch) are imported lazily inside each branch so
    # that light-weight actions do not pay their import cost.
    if args.transcribe:
        from .transcribe import Transcribe

        Transcribe(args).run()
    elif args.to_md:
        from .utils import trans_srt_to_md

        if len(args.inputs) == 2:
            [input_1, input_2] = args.inputs
            ext = os.path.splitext(input_1)[1]
            if ext != ".srt":
                # Ensure the .srt file is passed first.
                input_1, input_2 = input_2, input_1
            trans_srt_to_md(args.encoding, args.force, input_1, input_2)
        elif len(args.inputs) == 1:
            trans_srt_to_md(args.encoding, args.force, args.inputs[0])
        else:
            logging.warning(
                "Wrong number of files, please pass in a .srt file or an additional video file"
            )
    elif args.cut:
        from .cut import Cutter

        Cutter(args).run()
    elif args.daemon:
        from .daemon import Daemon

        Daemon(args).run()
    elif args.s:
        utils.compact_rst(args.inputs[0], args.encoding)
    else:
        logging.warning("No action, use -c, -t or -d")
124 |
125 |
# Entry point when executed directly (e.g. ``python -m autocut.main``).
if __name__ == "__main__":
    main()
128 |
--------------------------------------------------------------------------------
/autocut/transcribe.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | import logging
3 | import os
4 | import time
5 |
6 | import opencc
7 | import srt
8 | import torch
9 | from faster_whisper import WhisperModel, decode_audio as load_audio
10 |
11 | from . import utils
12 |
13 |
class Transcribe:
    """Transcribe media files into ``.srt`` subtitles plus an editable ``.md``.

    For every input ``x.ext`` this produces ``x.srt`` (subtitles) and ``x.md``
    (a checkbox list where the user marks sentences to keep for cutting).
    """

    def __init__(self, args):
        self.args = args
        # Both silero-vad and whisper operate on 16 kHz audio.
        self.sampling_rate = 16000
        # Heavy models are loaded lazily on first use and cached.
        self.whisper_model = None
        self.vad_model = None
        self.detect_speech = None

    def run(self):
        """Transcribe every input; skip files whose .md already exists (unless --force)."""
        for input_path in self.args.inputs:
            logging.info(f"Transcribing {input_path}")
            name, _ = os.path.splitext(input_path)
            if utils.check_exists(name + ".md", self.args.force):
                continue

            audio = load_audio(input_path, sampling_rate=self.sampling_rate)
            if self._should_use_vad(name):
                speech_timestamps = self._detect_voice_activity(audio)
            else:
                # Single pseudo-segment spanning the whole clip.
                speech_timestamps = [{"start": 0, "end": len(audio)}]
            transcribe_results = self._transcribe(audio, speech_timestamps)

            output = name + ".srt"
            self._save_srt(output, transcribe_results)
            logging.info(f"Transcribed {input_path} to {output}")
            self._save_md(name + ".md", output, input_path)
            logging.info(f'Saved texts to {name + ".md"} to mark sentences')

    def _should_use_vad(self, name):
        """Decide whether to run voice-activity detection for this file.

        ``--vad`` is "1"/"0"/"auto" from the CLI, but the test harness passes
        plain booleans, so accept ``True`` as well (previously a boolean
        ``True`` silently disabled VAD). In "auto" mode, files that were
        already cut (``*_cut``) skip VAD.
        """
        vad = self.args.vad
        return (
            vad is True
            or vad == "1"
            or (vad == "auto" and not name.endswith("_cut"))
        )

    def _detect_voice_activity(self, audio):
        """Detect segments that have voice activities"""
        tic = time.time()
        if self.vad_model is None or self.detect_speech is None:
            # torch load limit https://github.com/pytorch/vision/issues/4156
            torch.hub._validate_not_a_forked_repo = lambda a, b, c: True
            self.vad_model, funcs = torch.hub.load(
                repo_or_dir="snakers4/silero-vad", model="silero_vad", trust_repo=True
            )

            # First returned util is silero's get_speech_timestamps helper.
            self.detect_speech = funcs[0]

        speeches = self.detect_speech(
            audio, self.vad_model, sampling_rate=self.sampling_rate
        )

        # Remove too short segments
        speeches = utils.remove_short_segments(speeches, 1.0 * self.sampling_rate)

        # Expand to avoid to tight cut. You can tune the pad length
        speeches = utils.expand_segments(
            speeches, 0.2 * self.sampling_rate, 0.0 * self.sampling_rate, audio.shape[0]
        )

        # Merge very closed segments
        speeches = utils.merge_adjacent_segments(speeches, 0.5 * self.sampling_rate)

        logging.info(f"Done voice activity detection in {time.time() - tic:.1f} sec")
        # Fall back to the whole clip when VAD found at most one segment.
        return speeches if len(speeches) > 1 else [{"start": 0, "end": len(audio)}]

    def _transcribe(self, audio, speech_timestamps):
        """Run whisper on each speech segment; returns a list of result dicts.

        NOTE: faster-whisper returns ``segments`` as a lazy generator; it is
        consumed later in ``_save_srt``.
        """
        tic = time.time()
        if self.whisper_model is None:
            self.whisper_model = WhisperModel(
                self.args.whisper_model, self.args.device
            )

        res = []
        for seg in speech_timestamps:
            segments, info = self.whisper_model.transcribe(
                audio[int(seg["start"]): int(seg["end"])],
                task="transcribe",
                language=self.args.lang,
                initial_prompt=self.args.prompt,
            )
            res.append(
                {
                    "origin_timestamp": seg,
                    "segments": segments,
                    "info": info,
                }
            )
        logging.info(f"Done transcription in {time.time() - tic:.1f} sec")
        return res

    def _save_srt(self, output, transcribe_results):
        """Compose the per-segment whisper output into a single .srt file."""
        subs = []
        # whisper sometimes generate traditional chinese, explicitly convert
        cc = opencc.OpenCC("t2s")

        def _add_sub(start, end, text):
            subs.append(
                srt.Subtitle(
                    index=0,
                    start=datetime.timedelta(seconds=start),
                    end=datetime.timedelta(seconds=end),
                    content=cc.convert(text.strip()),
                )
            )

        prev_end = 0
        for r in transcribe_results:
            origin = r["origin_timestamp"]
            for seg in r["segments"]:
                s = dict(start=seg.start, end=seg.end, text=seg.text)
                # Segment times are relative to the VAD slice; shift them
                # back to absolute positions and clamp to the slice end.
                start = s["start"] + origin["start"] / self.sampling_rate
                end = min(
                    s["end"] + origin["start"] / self.sampling_rate,
                    origin["end"] / self.sampling_rate,
                )
                if start > end:
                    continue
                # mark any empty segment that is not very short
                if start > prev_end + 1.0:
                    _add_sub(prev_end, start, "< No Speech >")
                _add_sub(start, end, s["text"])
                prev_end = end

        with open(output, "wb") as f:
            f.write(srt.compose(subs).encode(self.args.encoding, "replace"))

    def _save_md(self, md_fn, srt_fn, video_fn):
        """Render the .srt into the editable markdown checkbox list."""
        with open(srt_fn, encoding=self.args.encoding) as f:
            subs = srt.parse(f.read())

        md = utils.MD(md_fn, self.args.encoding)
        md.clear()
        md.add_done_editing(False)
        md.add_video(os.path.basename(video_fn))
        md.add(
            f"\nTexts generated from [{os.path.basename(srt_fn)}]({os.path.basename(srt_fn)})."
            "Mark the sentences to keep for autocut.\n"
            "The format is [subtitle_index,duration_in_second] subtitle context.\n\n"
        )

        for s in subs:
            sec = s.start.seconds
            pre = f"[{s.index},{sec // 60:02d}:{sec % 60:02d}]"
            md.add_task(False, f"{pre:11} {s.content.strip()}")
        md.write()
156 |
--------------------------------------------------------------------------------
/autocut/utils.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 | import re
4 |
5 | import srt
6 | import opencc
7 |
8 |
def is_video(filename):
    """Return True when *filename* carries a known video extension."""
    suffix = os.path.splitext(filename)[1]
    return suffix in (".mp4", ".mov", ".mkv", ".avi", ".flv", ".f4v", ".webm")
12 |
13 |
def is_audio(filename):
    """Return True when *filename* carries a known audio extension."""
    suffix = os.path.splitext(filename)[1]
    return suffix in (".ogg", ".wav", ".mp3", ".flac", ".m4a")
17 |
18 |
def change_ext(filename, new_ext):
    """Return *filename* with its extension swapped for *new_ext* (dot optional)."""
    root = os.path.splitext(filename)[0]
    suffix = new_ext if new_ext.startswith(".") else "." + new_ext
    return root + suffix
25 |
26 |
def add_cut(filename):
    """Insert the ``_cut`` marker before the extension of *filename*.

    A name that already ends in ``_cut`` gets its underscore doubled
    (``x_cut`` -> ``x__cut``) so repeated marking stays distinguishable.
    """
    root, suffix = os.path.splitext(filename)
    if root.endswith("_cut"):
        root = root[:-4] + "__cut"
    else:
        root = root + "_cut"
    return root + suffix
35 |
36 |
37 | # a very simple markdown parser
class MD:
    """Line-oriented helper for autocut's markdown checkbox task lists."""

    def __init__(self, filename, encoding):
        self.lines = []
        # NOTE: keeps the original (misspelled) public attribute name.
        self.EDIT_DONE_MAKR = "<-- Mark if you are done editing."
        self.encoding = encoding
        self.filename = filename
        if filename:
            self.load_file()

    def load_file(self):
        """Populate self.lines from the backing file, if it exists."""
        if not os.path.exists(self.filename):
            return
        with open(self.filename, encoding=self.encoding) as fp:
            self.lines = fp.readlines()

    def clear(self):
        """Drop all buffered lines."""
        self.lines = []

    def write(self):
        """Write the buffered lines back to the file, newline separated."""
        payload = "\n".join(self.lines).encode(self.encoding, "replace")
        with open(self.filename, "wb") as fp:
            fp.write(payload)

    def tasks(self):
        """Return every checkbox line as a (checked, text) pair."""
        parsed = (self._parse_task_status(line) for line in self.lines)
        return [(checked, text) for checked, text in parsed if checked is not None]

    def done_editing(self):
        """True once the 'done editing' checkbox has been ticked."""
        return any(
            checked and self.EDIT_DONE_MAKR in text
            for checked, text in self.tasks()
        )

    def add(self, line):
        """Append a raw markdown line."""
        self.lines.append(line)

    def add_task(self, mark, contents):
        """Append a checkbox line, pre-checked when *mark* is truthy."""
        box = "x" if mark else " "
        self.add(f"- [{box}] {contents.strip()}")

    def add_done_editing(self, mark):
        """Append the special 'done editing' checkbox."""
        self.add_task(mark, self.EDIT_DONE_MAKR)

    def add_video(self, video_fn):
        """Append the video embed placeholder for *video_fn*."""
        # NOTE(review): `ext` is computed but never used — the embed markup
        # may have been lost upstream; confirm before relying on this method.
        ext = os.path.splitext(video_fn)[1][1:]
        self.add(
            f'\n\n'
        )

    def _parse_task_status(self, line):
        """Return (is_checked, text) for a checkbox line, else (None, line)."""
        matched = re.match(r"- +\[([ x])\] +(.*)", line)
        if matched is None:
            return None, line
        state, text = matched.groups()
        return state.lower() == "x", text
95 |
96 |
def check_exists(output, force):
    """Return True when *output* already exists and must not be overwritten.

    Existing + force logs an overwrite notice and returns False so the
    caller proceeds; existing without force logs a skip notice and returns
    True.
    """
    if not os.path.exists(output):
        return False
    if force:
        logging.info(f"{output} exists. Will overwrite it")
        return False
    logging.info(
        f"{output} exists, skipping... Use the --force flag to overwrite"
    )
    return True
107 |
108 |
def expand_segments(segments, expand_head, expand_tail, total_length):
    """Pad each segment's head and tail without overlapping its neighbours.

    The padded start never reaches before the previous segment's end (or 0),
    and the padded end never reaches past the next segment's start (or
    *total_length* for the last one).
    """
    padded = []
    last = len(segments) - 1
    for idx, seg in enumerate(segments):
        lower = 0 if idx == 0 else segments[idx - 1]["end"]
        upper = total_length if idx == last else segments[idx + 1]["start"]
        padded.append(
            {
                "start": max(seg["start"] - expand_head, lower),
                "end": min(seg["end"] + expand_tail, upper),
            }
        )
    return padded
121 |
122 |
def remove_short_segments(segments, threshold):
    """Keep only segments strictly longer than *threshold*."""
    kept = []
    for seg in segments:
        if seg["end"] - seg["start"] > threshold:
            kept.append(seg)
    return kept
126 |
127 |
def merge_adjacent_segments(segments, threshold):
    """Merge neighbouring segments whose gap is smaller than *threshold*.

    Note: the merged segment reuses (and mutates) the first dict of each
    merged run, matching the original behaviour.
    """
    merged = []
    idx = 0
    total = len(segments)
    while idx < total:
        current = segments[idx]
        probe = idx + 1
        # Absorb successors while they start before current end + threshold;
        # the comparison uses the continually-updated end of `current`.
        while probe < total and segments[probe]["start"] < current["end"] + threshold:
            current["end"] = segments[probe]["end"]
            idx = probe
            probe += 1
        idx += 1
        merged.append(current)
    return merged
143 |
144 |
def compact_rst(sub_fn, encoding):
    """Toggle a subtitle file between the standard .srt layout and a compact
    one-line-per-subtitle layout that is easier to hand-edit.

    ``x.srt`` -> ``x_compact.srt`` and ``x_compact.srt`` -> ``x.srt``.
    """
    cc = opencc.OpenCC("t2s")  # normalize traditional Chinese to simplified

    base, ext = os.path.splitext(sub_fn)
    COMPACT = "_compact"
    if ext != ".srt":
        logging.fatal("only .srt file is supported")
        # logging.fatal() only logs — without this return the function
        # previously carried on and mangled the non-srt file.
        return

    if base.endswith(COMPACT):
        # Compact -> original .srt
        with open(sub_fn, encoding=encoding) as f:
            lines = f.readlines()
        subs = []
        for line in lines:
            items = line.split(" ")
            if len(items) < 4:
                # Not "<start> --> <end> <text...>"; skip malformed lines.
                continue
            subs.append(
                srt.Subtitle(
                    index=0,
                    start=srt.srt_timestamp_to_timedelta(items[0]),
                    end=srt.srt_timestamp_to_timedelta(items[2]),
                    content=" ".join(items[3:]).strip(),
                )
            )
        with open(base[: -len(COMPACT)] + ext, "wb") as f:
            f.write(srt.compose(subs).encode(encoding, "replace"))
    else:
        # Original .srt -> compact
        with open(sub_fn, encoding=encoding) as f:
            subs = srt.parse(f.read())
        with open(base + COMPACT + ext, "wb") as f:
            for s in subs:
                f.write(
                    f"{srt.timedelta_to_srt_timestamp(s.start)} --> {srt.timedelta_to_srt_timestamp(s.end)} "
                    f"{cc.convert(s.content.strip())}\n".encode(encoding, "replace")
                )
182 |
183 |
def trans_srt_to_md(encoding, force, srt_fn, video_fn=None):
    """Convert a ``.srt`` subtitle file into the editable ``.md`` task list.

    Args:
        encoding: document encoding used for reading and writing.
        force: overwrite an existing ``.md`` when True.
        srt_fn: path to the subtitle file (must end with ``.srt``).
        video_fn: optional video file to embed at the top of the markdown.
    """
    base, ext = os.path.splitext(srt_fn)
    if ext != ".srt":
        logging.fatal("only .srt file is supported")
        # logging.fatal() only logs; bail out instead of producing bogus output.
        return
    # Previously `base + ext.split(".")[0] + ".md"` — ext.split(".")[0] is
    # always "" for ".srt", so this is the same path, minus the dead code.
    md_fn = base + ".md"

    if check_exists(md_fn, force):
        # The result used to be ignored, so --force had no effect here.
        return

    with open(srt_fn, encoding=encoding) as f:
        subs = srt.parse(f.read())

    md = MD(md_fn, encoding)
    md.clear()
    md.add_done_editing(False)
    if video_fn:
        if not is_video(video_fn):
            logging.fatal(f"{video_fn} may not be a video")
        md.add_video(os.path.basename(video_fn))
    md.add(
        f"\nTexts generated from [{os.path.basename(srt_fn)}]({os.path.basename(srt_fn)})."
        "Mark the sentences to keep for autocut.\n"
        "The format is [subtitle_index,duration_in_second] subtitle context.\n\n"
    )

    for s in subs:
        sec = s.start.seconds
        pre = f"[{s.index},{sec // 60:02d}:{sec % 60:02d}]"
        md.add_task(False, f"{pre:11} {s.content.strip()}")
    md.write()
213 |
--------------------------------------------------------------------------------
/imgs/typora.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lewangdev/autocut/e631c342d61004309ca6f7ab30aee5c30d25e354/imgs/typora.jpg
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | name = autocut
3 | version = attr: autocut.__version__
4 | license = Apache Software License
5 | description = Cut video by subtitles
6 | long_description = file: README.md
7 | classifiers =
8 | License :: OSI Approved :: Apache Software License
9 | Operating System :: OS Independent
10 | Programming Language :: Python :: 3
11 |
12 | [options]
13 | packages = find:
14 | include_package_data = True
15 | python_requires = >= 3.9
16 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
from setuptools import setup, find_packages

# Runtime dependencies installed alongside the package.
requirements = [
    "srt",
    "moviepy",
    "opencc-python-reimplemented",
    "torchaudio",
    "parameterized",
    "faster-whisper",
    "tqdm",
]


# Package metadata (name, version, description) lives in setup.cfg; here we
# only wire up the dependency list and the `autocut` console entry point.
setup(
    name="autocut",
    install_requires=requirements,
    packages=find_packages(),
    entry_points={
        "console_scripts": [
            "autocut = autocut.main:main",
        ]
    },
)
24 |
--------------------------------------------------------------------------------
/test/config.py:
--------------------------------------------------------------------------------
import logging
import os

# Root logger shared by the test suite.
logger = logging.getLogger()
# Collect DEBUG and above (the default would only collect WARNING+).
logger.setLevel("DEBUG")
# Log line format shared by both handlers.
fmt = logging.Formatter("%(filename)s-%(lineno)d-%(asctime)s-%(levelname)s-%(message)s")
# File handler: write logs under ./log with an explicit encoding.
if not os.path.exists("./log"):
    os.makedirs("./log")
file_handler = logging.FileHandler("./log/log.txt", encoding="utf-8")
# Handler level.
file_handler.setLevel("DEBUG")
# Apply the shared format.
file_handler.setFormatter(fmt)
# Console handler.
ch = logging.StreamHandler()
# Handler level.
ch.setLevel("DEBUG")
# Apply the shared format.
ch.setFormatter(fmt)
# Attach both handlers to the logger:
# file output
logger.addHandler(file_handler)
# console output
logger.addHandler(ch)

# Fixture locations (paths are relative to the repository root).
TEST_MEDIA_PATH = "./test/media/"
TEST_CONTENT_PATH = "./test/content/"
# Media fixtures exercised by the full transcription tests.
TEST_MEDIA_FILE = [
    "test001.mp4",
    "test002.mov",
    "test003.mkv",
    "test004.flv",
    "test005.mp3",
]

# English-language fixture, and a reduced set for faster tests.
TEST_MEDIA_FILE_LANG = ["test001_en.mp4"]
TEST_MEDIA_FILE_SIMPLE = ["test001.mp4", "test005.mp3"]
42 |
43 |
class TestArgs:
    """Minimal stand-in for the argparse namespace consumed by autocut commands.

    Defaults mirror the CLI defaults except for ``whisper_model`` ("tiny"
    keeps tests fast). ``inputs`` is always initialised empty and filled in
    by each test.
    """

    def __init__(
        self,
        encoding="utf-8",
        sampling_rate=16000,
        bitrate="10m",
        lang="zh",
        prompt="",
        whisper_model="tiny",
        device="cpu",  # double quotes: single quotes here failed `black --check` in CI
        vad=False,
        force=False,
    ):
        self.inputs = []
        self.bitrate = bitrate
        self.encoding = encoding
        self.sampling_rate = sampling_rate
        self.lang = lang
        self.prompt = prompt
        self.whisper_model = whisper_model
        self.device = device
        self.vad = vad
        self.force = force
67 |
--------------------------------------------------------------------------------
/test/content/test.srt:
--------------------------------------------------------------------------------
1 | 1
2 | 00:00:00,000 --> 00:00:05,000
3 | 大家好,我的名字是AutoCut.这是一条用于测试的视频。
4 |
5 | 2
6 | 00:00:05,000 --> 00:00:10,260
7 | Hello, my name is AutoCut. This is a video for testing.
8 |
9 |
--------------------------------------------------------------------------------
/test/content/test_md.md:
--------------------------------------------------------------------------------
1 | - [x] <-- Mark if you are done editing.
2 |
3 |
4 |
5 | Texts generated from [test001.srt](test001.srt).Mark the sentences to keep for autocut.
6 | The format is [subtitle_index,duration_in_second] subtitle context.
7 |
8 | - [ ] [1,00:00] 大家好,我的名字是AutoCut.这是一条用于测试的视频。
9 | - [x] [2,00:05] Hello, my name is AutoCut. This is a video for testing.
10 |
--------------------------------------------------------------------------------
/test/content/test_srt.srt:
--------------------------------------------------------------------------------
1 | 1
2 | 00:00:00,000 --> 00:00:05,000
3 | 大家好,我的名字是AutoCut.这是一条用于测试的视频。
4 |
5 |
--------------------------------------------------------------------------------
/test/media/test001.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lewangdev/autocut/e631c342d61004309ca6f7ab30aee5c30d25e354/test/media/test001.mp4
--------------------------------------------------------------------------------
/test/media/test001_en.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lewangdev/autocut/e631c342d61004309ca6f7ab30aee5c30d25e354/test/media/test001_en.mp4
--------------------------------------------------------------------------------
/test/media/test002.mov:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lewangdev/autocut/e631c342d61004309ca6f7ab30aee5c30d25e354/test/media/test002.mov
--------------------------------------------------------------------------------
/test/media/test003.mkv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lewangdev/autocut/e631c342d61004309ca6f7ab30aee5c30d25e354/test/media/test003.mkv
--------------------------------------------------------------------------------
/test/media/test004.flv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lewangdev/autocut/e631c342d61004309ca6f7ab30aee5c30d25e354/test/media/test004.flv
--------------------------------------------------------------------------------
/test/media/test005.mp3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lewangdev/autocut/e631c342d61004309ca6f7ab30aee5c30d25e354/test/media/test005.mp3
--------------------------------------------------------------------------------
/test/test_cut.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 | import unittest
4 |
5 | from parameterized import parameterized, param
6 |
7 | from autocut.cut import Cutter
8 | from config import TestArgs, TEST_MEDIA_PATH, TEST_MEDIA_FILE_SIMPLE, TEST_CONTENT_PATH
9 |
10 |
class TestCut(unittest.TestCase):
    """Integration tests for Cutter against the small media fixtures."""

    @classmethod
    def setUpClass(cls):
        logging.info("检查测试文件是否正常存在")
        found = os.listdir(TEST_MEDIA_PATH)
        logging.info(
            "应存在文件列表:" + str(TEST_MEDIA_FILE_SIMPLE) + " 扫描到文件列表:" + str(found)
        )
        for name in TEST_MEDIA_FILE_SIMPLE:
            assert name in found

    def tearDown(self):
        # Remove any cut output a test may have produced.
        for name in TEST_MEDIA_FILE_SIMPLE:
            stem = os.path.join(TEST_MEDIA_PATH, os.path.splitext(name)[0] + "_cut.")
            for suffix in ("mp4", "mp3"):
                if os.path.exists(stem + suffix):
                    os.remove(stem + suffix)

    @parameterized.expand([param(file) for file in TEST_MEDIA_FILE_SIMPLE])
    def test_srt_cut(self, file_name):
        """Cutting with a plain .srt produces a *_cut media file."""
        args = TestArgs()
        args.inputs = [
            os.path.join(TEST_MEDIA_PATH, file_name),
            os.path.join(TEST_CONTENT_PATH, "test_srt.srt"),
        ]
        Cutter(args).run()
        stem = os.path.join(TEST_MEDIA_PATH, os.path.splitext(file_name)[0] + "_cut.")
        self.assertTrue(os.path.exists(stem + "mp4") or os.path.exists(stem + "mp3"))

    @parameterized.expand([param(file) for file in TEST_MEDIA_FILE_SIMPLE])
    def test_md_cut(self, file_name):
        """Cutting with a .srt plus an edited .md produces a *_cut media file."""
        args = TestArgs()
        args.inputs = [
            TEST_MEDIA_PATH + file_name,
            os.path.join(TEST_CONTENT_PATH, "test.srt"),
            os.path.join(TEST_CONTENT_PATH, "test_md.md"),
        ]
        Cutter(args).run()
        stem = os.path.join(TEST_MEDIA_PATH, os.path.splitext(file_name)[0] + "_cut.")
        self.assertTrue(os.path.exists(stem + "mp4") or os.path.exists(stem + "mp3"))
64 |
--------------------------------------------------------------------------------
/test/test_transcribe.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 | import unittest
4 |
5 | from parameterized import parameterized, param
6 |
7 | from autocut.utils import MD
8 | from config import (
9 | TEST_MEDIA_FILE,
10 | TestArgs,
11 | TEST_MEDIA_FILE_SIMPLE,
12 | TEST_MEDIA_FILE_LANG,
13 | TEST_MEDIA_PATH,
14 | )
15 | from autocut.transcribe import Transcribe
16 |
17 |
class TestTranscribe(unittest.TestCase):
    """End-to-end tests for Transcribe over the bundled media fixtures."""

    @classmethod
    def setUpClass(cls):
        # Verify all expected media fixtures are present before running tests.
        logging.info("检查测试文件是否正常存在")
        scan_file = os.listdir(TEST_MEDIA_PATH)
        logging.info(
            "应存在文件列表:"
            + str(TEST_MEDIA_FILE)
            + str(TEST_MEDIA_FILE_LANG)
            + str(TEST_MEDIA_FILE_SIMPLE)
            + " 扫描到文件列表:"
            + str(scan_file)
        )
        for file in TEST_MEDIA_FILE:
            assert file in scan_file
        for file in TEST_MEDIA_FILE_LANG:
            assert file in scan_file
        for file in TEST_MEDIA_FILE_SIMPLE:
            assert file in scan_file

    @classmethod
    def tearDownClass(cls):
        # Drop every generated subtitle/markdown file once the class is done.
        for file in os.listdir(TEST_MEDIA_PATH):
            if file.endswith("md") or file.endswith("srt"):
                os.remove(TEST_MEDIA_PATH + file)

    def tearDown(self):
        # Remove outputs of the simple fixtures between individual tests.
        for file in TEST_MEDIA_FILE_SIMPLE:
            if os.path.exists(TEST_MEDIA_PATH + file.split(".")[0] + ".md"):
                os.remove(TEST_MEDIA_PATH + file.split(".")[0] + ".md")
            if os.path.exists(TEST_MEDIA_PATH + file.split(".")[0] + ".srt"):
                os.remove(TEST_MEDIA_PATH + file.split(".")[0] + ".srt")

    @parameterized.expand([param(file) for file in TEST_MEDIA_FILE])
    def test_default_transcribe(self, file_name):
        """Default arguments should produce a .md next to the media file."""
        logging.info("检查默认参数生成字幕")
        args = TestArgs()
        args.inputs = [TEST_MEDIA_PATH + file_name]
        transcribe = Transcribe(args)
        transcribe.run()
        self.assertTrue(
            os.path.exists(TEST_MEDIA_PATH + file_name.split(".")[0] + ".md")
        )

    @parameterized.expand([param(file) for file in TEST_MEDIA_FILE])
    def test_jump_done_transcribe(self, file_name):
        """Re-running without --force should skip yet leave the .md in place."""
        logging.info("检查默认参数跳过生成字幕")
        args = TestArgs()
        args.inputs = [TEST_MEDIA_PATH + file_name]
        transcribe = Transcribe(args)
        transcribe.run()
        self.assertTrue(
            os.path.exists(TEST_MEDIA_PATH + file_name.split(".")[0] + ".md")
        )

    @parameterized.expand([param(file) for file in TEST_MEDIA_FILE_LANG])
    def test_en_transcribe(self, file_name):
        """--lang=en should transcribe the English fixture."""
        logging.info("检查--lang='en'参数生成字幕")
        args = TestArgs()
        args.lang = "en"
        args.inputs = [TEST_MEDIA_PATH + file_name]
        transcribe = Transcribe(args)
        transcribe.run()
        self.assertTrue(
            os.path.exists(TEST_MEDIA_PATH + file_name.split(".")[0] + ".md")
        )

    @parameterized.expand([param(file) for file in TEST_MEDIA_FILE_LANG])
    def test_force_transcribe(self, file_name):
        """--force regenerates the .md; new content should not grow in size."""
        logging.info("检查--force参数生成字幕")
        args = TestArgs()
        args.force = True
        args.inputs = [TEST_MEDIA_PATH + file_name]
        # Size of the .md before regeneration (empty string if absent).
        md0_lens = len(
            "".join(
                MD(
                    TEST_MEDIA_PATH + file_name.split(".")[0] + ".md", args.encoding
                ).lines
            )
        )
        transcribe = Transcribe(args)
        transcribe.run()
        md1_lens = len(
            "".join(
                MD(
                    TEST_MEDIA_PATH + file_name.split(".")[0] + ".md", args.encoding
                ).lines
            )
        )
        self.assertLessEqual(md1_lens, md0_lens)

    @parameterized.expand([param(file) for file in TEST_MEDIA_FILE_SIMPLE])
    def test_encoding_transcribe(self, file_name):
        """Output written with --encoding=gbk should be readable back as gbk."""
        logging.info("检查--encoding参数生成字幕")
        args = TestArgs()
        args.encoding = "gbk"
        args.inputs = [TEST_MEDIA_PATH + file_name]
        transcribe = Transcribe(args)
        transcribe.run()
        # Opening with the same codec raises UnicodeError on a mismatch.
        with open(
            os.path.join(TEST_MEDIA_PATH + file_name.split(".")[0] + ".md"),
            encoding="gbk",
        ):
            self.assertTrue(True)

    @parameterized.expand([param(file) for file in TEST_MEDIA_FILE_SIMPLE])
    def test_vad_transcribe(self, file_name):
        """--vad should still produce a .md for the simple fixtures."""
        logging.info("检查--vad参数生成字幕")
        args = TestArgs()
        args.force = True
        args.vad = True
        args.inputs = [TEST_MEDIA_PATH + file_name]
        transcribe = Transcribe(args)
        transcribe.run()
        self.assertTrue(
            os.path.exists(TEST_MEDIA_PATH + file_name.split(".")[0] + ".md")
        )
135 |
--------------------------------------------------------------------------------