├── .github
└── workflows
│ └── release.yml
├── .gitignore
├── .goreleaser.yaml
├── Dockerfile
├── LICENSE
├── README.md
├── cmd
├── desktop
│ └── main.go
└── server
│ └── main.go
├── config
├── config-example.toml
└── config.go
├── docs
├── README_ar.md
├── README_de.md
├── README_es.md
├── README_fr.md
├── README_jp.md
├── README_kr.md
├── README_pt.md
├── README_rus.md
├── README_vi.md
├── README_zh.md
├── aliyun.md
├── docker.md
├── faq.md
├── get_cookies.md
└── images
│ ├── alignment.png
│ ├── aliyun_accesskey_1.png
│ ├── aliyun_oss_1.png
│ ├── aliyun_oss_2.png
│ ├── aliyun_oss_3.png
│ ├── aliyun_oss_4.png
│ ├── aliyun_oss_5.png
│ ├── aliyun_speech_1.png
│ ├── aliyun_speech_2.png
│ ├── aliyun_speech_3.png
│ ├── aliyun_speech_4.png
│ ├── bailian_1.png
│ ├── export_cookies.png
│ ├── logo.png
│ ├── ui.jpg
│ └── ui_desktop.png
├── go.mod
├── go.sum
├── internal
├── api
│ └── subtitle.go
├── deps
│ └── checker.go
├── desktop
│ ├── components.go
│ ├── desktop.go
│ ├── file.go
│ ├── subtitle.go
│ ├── theme.go
│ └── ui.go
├── dto
│ └── subtitle_task.go
├── handler
│ ├── init.go
│ ├── middleware.go
│ └── subtitle_task.go
├── response
│ └── response.go
├── router
│ └── router.go
├── server
│ └── server.go
├── service
│ ├── audio2subtitle.go
│ ├── audio2subtitle_test.go
│ ├── get_video_info.go
│ ├── init.go
│ ├── link2file.go
│ ├── srt2speech.go
│ ├── srt_embed.go
│ ├── subtitle_service.go
│ └── upload_subtitle.go
├── storage
│ ├── bin.go
│ └── subtitle_task.go
└── types
│ ├── embed_subtitle.go
│ ├── fasterwhisper.go
│ ├── interface.go
│ ├── language.go
│ ├── subtitle_task.go
│ ├── whispercpp.go
│ └── whisperkit.go
├── log
└── zap.go
├── pkg
├── aliyun
│ ├── asr.go
│ ├── base.go
│ ├── chat.go
│ ├── oss.go
│ ├── tts.go
│ └── voice_clone.go
├── fasterwhisper
│ ├── init.go
│ └── transcription.go
├── openai
│ ├── init.go
│ └── openai.go
├── util
│ ├── base.go
│ ├── download.go
│ ├── language.go
│ └── subtitle.go
├── whisper
│ ├── init.go
│ └── whisper.go
├── whispercpp
│ ├── init.go
│ └── transcription.go
└── whisperkit
│ ├── init.go
│ └── transcription.go
└── static
├── background.jpg
├── embed.go
└── index.html
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
1 | name: release
2 |
3 | on:
4 | push:
5 | tags:
6 | - "v*"
7 | - "v*-*"
8 |
9 | permissions:
10 | contents: write
11 |
12 | jobs:
13 | build-desktop:
14 | runs-on: ubuntu-latest
15 | steps:
16 | - name: Checkout code
17 | uses: actions/checkout@v4
18 |
19 | - name: Set up Go
20 | uses: actions/setup-go@v5
21 | with:
22 | go-version-file: "go.mod"
23 |
24 | - name: Install XGO
25 | run: |
26 | go install src.techknowlogick.com/xgo@latest
27 |
28 | - name: Pull Docker Image
29 | run: |
30 | docker pull ghcr.io/techknowlogick/xgo:latest
31 |
32 | - name: Get Version
33 | id: version
34 | uses: actions/github-script@v7
35 | with:
36 | script: |
37 | const rawTag = '${{ github.ref_name }}';
38 | const version = rawTag.replace(/^v/, ''); // Remove the leading 'v' if present
39 | console.log(`Version: ${version}`);
40 | core.setOutput('version', version);
41 |
42 | - name: Build Binary
43 | run: |
44 | targets=(
45 | # macOS (amd64)
46 | "darwin amd64 _amd64 macOS"
47 | # macOS (arm64)
48 | "darwin arm64 _arm64 macOS"
49 | # Windows (amd64)
50 | "windows amd64 .exe Windows"
51 | # Windows (386)
52 | "windows 386 _i386.exe Windows"
53 | )
54 | mkdir -p build
55 |
56 | # 遍历所有平台
57 | for entry in "${targets[@]}"; do
58 | (
59 | # 拆分字符串
60 | IFS=' ' read -r -a parts <<< "$entry"
61 | os="${parts[0]}"
62 | arch="${parts[1]}"
63 | suffix="${parts[2]}"
64 | display_os="${parts[3]}"
65 | log_prefix="[${os}-${arch}]"
66 | # 构建目标目录
67 | target_dir="dist/${os}_${arch}"
68 | mkdir -p "$target_dir"
69 | # 使用 xgo 构建
70 | echo "${log_prefix} 🚀 Building for $os/$arch..."
71 | xgo \
72 | --targets="$os/$arch" \
73 | --out "krillinai_desktop" \
74 | --dest "$target_dir" \
75 | ./cmd/desktop 2>&1 | sed "s/^/${log_prefix} /"
          # 生成最终二进制文件名并输出日志
77 | binary_name="KrillinAI_${{ steps.version.outputs.version }}_Desktop_${display_os}${suffix}"
78 | # 移动并重命名文件
79 | mv "$target_dir"/krillinai_desktop* "build/$binary_name"
80 | echo "${log_prefix} ✅ Built: build/$binary_name"
81 | ) &
82 | done
83 |
84 | wait
85 | echo "✨ All concurrent tasks completed!"
86 |
87 | - name: Upload artifacts
88 | uses: actions/upload-artifact@v4
89 | with:
90 | path: build/*
91 | retention-days: 1
92 |
93 | goreleaser:
94 | needs: build-desktop
95 | if: always()
96 | runs-on: ubuntu-latest
97 | steps:
98 | - name: Set up QEMU
99 | uses: docker/setup-qemu-action@v3
100 |
101 | - name: Set up Docker Buildx
102 | uses: docker/setup-buildx-action@v3
103 |
104 | - name: Checkout code
105 | uses: actions/checkout@v4
106 | with:
107 | fetch-depth: 20
108 |
109 | - name: Download artifacts
110 | uses: actions/download-artifact@v4
111 | with:
112 | path: build
113 |
114 | - name: Set up Go
115 | uses: actions/setup-go@v5
116 | with:
117 | go-version-file: "go.mod"
118 |
119 | - name: Login to Docker Hub
120 | uses: docker/login-action@v3
121 | with:
122 | username: ${{ secrets.DOCKER_USERNAME }}
123 | password: ${{ secrets.DOCKERHUB_TOKEN }}
124 |
125 | - name: Run GoReleaser
126 | uses: goreleaser/goreleaser-action@v6
127 | with:
128 | distribution: goreleaser
129 | version: latest
130 | args: release --clean
131 | env:
132 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
133 | DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }}
134 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | tasks/
3 | cookies.txt
4 | .vscode/
5 | config/config.toml
6 | bin/
7 | models/
8 | uploads/
9 | app.log
10 | build/
11 | dist/
12 |
13 | # MACOS
14 | .DS_Store
15 | ._*
16 |
--------------------------------------------------------------------------------
/.goreleaser.yaml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | builds:
4 | - env:
5 | - CGO_ENABLED=0
6 | goos:
7 | - darwin
8 | - linux
9 | - windows
10 | main: ./cmd/server/main.go
11 |
12 | # Docker 构建配置
13 | dockers:
14 | - image_templates:
15 | - "{{ tolower .Env.DOCKER_USERNAME }}/{{ tolower .ProjectName }}:{{.Version}}-amd64"
16 | - "{{ tolower .Env.DOCKER_USERNAME }}/{{ tolower .ProjectName }}:latest-amd64"
17 | dockerfile: Dockerfile
18 | use: buildx
19 | build_flag_templates:
20 | - "--platform=linux/amd64"
21 | - "--label=org.opencontainers.image.created={{.Date}}"
22 | - "--label=org.opencontainers.image.title={{.ProjectName}}"
23 | - "--label=org.opencontainers.image.revision={{.FullCommit}}"
24 | - "--label=org.opencontainers.image.version={{.Version}}"
25 | - "--label=org.opencontainers.image.source=https://github.com/{{ .Env.GITHUB_REPOSITORY_OWNER }}/{{.ProjectName}}"
26 | - image_templates:
27 | - "{{ tolower .Env.DOCKER_USERNAME }}/{{ tolower .ProjectName }}:{{.Version}}-arm64"
28 | - "{{ tolower .Env.DOCKER_USERNAME }}/{{ tolower .ProjectName }}:latest-arm64"
29 | dockerfile: Dockerfile
30 | use: buildx
31 | build_flag_templates:
32 | - "--platform=linux/arm64"
33 | - "--label=org.opencontainers.image.created={{.Date}}"
34 | - "--label=org.opencontainers.image.title={{.ProjectName}}"
35 | - "--label=org.opencontainers.image.revision={{.FullCommit}}"
36 | - "--label=org.opencontainers.image.version={{.Version}}"
37 | - "--label=org.opencontainers.image.source=https://github.com/{{ .Env.GITHUB_REPOSITORY_OWNER }}/{{.ProjectName}}"
38 |
39 | docker_manifests:
40 | - name_template: "{{ tolower .Env.DOCKER_USERNAME }}/{{ tolower .ProjectName }}:{{.Version}}"
41 | image_templates:
42 | - "{{ tolower .Env.DOCKER_USERNAME }}/{{ tolower .ProjectName }}:{{.Version}}-amd64"
43 | - "{{ tolower .Env.DOCKER_USERNAME }}/{{ tolower .ProjectName }}:{{.Version}}-arm64"
44 | - name_template: "{{ tolower .Env.DOCKER_USERNAME }}/{{ tolower .ProjectName }}:latest"
45 | image_templates:
46 | - "{{ tolower .Env.DOCKER_USERNAME }}/{{ tolower .ProjectName }}:latest-amd64"
47 | - "{{ tolower .Env.DOCKER_USERNAME }}/{{ tolower .ProjectName }}:latest-arm64"
48 |
49 | archives:
50 | - formats: ["binary"]
51 | name_template: >-
52 | {{ .ProjectName }}_
53 | {{- .Version }}_
54 | {{- if eq .Os "darwin" }}macOS_{{ .Arch }}
55 | {{- else if and (eq .Os "windows") (eq .Arch "amd64") }}{{ title .Os }}
56 | {{- else }}{{ title .Os }}_
57 | {{- if eq .Arch "amd64" }}x86_64
58 | {{- else if eq .Arch "386" }}i386
59 | {{- else }}{{ .Arch }}{{ end }}
60 | {{- if .Arm }}v{{ .Arm }}{{ end }}
61 | {{- end }}
62 |
63 | release:
64 | extra_files:
65 | - glob: "build/*"
66 |
67 | changelog:
68 | sort: asc
69 | filters:
70 | exclude:
71 | - "^docs:"
72 | - "^test:"
73 | - "^chore:"
74 | - "^ci:"
75 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
FROM alpine:latest AS builder
2 |
3 | # 安装基础工具并创建目录
4 | WORKDIR /build
5 | RUN apk add --no-cache wget && \
6 | mkdir -p bin && \
7 | ARCH=$(uname -m) && \
8 | case "$ARCH" in \
9 | x86_64) \
10 | URL="https://github.com/yt-dlp/yt-dlp/releases/download/2025.01.15/yt-dlp_linux"; \
11 | ;; \
12 | armv7l) \
13 | URL="https://github.com/yt-dlp/yt-dlp/releases/download/2025.01.15/yt-dlp_linux_armv7l"; \
14 | ;; \
15 | aarch64) \
16 | URL="https://github.com/yt-dlp/yt-dlp/releases/download/2025.01.15/yt-dlp_linux_aarch64"; \
17 | ;; \
18 | *) \
19 | echo "Unsupported architecture: $ARCH" && exit 1; \
20 | ;; \
21 | esac && \
22 | wget -O bin/yt-dlp "$URL" && \
23 | chmod +x bin/yt-dlp
24 |
25 | # 最终镜像
26 | FROM jrottenberg/ffmpeg:6.1-alpine
27 |
28 | # 设置工作目录并复制文件
29 | WORKDIR /app
30 | COPY --from=builder /build/bin /app/bin
31 | COPY KrillinAI ./
32 |
33 | # 创建必要目录并设置权限
34 | RUN mkdir -p /app/models && \
35 | chmod +x ./KrillinAI
36 |
37 | # 声明卷
38 | VOLUME ["/app/bin", "/app/models"]
39 |
40 | # 设置环境变量
41 | ENV PATH="/app/bin:${PATH}"
42 |
43 | # 设置端口
44 | EXPOSE 8888/tcp
45 |
46 | # 设置入口点
47 | ENTRYPOINT ["./KrillinAI"]
--------------------------------------------------------------------------------
/cmd/desktop/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "go.uber.org/zap"
5 | "krillin-ai/config"
6 | "krillin-ai/internal/desktop"
7 | "krillin-ai/internal/server"
8 | "krillin-ai/log"
9 | "os"
10 | )
11 |
func main() {
	log.InitLogger()
	// Flush buffered log entries on normal return.
	// NOTE(review): os.Exit below bypasses deferred calls, so buffered
	// logs may be lost on the fatal paths — confirm this is acceptable.
	defer log.GetLogger().Sync()

	config.LoadConfig()
	if config.Conf.App.TranscribeProvider == "" || config.Conf.App.LlmProvider == "" {
		// Ensure a minimal baseline configuration exists, then persist it
		// so the desktop UI has something to edit.
		config.Conf.App.TranscribeProvider = "openai"
		config.Conf.App.LlmProvider = "openai"
		err := config.SaveConfig()
		if err != nil {
			log.GetLogger().Error("保存配置失败", zap.Error(err))
			os.Exit(1)
		}
	}
	// Reload after the potential save above. NOTE(review): when nothing was
	// saved, this second load re-reads the same file that was already loaded
	// — presumably harmless but redundant; confirm before removing.
	config.LoadConfig()
	// Run the HTTP backend concurrently; desktop.Show() blocks until the
	// UI window is closed.
	go func() {
		if err := server.StartBackend(); err != nil {
			log.GetLogger().Error("后端服务启动失败", zap.Error(err))
			os.Exit(1)
		}
	}()
	desktop.Show()
}
36 |
--------------------------------------------------------------------------------
/cmd/server/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "go.uber.org/zap"
5 | "krillin-ai/config"
6 | "krillin-ai/internal/deps"
7 | "krillin-ai/internal/server"
8 | "krillin-ai/log"
9 | "os"
10 | )
11 |
12 | func main() {
13 | log.InitLogger()
14 | defer log.GetLogger().Sync()
15 |
16 | var err error
17 | config.LoadConfig()
18 |
19 | if err = config.CheckConfig(); err != nil {
20 | log.GetLogger().Error("加载配置失败", zap.Error(err))
21 | return
22 | }
23 |
24 | if err = deps.CheckDependency(); err != nil {
25 | log.GetLogger().Error("依赖环境准备失败", zap.Error(err))
26 | return
27 | }
28 | if err = server.StartBackend(); err != nil {
29 | log.GetLogger().Error("后端服务启动失败", zap.Error(err))
30 | os.Exit(1)
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/config/config-example.toml:
--------------------------------------------------------------------------------
1 | [app]
2 | segment_duration = 5 # 音频切分处理间隔,单位:分钟,建议值:5-10,如果视频中话语较少可以适当提高
3 | translate_parallel_num = 5 # 并发进行模型转录和翻译的数量上限,建议值:5,如果使用了本地模型,该项自动不生效
4 | proxy = "" # 网络代理地址,格式如http://127.0.0.1:7890,可不填
5 | transcribe_provider = "openai" # 语音识别,当前可选值:openai,fasterwhisper,whisperkit,whisper.cpp,aliyun。(fasterwhisper不支持macOS,whisperkit只支持M芯片)
6 | llm_provider = "openai" # LLM,当前可选值:openai,aliyun
7 |
8 | [server]
9 | host = "127.0.0.1"
10 | port = 8888
11 |
12 | # 下方的配置非必填,请结合上方的选项和文档说明进行配置
13 | [local_model]
14 | fasterwhisper = "large-v2" # fasterwhisper的本地模型可选值:tiny,medium,large-v2,建议medium及以上
15 | whisperkit = "large-v2" # whisperkit的本地模型可选值:large-v2
16 | whispercpp = "large-v2" # whisper.cpp的本地模型
17 |
18 | [openai]
19 | base_url = "" # OpenAI API 自定义base url,可配合转发站密钥使用,留空为默认API地址
20 | model = "" # 指定模型名,可通过此字段结合base_url使用外部任何与OpenAI API兼容的大模型服务,留空默认为gpt-4o-mini
21 | api_key = "sk-XXX" # OpenAI API密钥
22 | [openai.whisper] # 由于使用whisperAPI进行语音识别时,上方可能配置使用了OpenAI格式兼容的其它厂商的模型,所以此处需要独立填入openai的配置信息
23 | base_url = ""
24 | api_key = ""
25 |
26 | [aliyun] # 具体请参考文档中的“阿里云配置说明”
27 | [aliyun.oss]
28 | access_key_id = ""
29 | access_key_secret = ""
30 | bucket = ""
31 | [aliyun.speech]
32 | access_key_id = ""
33 | access_key_secret = ""
34 | app_key= ""
35 | [aliyun.bailian]
36 | api_key = ""
--------------------------------------------------------------------------------
/config/config.go:
--------------------------------------------------------------------------------
1 | package config
2 |
3 | import (
4 | "errors"
5 | "fmt"
6 | "github.com/BurntSushi/toml"
7 | "go.uber.org/zap"
8 | "krillin-ai/log"
9 | "net/url"
10 | "os"
11 | "path/filepath"
12 | "runtime"
13 | )
14 |
// App holds application-level behaviour settings (segmenting, parallelism,
// proxy, and the selected transcription/LLM providers).
type App struct {
	SegmentDuration      int      `toml:"segment_duration"`       // audio split interval, in minutes
	TranslateParallelNum int      `toml:"translate_parallel_num"` // max concurrent transcribe/translate tasks
	Proxy                string   `toml:"proxy"`                  // e.g. http://127.0.0.1:7890; may be empty
	ParsedProxy          *url.URL `toml:"-"`                      // derived from Proxy by CheckConfig; not persisted
	TranscribeProvider   string   `toml:"transcribe_provider"`    // openai|fasterwhisper|whisperkit|whispercpp|aliyun
	LlmProvider          string   `toml:"llm_provider"`           // openai|aliyun
}

// Server holds the HTTP listen address.
type Server struct {
	Host string `toml:"host"`
	Port int    `toml:"port"`
}

// LocalModel selects the model variant for each local transcription engine.
type LocalModel struct {
	Fasterwhisper string `toml:"fasterwhisper"`
	Whisperkit    string `toml:"whisperkit"`
	Whispercpp    string `toml:"whispercpp"`
}

// OpenAiWhisper holds separate credentials for the Whisper API, which may
// differ from the main OpenAI-compatible LLM endpoint.
type OpenAiWhisper struct {
	BaseUrl string `toml:"base_url"`
	ApiKey  string `toml:"api_key"`
}

// Openai configures any OpenAI-API-compatible LLM service.
type Openai struct {
	BaseUrl string        `toml:"base_url"` // empty means the default API address
	Model   string        `toml:"model"`    // empty presumably falls back to a default model — confirm in caller
	ApiKey  string        `toml:"api_key"`
	Whisper OpenAiWhisper `toml:"whisper"`
}

// AliyunOss holds Alibaba Cloud OSS (object storage) credentials.
type AliyunOss struct {
	AccessKeyId     string `toml:"access_key_id"`
	AccessKeySecret string `toml:"access_key_secret"`
	Bucket          string `toml:"bucket"`
}

// AliyunSpeech holds Alibaba Cloud speech-service credentials.
type AliyunSpeech struct {
	AccessKeyId     string `toml:"access_key_id"`
	AccessKeySecret string `toml:"access_key_secret"`
	AppKey          string `toml:"app_key"`
}

// AliyunBailian holds the Alibaba Cloud Bailian (LLM) API key.
type AliyunBailian struct {
	ApiKey string `toml:"api_key"`
}

// Aliyun groups all Alibaba Cloud service credentials.
type Aliyun struct {
	Oss     AliyunOss     `toml:"oss"`
	Speech  AliyunSpeech  `toml:"speech"`
	Bailian AliyunBailian `toml:"bailian"`
}

// Config is the root of the TOML configuration file.
type Config struct {
	App        App        `toml:"app"`
	Server     Server     `toml:"server"`
	LocalModel LocalModel `toml:"local_model"`
	Openai     Openai     `toml:"openai"`
	Aliyun     Aliyun     `toml:"aliyun"`
}
76 |
// Conf is the process-wide configuration instance. The values below are the
// compiled-in defaults; LoadConfig overwrites them with the contents of
// ./config/config.toml when that file exists.
var Conf = Config{
	App: App{
		SegmentDuration:      5,
		TranslateParallelNum: 5,
		TranscribeProvider:   "openai",
		LlmProvider:          "openai",
	},
	Server: Server{
		Host: "127.0.0.1",
		Port: 8888,
	},
	LocalModel: LocalModel{
		Fasterwhisper: "large-v2",
		Whisperkit:    "large-v2",
		Whispercpp:    "large-v2",
	},
}
94 |
// validateConfig checks that the providers selected in Conf come with the
// credentials / model settings they require. Returns a descriptive error on
// the first missing or invalid item, nil when everything needed is present.
func validateConfig() error {
	// Transcription provider checks.
	switch Conf.App.TranscribeProvider {
	case "openai":
		if Conf.Openai.Whisper.ApiKey == "" {
			return errors.New("使用OpenAI转写服务需要配置 OpenAI API Key")
		}
	case "fasterwhisper":
		if Conf.LocalModel.Fasterwhisper != "tiny" && Conf.LocalModel.Fasterwhisper != "medium" && Conf.LocalModel.Fasterwhisper != "large-v2" {
			return errors.New("检测到开启了fasterwhisper,但模型选型配置不正确,请检查配置")
		}
	case "whisperkit":
		// NOTE(review): this mutates Conf inside a validation routine, and it
		// does so even when the OS check below fails — confirm the side
		// effect is intended.
		Conf.App.TranslateParallelNum = 1
		if runtime.GOOS != "darwin" {
			log.GetLogger().Error("whisperkit只支持macos", zap.String("当前系统", runtime.GOOS))
			return fmt.Errorf("whisperkit只支持macos")
		}
		if Conf.LocalModel.Whisperkit != "large-v2" {
			return errors.New("检测到开启了whisperkit,但模型选型配置不正确,请检查配置")
		}
	case "whispercpp":
		if runtime.GOOS != "windows" { // currently Windows-only; model limited to large-v2 (minimal viable product)
			log.GetLogger().Error("whispercpp only support windows", zap.String("current os", runtime.GOOS))
			return fmt.Errorf("whispercpp only support windows")
		}
		if Conf.LocalModel.Whispercpp != "large-v2" {
			return errors.New("检测到开启了whisper.cpp,但模型选型配置不正确,请检查配置")
		}
	case "aliyun":
		if Conf.Aliyun.Speech.AccessKeyId == "" || Conf.Aliyun.Speech.AccessKeySecret == "" || Conf.Aliyun.Speech.AppKey == "" {
			return errors.New("使用阿里云语音服务需要配置相关密钥")
		}
	default:
		return errors.New("不支持的转录提供商")
	}

	// LLM provider checks.
	switch Conf.App.LlmProvider {
	case "openai":
		if Conf.Openai.ApiKey == "" {
			return errors.New("使用OpenAI LLM服务需要配置 OpenAI API Key")
		}
	case "aliyun":
		if Conf.Aliyun.Bailian.ApiKey == "" {
			return errors.New("使用阿里云百炼服务需要配置 API Key")
		}
	default:
		return errors.New("不支持的LLM提供商")
	}

	return nil
}
148 |
149 | func LoadConfig() {
150 | var err error
151 | configPath := "./config/config.toml"
152 | if _, err = os.Stat(configPath); os.IsNotExist(err) {
153 | return
154 | } else {
155 | log.GetLogger().Info("已找到配置文件,从配置文件中加载配置")
156 | if _, err = toml.DecodeFile(configPath, &Conf); err != nil {
157 | log.GetLogger().Error("加载配置文件失败", zap.Error(err))
158 | return
159 | }
160 | }
161 | }
162 |
163 | // 验证配置
164 | func CheckConfig() error {
165 | var err error
166 | // 解析代理地址
167 | Conf.App.ParsedProxy, err = url.Parse(Conf.App.Proxy)
168 | if err != nil {
169 | return err
170 | }
171 | return validateConfig()
172 | }
173 |
174 | // SaveConfig 保存配置到文件
175 | func SaveConfig() error {
176 | configPath := filepath.Join("config", "config.toml")
177 |
178 | if _, err := os.Stat(configPath); os.IsNotExist(err) {
179 | err = os.MkdirAll(filepath.Dir(configPath), os.ModePerm)
180 | if err != nil {
181 | return err
182 | }
183 | }
184 |
185 | data, err := toml.Marshal(Conf)
186 | if err != nil {
187 | return err
188 | }
189 |
190 | err = os.WriteFile(configPath, data, 0644)
191 | if err != nil {
192 | return err
193 | }
194 |
195 | return nil
196 | }
197 |
--------------------------------------------------------------------------------
/docs/README_ar.md:
--------------------------------------------------------------------------------
1 |
2 |

3 |
4 |
5 | # أداة ترجمة ودبلجة الصوت والفيديو بالذكاء الاصطناعي
6 |
7 |

8 |
9 | **[English](../README.md)|[简体中文](../docs/README_zh.md)|[日本語](../docs/README_jp.md)|[한국어](../docs/README_kr.md)|[Tiếng Việt](../docs/README_vi.md)|[Français](../docs/README_fr.md)|[Deutsch](../docs/README_de.md)|[Español](../docs/README_es.md)|[Português](../docs/README_pt.md)|[Русский](../docs/README_rus.md)|[اللغة العربية](../docs/README_ar.md)**
10 |
11 | [](https://x.com/KrillinAI)
12 | [](https://discord.gg/sKUAsHfy)
13 | [](https://space.bilibili.com/242124650)
14 |
15 |
16 |
17 | ### إصدار جديد لنظامي ويندوز وماك - مرحبًا باختباره وتقديم الملاحظات
18 |
19 | ## نظرة عامة
20 |
21 | كريلين AI هو حل متكامل لتحسين وتوطين الفيديوهات بسهولة. هذه الأداة البسيطة لكن القوية تتعامل مع كل شيء من الترجمة والدبلجة إلى استنساخ الأصوات، وإعادة التنسيق - حيث تقوم بتحويل الفيديوهات بسلاسة بين الوضع الأفقي والعمودي لعرض مثالي على جميع منصات المحتوى (يوتيوب، تيك توك، بيلبلي، دويين، قناة وي تشات، ريد نوت، كوايشو). من خلال سير العمل الشامل، يحوّل كريلين AI اللقطات الخام إلى محتوى نهائي وجاهز للنشر ببضع نقرات فقط.
22 |
23 | الميزات الرئيسية:
24 | 🎯 بدء بنقرة واحدة - ابدأ سير العمل فورًا، النسخة الجديدة لسطح المكتب متاحة الآن - أسهل في الاستخدام!
25 |
26 | 📥 تنزيل الفيديو - يدعم yt-dlp ورفع الملفات المحلية
27 |
28 | 📜 ترجمات دقيقة - تعتمد على Whisper للتعرف عالي الدقة
29 |
30 | 🧠 تقسيم ذكي - تجزئة المحاذاة التلقائية للترجمات بناءً على نماذج اللغات الكبيرة (LLM)
31 |
32 | 🌍 ترجمة احترافية - ترجمة على مستوى الفقرات للحفاظ على الاتساق
33 |
34 | 🔄 استبدال المصطلحات - تبديل المفردات المتخصصة بنقرة واحدة
35 |
36 | 🎙️ الدبلجة واستنساخ الأصوات - اختيار أصوات CosyVoice أو استنساخ الأصوات
37 |
38 | 🎬 تكوين الفيديو - إعادة التنسيق التلقائي للوضع الأفقي/العمودي
39 |
40 | ## عرض توضيحي
41 | الصورة التالية توضح النتيجة بعد إدراج ملف الترجمة - الذي تم إنشاؤه بنقرة واحدة بعد استيراد فيديو محلي مدته 46 دقيقة - في المسار. لم يتم إجراء أي تعديل يدوي على الإطلاق. لا توجد ترجمات ناقصة أو متداخلة، وتقسيم الجمل طبيعي، وجودة الترجمة عالية جدًا.
42 | 
43 |
44 |
45 |
46 |
47 |
48 | ### ترجمة الترجمة النصية
49 | ---
50 | https://github.com/user-attachments/assets/bba1ac0a-fe6b-4947-b58d-ba99306d0339
51 |
52 | |
53 |
54 |
55 | ### الدبلجة
56 | ---
57 | https://github.com/user-attachments/assets/0b32fad3-c3ad-4b6a-abf0-0865f0dd2385
58 |
59 | |
60 |
61 |
62 |
63 | ### الوضع العمودي
64 | ---
65 | https://github.com/user-attachments/assets/c2c7b528-0ef8-4ba9-b8ac-f9f92f6d4e71
66 |
67 | |
68 |
69 |
70 |
71 |
72 | ## 🔍 دعم التعرف على الصوت
73 | _**جميع النماذج المحلية في الجدول أدناه تدعم التثبيت التلقائي للملفات التنفيذية + ملفات النماذج. فقط قم باختيارك، وسيتولى KrillinAI كل شيء آخر لك.**_
74 |
75 | | Service | Supported Platforms | Model Options | Local/Cloud | Notes |
76 | |-----------------|------------------------------|-----------------------------------|-------------|----------------|
77 | | **OpenAI Whisper** | Cross-platform | - | Cloud | Fast with excellent results |
78 | | **FasterWhisper** | Windows/Linux | `tiny`/`medium`/`large-v2` (recommend medium+) | Local | Faster speed, no cloud service overhead |
79 | | **WhisperKit** | macOS (Apple Silicon only) | `large-v2` | Local | Native optimization for Apple chips |
80 | | **Alibaba Cloud ASR** | Cross-platform | - | Cloud | Bypasses China mainland network issues |
81 |
82 | ## 🚀 دعم نماذج اللغة الكبيرة
83 |
84 | ✅ متوافق مع جميع خدمات المتوافقة مع OpenAI API السحابية/المحلية بما في ذلك على سبيل المثال لا الحصر:
85 | - OpenAI
86 | - DeepSeek
87 | - Qwen (Tongyi Qianwen)
88 | - Self-hosted open-source models
89 | - Other OpenAI-format compatible API services
90 |
91 |
92 | ## 🌍 اللغات المدعومة
93 | لغات الإدخال: الصينية، الإنجليزية، اليابانية، الألمانية، التركية (مع إضافة المزيد من اللغات قريبًا)
94 | لغات الترجمة: 101 لغة مدعومة، بما في ذلك الإنجليزية، الصينية، الروسية، الإسبانية، الفرنسية، وغيرها.
95 |
96 | ## معاينة الواجهة
97 | 
98 |
99 | ## 🚀 بدء سريع
100 | ### الخطوات الأساسية
101 |
102 | أولاً، قم بتنزيل ملف الإصدار التنفيذي الذي يتوافق مع نظام جهازك. اتبع التعليمات أدناه للاختيار بين نسخة سطح المكتب أو النسخة العادية، ثم ضع البرنامج في مجلد فارغ. عند تشغيل البرنامج، سيتم إنشاء بعض المجلدات تلقائياً، لذا فإن وضعه في مجلد فارغ يجعل إدارته أسهل.
103 |
104 | [For the desktop version (release files with "desktop" in the name), refer here]
105 | _The desktop version is newly released to address the difficulty beginners face in editing configuration files correctly. It still has some bugs and is being continuously updated._
106 |
107 | انقر نقرًا مزدوجًا على الملف لبدء استخدامه.
108 |
109 | [لنسخة غير سطح المكتب (ملفات الإصدار التي لا تحتوي على "desktop" في الاسم)، ارجع إلى هنا]
110 | _تعتبر نسخة غير سطح المكتب هي الإصدار الأصلي، تتميز بإعدادات أكثر تعقيدًا ولكن بوظائف مستقرة. وهي مناسبة أيضًا للنشر على الخوادم، حيث توفر واجهة مستخدم تعمل عبر الويب._
111 |
112 | قم بإنشاء مجلد `config` في الدليل، ثم أنشئ ملف `config.toml` بداخله. انسخ محتويات ملف `config-example.toml` من مجلد `config` في الكود المصدقي إلى ملف `config.toml` الخاص بك وقم بملء تفاصيل الإعدادات. (إذا كنت ترغب في استخدام نماذج OpenAI ولكنك لا تعرف كيفية الحصول على مفتاح، يمكنك الانضمام إلى المجموعة للحصول على وصول تجريبي مجاني.)
113 |
114 | انقر نقرًا مزدوجًا على الملف التنفيذي أو قم بتشغيله في الطرفية لبدء الخدمة.
115 |
116 | افتح متصفحك وأدخل http://127.0.0.1:8888 لبدء استخدامه. (استبدل 8888 برقم المنفذ الذي حددته في ملف الإعدادات.)
117 |
118 | ### إلى: مستخدمي نظام macOS
119 | [لنسخة سطح المكتب (أي ملفات الإصدار التي تحتوي على "desktop" في الاسم)، ارجع هنا]
120 | طريقة التغليف الحالية لنسخة سطح المكتب لا تدعم التشغيل المباشر بالنقر المزدوج أو التثبيت عبر DMG بسبب مشاكل التوقيع. يتطلب ذلك إعداد الثقة يدوياً كما يلي:
121 |
122 | 1. افتح المجلد الذي يحتوي على الملف التنفيذي (لنفترض أن اسم الملف هو KrillinAI_1.0.0_desktop_macOS_arm64) في Terminal
123 |
124 | 2. نفّذ الأوامر التالية بالتسلسل:
125 |
126 |
127 | ```
128 | sudo xattr -cr ./KrillinAI_1.0.0_desktop_macOS_arm64
129 | sudo chmod +x ./KrillinAI_1.0.0_desktop_macOS_arm64
130 | ./KrillinAI_1.0.0_desktop_macOS_arm64
131 | ```
132 |
133 | [للنسخة العادية (ملفات الإصدار التي لا تحتوي على "desktop" في الاسم)، راجع هنا]
134 | هذا البرنامج غير موقّع، لذا بعد إكمال إعداد الملفات وفق "الخطوات الأساسية"، ستحتاج إلى منح الثقة يدوياً للتطبيق على نظام macOS. اتبع هذه الخطوات:
135 | 1. افتح Terminal وانتقل إلى المجلد الذي يحتوي على الملف التنفيذي (لنفترض أن اسم الملف هو KrillinAI_1.0.0_macOS_arm64).
136 |
137 | 2. نفّذ الأوامر التالية بالتسلسل:
138 |
139 | ```
140 | sudo xattr -rd com.apple.quarantine ./KrillinAI_1.0.0_macOS_arm64
141 | sudo chmod +x ./KrillinAI_1.0.0_macOS_arm64
142 | ./KrillinAI_1.0.0_macOS_arm64
143 | ```
144 | سيؤدي هذا إلى بدء تشغيل الخدمة.
145 |
146 | ### النشر باستخدام Docker
147 |
هذا المشروع يدعم النشر عبر Docker، يُرجى الرجوع إلى [Docker Deployment Instructions](../docs/docker.md).
149 |
150 | ### تعليمات إعداد Cookie
151 |
152 | إذا واجهت فشلًا في تنزيل الفيديو، يُرجى الرجوع إلى تعليمات إعداد Cookie لتهيئة معلومات الـ Cookie الخاصة بك.
153 |
154 | (ملاحظة: تم الحفاظ على نفس تنسيق الروابط والعناوين كما في النص الأصلي)
155 |
156 |
157 | ### مساعدة في الإعدادات
158 | أسرع وأكثر طريقة ملائمة للإعداد:
* املأ openai لكل من transcription_provider و llm_provider. بهذه الطريقة، تحتاج فقط إلى ملء openai.apikey في الفئات الثلاث الرئيسية لبنود الإعداد التالية، وهي openai، local_model، و aliyun، ثم يمكنك إجراء ترجمة الترجمة النصية. (املأ app.proxy، model و openai.base_url حسب حالتك الخاصة.)
160 |
161 | طريقة الإعداد لاستخدام نموذج التعرف على الكلام المحلي (غير مدعوم على macOS في الوقت الحالي) (خيار يأخذ في الاعتبار التكلفة والسرعة والجودة):
162 |
163 | * املأ fasterwhisper لـ transcription_provider و openai لـ llm_provider. بهذه الطريقة، تحتاج فقط إلى ملء openai.apikey و local_model.faster_whisper في الفئتين الرئيسيتين لبنود الإعداد التالية، وهما openai و local_model، ثم يمكنك إجراء ترجمة الترجمة النصية. سيتم تنزيل النموذج المحلي تلقائيًا. (ينطبق نفس الأمر على app.proxy و openai.base_url كما ذكر أعلاه.)
164 |
165 | حالات الاستخدام التي تتطلب إعدادات علي بابا السحابية
166 | * إذا تم تعيين llm_provider إلى aliyun، فهذا يعني أنه سيتم استخدام خدمة النماذج الكبيرة من علي بابا السحابية. وبالتالي، يجب إعداد عنصر aliyun.bailian في الإعدادات.
167 | * إذا تم تعيين transcription_provider إلى aliyun، أو إذا تم تمكين وظيفة "الدبلجة الصوتية" عند بدء المهمة، فسيتم استخدام خدمة الصوت من علي بابا السحابية. لذلك، يجب ملء عنصر aliyun.speech في الإعدادات.
168 | * إذا تم تمكين وظيفة "الدبلجة الصوتية" وتم تحميل ملفات صوتية محلية لاستنساخ نبرة الصوت في نفس الوقت، فسيتم أيضًا استخدام خدمة التخزين السحابي OSS من علي بابا السحابية. وبالتالي، يجب ملء عنصر aliyun.oss في الإعدادات.
169 | دليل الإعدادات: [Alibaba Cloud Configuration Instructions](../docs/aliyun.md)
170 |
171 | ## الأسئلة الشائعة
172 | يُرجى الرجوع إلى [Frequently Asked Questions](../docs/faq.md)
173 |
174 | ## إرشادات المساهمة
175 |
176 | - لا تقم بإرسال ملفات غير ضرورية مثل .vscode، .idea، وغيرها. يُرجى استخدام .gitignore بشكل صحيح لتصفيتها.
177 | - لا تقم بإرسال ملف config.toml؛ بدلاً من ذلك، قم بإرسال ملف config-example.toml.
178 | ## تاريخ النجوم
179 |
180 | [](https://star-history.com/#krillinai/KrillinAI&Date)
181 |
182 |
--------------------------------------------------------------------------------
/docs/README_fr.md:
--------------------------------------------------------------------------------
1 |
2 |

3 |
4 |
5 | # Outil de Traduction et Doublage Audio/Video par IA
6 |
7 |

8 |
9 | **[English](../README.md)|[简体中文](../docs/README_zh.md)|[日本語](../docs/README_jp.md)|[한국어](../docs/README_kr.md)|[Tiếng Việt](../docs/README_vi.md)|[Français](../docs/README_fr.md)|[Deutsch](../docs/README_de.md)|[Español](../docs/README_es.md)|[Português](../docs/README_pt.md)|[Русский](../docs/README_rus.md)|[اللغة العربية](../docs/README_ar.md)**
10 |
11 | [](https://x.com/KrillinAI)
12 | [](https://discord.gg/sKUAsHfy)
13 | [](https://space.bilibili.com/242124650)
14 |
15 |
16 |
17 | ### 📢 Nouvelle Version Bureau pour Windows & Mac – Testez et Donnez Votre Avis
18 |
19 | ## Présentation
20 |
21 | Krillin AI est une solution tout-en-un pour la localisation et l'amélioration simplifiée de vidéos. Cet outil minimaliste mais puissant gère tout : traduction, doublage, clonage vocal, et reformatage – convertissant facilement les vidéos entre formats paysage et portrait pour un affichage optimal sur toutes les plateformes (YouTube, TikTok, Bilibili, Douyin, WeChat Channel, RedNote, Kuaishou). Avec son workflow intégré, Krillin AI transforme des vidéos brutes en contenu professionnel en quelques clics.
22 |
23 | ## Fonctionnalités Clés :
24 |
25 | 🎯 **Lancement Instantané** - Démarrez votre workflow en un clic. Nouvelle version bureau plus intuitive !
26 |
27 | 📥 **Téléchargement Vidéo** - Prise en charge d'yt-dlp et des fichiers locaux
28 |
29 | 📜 **Sous-titres Précis** - Reconnaissance haute précision via Whisper
30 |
31 | 🧠 **Segmentation Intelligente** - Découpage des sous-titres par IA (LLM)
32 |
33 | 🌍 **Traduction Professionnelle** - Traduction cohérente par paragraphes
34 |
35 | 🔄 **Remplacement de Termes** - Échange de vocabulaire spécialisé en un clic
36 |
37 | 🎙️ **Doublage et Clonage Vocal** - Sélection de voix CosyVoice ou clonage
38 |
39 | 🎬 **Composition Vidéo** - Formatage automatique paysage/portrait
40 |
41 | ## Démonstration
42 | L'image ci-dessous montre le résultat après insertion automatique des sous-titres générés pour une vidéo locale de 46 minutes (sans ajustement manuel). Aucun sous-titre manquant ou chevauchant, une segmentation naturelle et une traduction de qualité.
43 | 
44 |
45 |
46 |
47 |
48 |
49 | ### Traduction de Sous-titres
50 | ---
51 | https://github.com/user-attachments/assets/bba1ac0a-fe6b-4947-b58d-ba99306d0339
52 |
53 | |
54 |
55 |
56 | ### Doublage
57 | ---
58 | https://github.com/user-attachments/assets/0b32fad3-c3ad-4b6a-abf0-0865f0dd2385
59 |
60 | |
61 |
62 |
63 |
64 | ### Format Portrait
65 | ---
66 | https://github.com/user-attachments/assets/c2c7b528-0ef8-4ba9-b8ac-f9f92f6d4e71
67 |
68 | |
69 |
70 |
71 |
72 |
73 | ## 🔍 Reconnaissance Vocale
74 | _**Tous les modèles locaux dans le tableau ci-dessous prennent en charge l'installation automatique des fichiers exécutables + fichiers de modèle. Il vous suffit de faire votre sélection, et KrillinAI s'occupera du reste.**_
75 |
76 | | Service | Plateformes supportées | Options de modèle | Local/Cloud | Remarques |
77 | |-----------------|------------------------------|-----------------------------------|-------------|----------------|
78 | | **OpenAI Whisper** | Multi-plateforme | - | Cloud | Rapide avec d'excellents résultats |
79 | | **FasterWhisper** | Windows/Linux | `tiny`/`medium`/`large-v2` (recommandé medium+) | Local | Vitesse accrue, pas de surcharge de service cloud |
80 | | **WhisperKit** | macOS (Apple Silicon uniquement) | `large-v2` | Local | Optimisation native pour puces Apple |
81 | | **Alibaba Cloud ASR** | Multi-plateforme | - | Cloud | Contourne les problèmes réseau en Chine continentale |
82 |
83 | ## 🚀 Prise en charge des Grands Modèles de Langage
84 |
85 | ✅ Compatible avec tous les services cloud/locaux **compatibles avec l'API OpenAI**, y compris mais sans s'y limiter :
86 | - OpenAI
87 | - DeepSeek
88 | - Qwen (Tongyi Qianwen)
89 | - Modèles open source auto-hébergés
90 | - Autres services API compatibles avec le format OpenAI
91 |
92 | ## 🌍 Langues Prises en Charge
93 | Langues d'entrée : Chinois, Anglais, Japonais, Allemand, Turc (autres en cours d'ajout)
94 | Langues de traduction : 101 langues dont Anglais, Chinois, Russe, Espagnol, Français, etc.
95 |
96 | ## Aperçu de l'Interface
97 | 
98 |
99 | ## 🚀 Guide de Démarrage Rapide
100 | ### Étapes de Base
101 | Téléchargez d'abord le fichier exécutable de la version Release correspondant à votre système. Suivez les instructions ci-dessous pour choisir entre la version bureau ou standard, puis placez le logiciel dans un dossier vide. L'exécution du programme générera des répertoires supplémentaires - un dossier vide facilite la gestion.
102 |
103 | [Pour la version bureau (fichiers avec "desktop" dans le nom)]
104 | _La version bureau est une nouveauté conçue pour simplifier la configuration (sans éditer de fichiers). Elle contient encore quelques bugs et est mise à jour régulièrement._
105 |
106 | Double-cliquez sur le fichier pour l'utiliser.
107 |
108 | [Pour la version standard (fichiers sans "desktop" dans le nom), voir ici]
109 | _La version standard est la publication originale, offrant une configuration plus complexe mais une fonctionnalité stable. Elle convient également au déploiement sur serveur grâce à son interface web._
110 |
111 | Créez un dossier `config` dans le répertoire, puis créez un fichier `config.toml` à l'intérieur. Copiez le contenu du fichier `config-example.toml` du dossier `config` du code source dans votre `config.toml` et remplissez les détails de configuration. (Si vous souhaitez utiliser les modèles OpenAI mais ne savez pas comment obtenir une clé, vous pouvez rejoindre le groupe pour un accès d'essai gratuit.)
112 |
113 | Double-cliquez sur l'exécutable ou exécutez-le dans le terminal pour démarrer le service.
114 |
115 | Ouvrez votre navigateur et entrez http://127.0.0.1:8888 pour commencer à l'utiliser. (Remplacez 8888 par le numéro de port que vous avez spécifié dans le fichier config.)
116 |
117 | ### Pour les utilisateurs macOS
118 | [Pour la version bureau (fichiers avec "desktop" dans le nom), voir ici]
119 | La méthode actuelle d'empaquetage ne permet pas d'exécution par double-clic ni d'installation via DMG en raison de problèmes de signature. Une configuration manuelle de confiance est nécessaire :
120 |
121 | 1. Ouvrez dans le Terminal le répertoire contenant le fichier exécutable (nommé par exemple KrillinAI_1.0.0_desktop_macOS_arm64)
122 |
123 | 2. Exécutez les commandes suivantes dans l'ordre :
124 |
125 | ```
126 | sudo xattr -cr ./KrillinAI_1.0.0_desktop_macOS_arm64
127 | sudo chmod +x ./KrillinAI_1.0.0_desktop_macOS_arm64
128 | ./KrillinAI_1.0.0_desktop_macOS_arm64
129 | ```
130 |
131 | [Pour la version standard (fichiers sans "desktop" dans le nom), voir ici]
132 | Ce logiciel n'est pas signé. Après avoir complété la configuration des fichiers comme décrit dans les "Étapes de base", vous devrez approuver manuellement l'application sur macOS. Procédez comme suit :
133 |
134 | 1. Ouvrez le terminal et accédez au répertoire contenant le fichier exécutable (par exemple `KrillinAI_1.0.0_macOS_arm64`)
135 | 2. Exécutez les commandes suivantes dans l'ordre :
136 | ```
137 | sudo xattr -rd com.apple.quarantine ./KrillinAI_1.0.0_macOS_arm64
138 | sudo chmod +x ./KrillinAI_1.0.0_macOS_arm64
139 | ./KrillinAI_1.0.0_macOS_arm64
140 | ```
141 | Cela démarrera le service.
142 |
143 | ### Déploiement Docker
144 | Consultez le [Docker Deployment Instructions](../docs/docker.md).
145 |
146 | ### Configuration des Cookies
147 |
148 | En cas d'échec de téléchargement, suivez le [Cookie Configuration Instructions](../docs/get_cookies.md) .
149 |
150 | ### Aide à la Configuration
151 | La méthode de configuration la plus rapide et pratique :
152 | * Sélectionnez `openai` pour `transcription_provider` et `llm_provider`. Ainsi, vous n'aurez qu'à renseigner `openai.apikey` dans les trois catégories de configuration principales (`openai`, `local_model`, et `aliyun`) pour effectuer la traduction de sous-titres. (Complétez `app.proxy`, `model` et `openai.base_url` selon votre situation.)
153 |
154 | Méthode utilisant le modèle local de reconnaissance vocale (non supporté sur macOS pour le moment) (optimisant coût, vitesse et qualité) :
155 | * Utilisez `fasterwhisper` pour `transcription_provider` et `openai` pour `llm_provider`. Vous devrez alors renseigner `openai.apikey` et `local_model.faster_whisper` dans les catégories `openai` et `local_model`. Le modèle local sera téléchargé automatiquement. (`app.proxy` et `openai.base_url` restent configurables comme mentionné ci-dessus.)
156 |
157 | Cas nécessitant la configuration d'Alibaba Cloud :
158 | * Si `llm_provider` est défini sur `aliyun`, le service de grands modèles d'Alibaba Cloud sera utilisé. Configurez alors `aliyun.bailian`.
159 | * Si `transcription_provider` est sur `aliyun` ou si la fonction "doublage vocal" est activée, le service vocal d'Alibaba Cloud sera utilisé. Configurez `aliyun.speech`.
160 | * Si le "doublage vocal" est activé avec clonage de timbre vocal via fichiers audio locaux, le service OSS d'Alibaba Cloud sera aussi utilisé. Configurez alors `aliyun.oss`.
161 | Guide : [Instructions de configuration Alibaba Cloud](./aliyun.md)
162 |
163 | ## Foire Aux Questions
164 | Consultez la [FAQ](../docs/faq.md) (Foire Aux Questions)
165 |
166 | ## Directives de Contribution
167 |
168 | - Ne soumettez pas de fichiers inutiles comme `.vscode`, `.idea`, etc. Utilisez correctement le fichier `.gitignore` pour les exclure.
169 | - Ne soumettez pas `config.toml` ; soumettez plutôt `config-example.toml`.
170 |
171 | ## Historique des Stars
172 |
173 | [](https://star-history.com/#krillinai/KrillinAI&Date)
174 |
--------------------------------------------------------------------------------
/docs/README_jp.md:
--------------------------------------------------------------------------------
1 |
2 |

3 |
4 | # AI動画翻訳・吹き替えツール(簡単デプロイ)
5 |
6 |

7 |
8 | **[English](../README.md)|[简体中文](../docs/README_zh.md)|[日本語](../docs/README_jp.md)|[한국어](../docs/README_kr.md)|[Tiếng Việt](../docs/README_vi.md)|[Français](../docs/README_fr.md)|[Deutsch](../docs/README_de.md)|[Español](../docs/README_es.md)|[Português](../docs/README_pt.md)|[Русский](../docs/README_rus.md)|[اللغة العربية](../docs/README_ar.md)**
9 |
10 | [](https://x.com/KrillinAI)
11 | [](https://discord.gg/sKUAsHfy)
12 | [](https://space.bilibili.com/242124650)
13 |
14 |
15 |
16 | ## 🚀 プロジェクト概要
17 |
18 | クリリンAIは、動画のローカライズと品質向上を簡単に実現するオールインワンソリューションです。このシンプルでありながら強力なツールは、翻訳、吹き替え、ボイスクローニングからフォーマット調整までをカバー。縦横画面のシームレスな変換により、YouTube、TikTok、Bilibili、抖音(Douyin)、微信チャンネル、RedNote、快手(Kuaishou)など、あらゆるコンテンツプラットフォームに最適化された表示を実現します。エンドツーエンドのワークフローで、わずかなクリックだけで未編集の素材から完成度の高いプラットフォーム対応コンテンツへと仕上げます。
19 |
20 | ## 主な特徴と機能:
21 | 🎯 **ワンクリック起動**:複雑な環境設定不要、依存関係を自動インストール
22 | 📥 **動画取得**:yt-dlpダウンロードまたはローカルファイルアップロード対応
23 | 📜 **高精度認識**:Whisperベースの音声認識
24 | 🧠 **インテリジェント分割**:LLMを使用した字幕分割と調整
25 | 🔄 **用語置換**:専門分野の語彙をワンクリックで置換
26 | 🌍 **プロ翻訳**:LLMベースの段落単位翻訳で文脈一貫性を保持
27 | 🎙️ **音声クローン**:デフォルト音声またはカスタム音声クローニング
28 | 🎬 **動画合成**:縦横画面と字幕レイアウトを自動処理
29 |
30 | ## 効果デモ
31 | 下図は46分のローカル動画をインポートし、ワンクリック実行後に生成された字幕ファイルをトラックに追加した効果です。手動調整なしで、欠落・重複なく、自然な文節区切りと高品質な翻訳を実現。
32 | 
33 |
34 |
35 |
36 |
37 |
38 | ### 字幕翻訳
39 | ---
40 | https://github.com/user-attachments/assets/bba1ac0a-fe6b-4947-b58d-ba99306d0339
41 |
42 | |
43 |
44 |
45 |
46 |
47 | ### 吹き替え
48 | ---
49 | https://github.com/user-attachments/assets/0b32fad3-c3ad-4b6a-abf0-0865f0dd2385
50 |
51 | |
52 |
53 |
54 |
55 | ### 縦画面
56 | ---
57 | https://github.com/user-attachments/assets/c2c7b528-0ef8-4ba9-b8ac-f9f92f6d4e71
58 |
59 | |
60 |
61 |
62 |
63 | ## 🔍 音声認識サポート
64 | _**以下の表に記載されているすべてのローカルモデルは、実行ファイル+モデルファイルの自動インストールに対応しています。選択するだけで、KrillinAIが残りの作業をすべて処理します。**_
65 |
66 | | サービス | 対応プラットフォーム | モデルオプション | ローカル/クラウド | 備考 |
67 | |-----------------|------------------------------|-----------------------------------|-------------|----------------|
68 | | **OpenAI Whisper** | クロスプラットフォーム | - | クラウド | 高速で優れた精度 |
69 | | **FasterWhisper** | Windows/Linux | `tiny`/`medium`/`large-v2` (medium以上推奨) | ローカル | 高速処理、クラウド依存なし |
70 | | **WhisperKit** | macOS (Apple Siliconのみ) | `large-v2` | ローカル | Appleチップ向け最適化 |
71 | | **Alibaba Cloud ASR** | クロスプラットフォーム | - | クラウド | 中国本土のネットワーク制限回避 |
72 |
73 | ## 🚀 大規模言語モデル(LLM)サポート
74 |
75 | ✅ **OpenAI API互換**のクラウド/ローカルLLMサービスすべてに対応(以下に限定されません):
76 | - OpenAI
77 | - DeepSeek
78 | - Qwen (Tongyi Qianwen)
79 | - セルフホスト型オープンソースモデル
80 | - その他OpenAI形式互換APIサービス
81 |
82 | ## 対応言語
83 | 入力言語対応:中国語、英語、日本語、ドイツ語、トルコ語、マレー語(随時追加中)
84 |
85 | 翻訳言語対応:英語、中国語、ロシア語、スペイン語、フランス語など101言語
86 |
87 | ## インターフェースプレビュー
88 | 
89 |
90 |
91 | ## クイックスタート
92 | ### 基本手順
93 | 1. [Release](https://github.com/krillinai/KrillinAI/releases)からお使いのデバイスに合った実行ファイルをダウンロードし、空のフォルダに配置
94 | 2. フォルダ内に`config`フォルダを作成し、`config`フォルダ内に`config.toml`ファイルを作成、ソースコードの`config`ディレクトリにある`config-example.toml`ファイルの内容をコピーして貼り付け、設定情報を記入(OpenAIモデルを使いたいがキーの取得方法がわからない場合はグループに参加して無料で試用可能)
95 | 3. 実行ファイルをダブルクリック、またはターミナルで実行してサービスを起動
96 | 4. ブラウザを開き `http://127.0.0.1:8888`と入力して使用開始
97 |
98 | ### macOSユーザー向け
99 | 本ソフトウェアは署名されていないため、macOSで実行する場合、「基本手順」のファイル設定完了後、手動でアプリを信頼する必要があります。方法は以下の通り:
100 | 1. ターミナルで実行ファイル(ファイル名がKrillinAI_1.0.0_macOS_arm64と仮定)があるディレクトリを開く
101 | 2. 以下のコマンドを順に実行:
102 | ```
103 | sudo xattr -rd com.apple.quarantine ./KrillinAI_1.0.0_macOS_arm64
104 | sudo chmod +x ./KrillinAI_1.0.0_macOS_arm64
105 | ./KrillinAI_1.0.0_macOS_arm64
106 | ```
107 | これでサービスが起動します
108 |
109 | ### Dockerデプロイ
110 | 本プロジェクトはDockerデプロイをサポートしています。[Dockerデプロイ説明](./docker.md)を参照してください
111 |
112 | ### Cookie設定説明(オプション)
113 |
114 | 動画ダウンロードに失敗する場合
115 |
116 | [Cookie設定説明](./get_cookies.md) を参照してCookie情報を設定してください。
117 |
118 | ### 設定ヘルプ(必読)
119 | 最速で簡単な設定方法:
120 | * transcription_providerとllm_providerの両方にopenaiを選択すると、openai、local_model、aliyunの3つの設定項目でopenai.apikeyのみ記入すれば字幕翻訳が可能です。(app.proxy、model、openai.base_urlは状況に応じて記入)
121 |
122 | ローカル音声認識モデルを使用する設定方法(macOS未対応)(コスト、速度、品質を考慮した選択)
123 | * transcription_providerにfasterwhisper、llm_providerにopenaiを記入すると、openai、local_modelの2つの設定項目でopenai.apikeyとlocal_model.faster_whisperを記入するだけで字幕翻訳が可能で、ローカルモデルは自動ダウンロードされます。(app.proxyとopenai.base_urlは同上)
124 |
125 | 以下の使用状況では、Alibaba Cloudの設定が必要です:
126 | * llm_providerにaliyunを記入した場合、Alibaba Cloudの大規模モデルサービスを使用するため、aliyun.bailian項目の設定が必要
127 | * transcription_providerにaliyunを記入した場合、またはタスク起動時に「吹き替え」機能を有効にした場合、Alibaba Cloudの音声サービスを使用するため、aliyun.speech項目の記入が必要
128 | * 「吹き替え」機能を有効にし、ローカルオーディオを音声クローニング用にアップロードした場合、Alibaba CloudのOSSクラウドストレージサービスを使用するため、aliyun.oss項目の記入が必要
129 | Alibaba Cloud設定ヘルプ:[Alibaba Cloud設定説明](./aliyun.md)
130 |
131 | ## よくある質問
132 |
133 | [よくある質問](./faq.md)をご覧ください
134 |
135 | ## コントリビューション規範
136 | 1. .vscode、.ideaなどの不要なファイルをコミットしないでください。.gitignoreを活用してフィルタリングしてください
137 | 2. config.tomlをコミットせず、代わりにconfig-example.tomlを使用してコミットしてください
138 |
139 | ## お問い合わせ
140 | 1. QQグループに参加して質問にお答えします:754069680
141 | 2. ソーシャルメディアアカウントBilibiliをフォローし、AI技術分野の高品質なコンテンツを毎日シェアしています
142 |
143 | ## Star History
144 |
145 | [](https://star-history.com/#krillinai/KrillinAI&Date)
146 |
--------------------------------------------------------------------------------
/docs/README_kr.md:
--------------------------------------------------------------------------------
1 |
2 |

3 |
4 |
5 | # AI 오디오&비디오 번역 및 더빙 도구
6 |
7 |

8 |
9 | **[English](../README.md)|[简体中文](../docs/README_zh.md)|[日本語](../docs/README_jp.md)|[한국어](../docs/README_kr.md)|[Tiếng Việt](../docs/README_vi.md)|[Français](../docs/README_fr.md)|[Deutsch](../docs/README_de.md)|[Español](../docs/README_es.md)|[Português](../docs/README_pt.md)|[Русский](../docs/README_rus.md)|[اللغة العربية](../docs/README_ar.md)**
10 |
11 | [](https://x.com/KrillinAI)
12 | [](https://discord.gg/sKUAsHfy)
13 | [](https://space.bilibili.com/242124650)
14 |
15 |
16 |
17 | ### 📢 Win & Mac 데스크톱 버전 신규 출시 – 테스트 후 피드백 제공 부탁드립니다
18 |
19 | ## 개요
20 |
21 | 크릴린 AI(Krillin AI)는 번역, 더빙, 음성 복제에서부터 화면 비율 변환까지 모든 과정을 처리하는 올인원 비디오 현지화 및 향상 솔루션입니다. 이 미니멀하면서도 강력한 도구는 유튜브, 틱톡, 빌리빌리, 더우인, 위챗 채널, 레드노트, 쿠아이쇼우 등 모든 콘텐츠 플랫폼에 최적화된 가로/세로 영상 변환을 자동으로 수행합니다. 엔드투엔드 워크플로우로 원본 영상을 클릭 몇 번만에 각 플랫폼에 맞는 완성된 콘텐츠로 변환해 줍니다.
22 |
23 | ## 주요 기능:
24 | 🎯 **원클릭 시작** - 즉시 작업 프로세스 실행
25 |
26 | 📥 **비디오 다운로드** - yt-dlp 지원 및 로컬 파일 업로드 가능
27 |
28 | 📜 **정밀 자막** - Whisper 기반 고정확도 음성 인식
29 |
30 | 🧠 **스마트 분할** - LLM 기반 자막 청크 분할 및 정렬
31 |
32 | 🌍 **전문가 수준 번역** - 문단 단위 자연스러운 번역
33 |
34 | 🔄 **용어 대체** - 분야별 전문 어휘 한 번에 변경
35 |
36 | 🎙️ **더빙 및 음성 복제** - CosyVoice 선택 또는 개인 음성 클로닝
37 |
38 | 🎬 **비디오 합성** - 가로/세로 레이아웃 자동 포맷팅
39 |
40 | ## 데모 영상
41 | 46분 분량의 로컬 비디오 파일을 불러온 후 원클릭 작업으로 생성된 자막 파일을 트랙에 삽입한 결과입니다. 전혀 수동 조정 없이도 자막 누락이나 겹침 현상 없이 문장 분할이 자연스럽게 이루어졌으며, 번역 품질 또한 매우 우수합니다.
42 | 
43 |
44 |
45 |
46 |
47 |
48 | ### 자막 번역
49 | ---
50 | https://github.com/user-attachments/assets/bba1ac0a-fe6b-4947-b58d-ba99306d0339
51 |
52 | |
53 |
54 |
55 | ### 더빙
56 | ---
57 | https://github.com/user-attachments/assets/0b32fad3-c3ad-4b6a-abf0-0865f0dd2385
58 |
59 | |
60 |
61 |
62 |
63 | ## 🔍 음성 인식 지원
64 | _**아래 표의 모든 로컬 모델은 실행 파일 + 모델 파일의 자동 설치를 지원합니다. 원하는 모델을 선택하기만 하면 KrillinAI가 나머지 모든 작업을 처리합니다.**_
65 |
66 | | 서비스 | 지원 플랫폼 | 모델 옵션 | 로컬/클라우드 | 참고사항 |
67 | |-----------------|------------------------------|-----------------------------------|-------------|----------------|
68 | | **OpenAI Whisper** | 크로스 플랫폼 | - | 클라우드 | 빠른 속도와 우수한 결과 |
69 | | **FasterWhisper** | Windows/Linux | `tiny`/`medium`/`large-v2` (medium+ 권장) | 로컬 | 더 빠른 속도, 클라우드 서비스 오버헤드 없음 |
70 | | **WhisperKit** | macOS (Apple Silicon 전용) | `large-v2` | 로컬 | Apple 칩에 최적화 |
71 | | **Alibaba Cloud ASR** | 크로스 플랫폼 | - | 클라우드 | 중국 본토 네트워크 문제 회피 |
72 |
73 | ## 🚀 대규모 언어 모델 지원
74 |
75 | ✅ **OpenAI API 호환** 클라우드/로컬 LLM 서비스와 완벽 호환 (다음 포함):
76 | - OpenAI
77 | - DeepSeek
78 | - Qwen (Tongyi Qianwen)
79 | - 자체 호스팅 오픈소스 모델
80 | - 기타 OpenAI 형식 호환 API 서비스
81 |
82 | ## 🌍 언어 지원
83 | 입력 언어: 중국어, 영어, 일본어, 독일어, 터키어, 한국어 지원 (추가 언어 계속 확장 중)
84 | 번역 언어: 영어, 중국어, 러시아어, 스페인어, 프랑스어 등 101개 언어 지원
85 |
86 | ## 인터페이스 미리보기
87 | 
88 |
89 | ## 🚀 빠른 시작
90 | ### 기본 단계
91 | 1. 릴리스에서 사용자 기기 시스템에 맞는 실행 파일을 다운로드 후 빈 폴더에 배치하세요.
92 | 2. 해당 폴더 내부에 config 폴더를 생성하고, config 폴더 안에 config.toml 파일을 만드세요. 소스 코드의 config 디렉토리에 있는 config-example.toml 파일 내용을 복사해 config.toml에 붙여넣은 후 설정 정보를 입력하세요.
93 | 3. 실행 파일을 더블클릭해 서비스를 시작하세요.
94 | 4. 브라우저에서 http://127.0.0.1:8888 주소로 접속하면 사용이 가능합니다(8888은 config.toml에서 설정한 포트 번호로 변경해주세요).
95 |
96 | ### macOS 사용자분들께
97 | 본 소프트웨어는 서명되지 않았으므로, "기본 단계"의 파일 구성 완료 후 macOS에서 수동으로 애플리케이션 신뢰 설정이 필요합니다. 다음 절차를 따라주세요:
98 | 1. 터미널을 열고 실행 파일(예: 파일명이 KrillinAI_1.0.0_macOS_arm64인 경우)이 위치한 디렉토리로 이동합니다.
99 | 2. 다음 명령어들을 순차적으로 실행해주세요:
100 | ```
101 | sudo xattr -rd com.apple.quarantine ./KrillinAI_1.0.0_macOS_arm64
102 | sudo chmod +x ./KrillinAI_1.0.0_macOS_arm64
103 | ./KrillinAI_1.0.0_macOS_arm64
104 | ```
105 | 이렇게 하면 서비스가 시작됩니다.
106 |
107 | ### 도커 배포
108 | 이 프로젝트는 도커 배포를 지원합니다. 자세한 내용은 [Docker Deployment Instructions](./docker.md)를 참고해주세요.
109 |
110 | ### 쿠키 설정 안내
111 |
112 | 비디오 다운로드 실패 시 [Cookie Configuration Instructions](./get_cookies.md) 를 참조하여 쿠키 정보를 설정해주세요.
113 |
114 | ### 설정 가이드
115 | 가장 빠르고 편리한 설정 방법:
116 | * transcription_provider와 llm_provider 모두 openai를 선택하세요. 이 경우 다음 3가지 주요 설정 항목 카테고리(openai, local_model, aliyun) 중 openai.apikey만 입력하면 자막 번역을 수행할 수 있습니다. (app.proxy, model, openai.base_url은 각자의 상황에 맞게 입력하세요.)
117 |
118 | 로컬 음성 인식 모델 사용 설정 방법 (현재 macOS 미지원) (비용, 속도, 품질을 고려한 선택):
119 | * transcription_provider에는 fasterwhisper를, llm_provider에는 openai를 입력하세요. 이 경우 openai와 local_model 카테고리에서 openai.apikey와 local_model.faster_whisper만 입력하면 자막 번역이 가능합니다. 로컬 모델은 자동으로 다운로드됩니다. (위에서 언급한 app.proxy와 openai.base_url도 동일하게 적용됩니다.)
120 |
121 | 다음 사용 상황에서는 알리바바 클라우드 설정이 필요합니다:
122 | * llm_provider에 aliyun을 입력한 경우: 알리바바 클라우드의 대형 모델 서비스를 사용하게 되므로, aliyun.bailian 항목 설정이 필요합니다.
123 | * transcription_provider에 aliyun을 입력하거나 작업 시작 시 "보이스 더빙" 기능을 활성화한 경우: 알리바바 클라우드의 음성 서비스를 사용하게 되므로, aliyun.speech 항목 설정이 필요합니다.
124 | * "보이스 더빙" 기능을 활성화하면서 동시에 로컬 오디오 파일을 업로드해 음색 복제를 하는 경우: 알리바바 클라우드의 OSS 클라우드 스토리지 서비스도 사용하게 되므로, aliyun.oss 항목 설정이 필요합니다.
125 | 설정 가이드: [Alibaba Cloud Configuration Instructions](./aliyun.md)
126 |
127 | ## 자주 묻는 질문
128 | 자세한 내용은 [Frequently Asked Questions](./faq.md)를 참조해주세요.
129 |
130 | ## 기여 가이드라인
131 |
132 | - .vscode, .idea 등 불필요한 파일은 제출하지 마세요. .gitignore 파일을 활용해 필터링해주세요.
133 | - config.toml 대신 config-example.toml 파일을 제출해주세요.
134 |
135 | ## 스타 히스토리
136 |
137 | [](https://star-history.com/#krillinai/KrillinAI&Date)
138 |
--------------------------------------------------------------------------------
/docs/README_rus.md:
--------------------------------------------------------------------------------
1 |
2 |

3 |
4 | # AI инструмент для перевода и озвучки аудио и видео
5 |
6 |

7 |
8 | **[English](../README.md)|[简体中文](../docs/README_zh.md)|[日本語](../docs/README_jp.md)|[한국어](../docs/README_kr.md)|[Tiếng Việt](../docs/README_vi.md)|[Français](../docs/README_fr.md)|[Deutsch](../docs/README_de.md)|[Español](../docs/README_es.md)|[Português](../docs/README_pt.md)|[Русский](../docs/README_rus.md)|[اللغة العربية](../docs/README_ar.md)**
9 |
10 | [](https://x.com/KrillinAI)
11 | [](https://discord.gg/sKUAsHfy)
12 | [](https://space.bilibili.com/242124650)
13 |
14 |
15 |
16 | ## Обзор
17 |
18 | Krillin AI — это универсальное решение для простой локализации и улучшения видео. Этот минималистичный, но мощный инструмент выполняет всё: от перевода и дубляжа до клонирования голоса и адаптации формата — легко преобразует видео между горизонтальным и вертикальным режимами для идеального отображения на любых платформах (YouTube, TikTok, Bilibili, Douyin, WeChat Channel, RedNote, Kuaishou). Благодаря сквозному рабочему процессу Krillin AI превращает исходные материалы в готовый к публикации контент всего за несколько кликов.
19 |
20 | ## Ключевые возможности:
21 | 🎯 **Запуск в один клик** - мгновенное начало работы
22 |
23 | 📥 **Загрузка видео** - поддержка yt-dlp и локальных файлов
24 |
25 | 📜 **Точные субтитры** - распознавание с высокой точностью на основе Whisper
26 |
27 | 🧠 **Умное разделение** - логическая разбивка и выравнивание субтитров с помощью LLM
28 |
29 | 🌍 **Профессиональный перевод** - согласованный перевод на уровне абзацев
30 |
31 | 🔄 **Замена терминов** - смена специализированной лексики в один клик
32 |
33 | 🎙️ **Озвучка и клонирование голоса** - выбор голосов CosyVoice или создание копий
34 |
35 | 🎬 **Видеомонтаж** - автоматическое форматирование для горизонтальных и вертикальных форматов
36 |
37 | ## Пример работы
38 | На изображении ниже показан результат автоматической вставки субтитров в видео после однокликового запуска обработки 46-минутного локального видео. Никаких ручных корректировок не производилось.
39 | 
40 |
41 |
42 |
43 |
44 |
45 | ### Перевод субтитров
46 | ---
47 | https://github.com/user-attachments/assets/bba1ac0a-fe6b-4947-b58d-ba99306d0339
48 |
49 | |
50 |
51 |
52 | ### Озвучка
53 | ---
54 | https://github.com/user-attachments/assets/0b32fad3-c3ad-4b6a-abf0-0865f0dd2385
55 |
56 | |
57 |
58 |
59 |
60 | ### портретный режим
61 | ---
62 | https://github.com/user-attachments/assets/c2c7b528-0ef8-4ba9-b8ac-f9f92f6d4e71
63 |
64 | |
65 |
66 |
67 |
68 | ## 🔍 Поддержка распознавания речи
69 | _**Все локальные модели в таблице ниже поддерживают автоматическую установку исполняемых файлов + файлов моделей. Просто сделайте свой выбор, а KrillinAI сделает всё остальное за вас.**_
70 |
71 | | Сервис | Поддерживаемые платформы | Варианты моделей | Локально/Облако | Примечания |
72 | |-----------------|------------------------------|-----------------------------------|-------------|----------------|
73 | | **OpenAI Whisper** | Кроссплатформенный | - | Облако | Быстрое с отличными результатами |
74 | | **FasterWhisper** | Windows/Linux | `tiny`/`medium`/`large-v2` (recommend medium+) | Локально | Более высокая скорость, без нагрузки на облачный сервис |
75 | | **WhisperKit** | macOS (Apple Silicon only) | `large-v2` | Локально | Нативная оптимизация для чипов Apple |
76 | | **Alibaba Cloud ASR** | Кроссплатформенный | - | Облако | Обходит проблемы сети в материковом Китае |
77 |
78 | ## 🚀 Поддержка больших языковых моделей
79 |
80 | ✅ Совместим со всеми **совместимыми с OpenAI API** облачными/локальными LLM-сервисами, включая, но не ограничиваясь:
81 | - OpenAI
82 | - DeepSeek
83 | - Qwen (Tongyi Qianwen)
84 | - Самостоятельно размещённые open-source модели
85 | - Другие API-сервисы, совместимые с форматом OpenAI
86 |
87 |
88 | ## 🌍 Поддерживаемые языки
89 | Входные языки: китайский, английский, японский, немецкий, турецкий (добавляются новые языки)
90 | Языки перевода: 101 язык, включая английский, китайский, русский, испанский, французский и др.
91 |
92 | ## Предпросмотр интерфейса
93 | 
94 |
95 |
96 | ## 🚀 Быстрый старт
97 | ### Основные шаги
98 | 1. Скачайте исполняемый файл, соответствующий вашей операционной системе, из раздела релизов и поместите его в пустую папку.
99 | 2. Создайте папку config внутри этой папки, затем создайте файл config.toml в папке config. Скопируйте содержимое файла config-example.toml из директории config исходного кода в config.toml и заполните вашу конфигурационную информацию соответствующим образом.
100 | 3. Дважды щелкните на исполняемом файле, чтобы запустить сервис.
101 | 4. Откройте браузер и введите http://127.0.0.1:8888, чтобы начать использование (замените 8888 на порт, который вы указали в файле config.toml).
102 |
103 | ### Для пользователей macOS
104 | Это программное обеспечение не подписано, поэтому после завершения настройки файлов в "Основных шагах" вам потребуется вручную подтвердить доверие к приложению в macOS. Выполните следующие действия:
105 | 1. Откройте терминал и перейдите в директорию, где находится исполняемый файл (предположим, имя файла `KrillinAI_1.0.0_macOS_arm64`).
106 | 2. Выполните следующие команды по порядку:
107 | ```
108 | sudo xattr -rd com.apple.quarantine ./KrillinAI_1.0.0_macOS_arm64
109 | sudo chmod +x ./KrillinAI_1.0.0_macOS_arm64
110 | ./KrillinAI_1.0.0_macOS_arm64
111 | ```
112 | Это запустит сервис.
113 |
114 | ### Развертывание через Docker
115 | Этот проект поддерживает развертывание через Docker. Пожалуйста, обратитесь к [Docker Deployment Instructions](./docker.md).
116 |
117 | ### Инструкции по настройке Cookie
118 |
119 | Если вы столкнулись с ошибками при загрузке видео, пожалуйста, обратитесь к [Cookie Configuration Instructions](./get_cookies.md) для настройки информации о ваших cookie.
120 |
121 | ### Помощь по настройке
122 | Самый быстрый и удобный способ настройки:
123 | * Выберите openai для transcription_provider и llm_provider. Таким образом, вам нужно будет заполнить только openai.apikey в следующих трех основных категориях конфигурации, а именно openai, local_model и aliyun, и затем вы сможете выполнять перевод субтитров. (Заполните app.proxy, model и openai.base_url в соответствии с вашей ситуацией.)
124 |
125 | Способ настройки для использования локальной модели распознавания речи (временно не поддерживается на macOS) (выбор, учитывающий стоимость, скорость и качество):
126 | * Заполните fasterwhisper для transcription_provider и openai для llm_provider. Таким образом, вам нужно будет заполнить только openai.apikey и local_model.faster_whisper в следующих двух основных категориях конфигурации, а именно openai и local_model, и затем вы сможете выполнять перевод субтитров. Локальная модель будет загружена автоматически. (То же самое относится к app.proxy и openai.base_url, как упоминалось выше.)
127 |
128 | Следующие ситуации использования требуют настройки Alibaba Cloud:
129 | * Если llm_provider заполнен как aliyun, это означает, что будет использоваться сервис больших моделей Alibaba Cloud. Следовательно, необходимо настроить параметр aliyun.bailian.
130 | * Если transcription_provider заполнен как aliyun, или если функция "озвучки" включена при запуске задачи, будет использоваться голосовой сервис Alibaba Cloud. Поэтому необходимо заполнить параметр aliyun.speech.
131 | * Если функция "озвучки" включена и одновременно загружаются локальные аудиофайлы для клонирования тембра голоса, также будет использоваться сервис облачного хранилища OSS от Alibaba Cloud. Следовательно, необходимо заполнить параметр aliyun.oss.
132 | Руководство по настройке: [Alibaba Cloud Configuration Instructions](./aliyun.md)
133 |
134 | ## Часто задаваемые вопросы
135 | Пожалуйста, обратитесь к [Frequently Asked Questions](./faq.md)
136 |
137 | ## Рекомендации по внесению вклада
138 |
139 | - Не отправляйте ненужные файлы, такие как .vscode, .idea и т.д. Пожалуйста, используйте .gitignore для их фильтрации.
140 | - Не отправляйте config.toml; вместо этого отправляйте config-example.toml.
141 |
142 | ## История звезд
143 |
144 | [](https://star-history.com/#krillinai/KrillinAI&Date)
145 |
--------------------------------------------------------------------------------
/docs/README_vi.md:
--------------------------------------------------------------------------------
1 |
2 |

3 |
4 |
5 | # Công Cụ Dịch Thuật và Lồng Tiếng AI cho Âm Thanh & Video
6 |
7 |

8 |
9 | **[English](../README.md)|[简体中文](../docs/README_zh.md)|[日本語](../docs/README_jp.md)|[한국어](../docs/README_kr.md)|[Tiếng Việt](../docs/README_vi.md)|[Français](../docs/README_fr.md)|[Deutsch](../docs/README_de.md)|[Español](../docs/README_es.md)|[Português](../docs/README_pt.md)|[Русский](../docs/README_rus.md)|[اللغة العربية](../docs/README_ar.md)**
10 |
11 | [](https://x.com/KrillinAI)
12 | [](https://discord.gg/sKUAsHfy)
13 | [](https://space.bilibili.com/242124650)
14 |
15 |
16 |
17 | ### 📢 Phiên Bản Mới Cho Desktop Win & Mac – Chào Đón Trải Nghiệm Và Đóng Góp Ý Kiến
18 |
19 | ## Tổng Quan
20 |
21 | Krillin AI là giải pháp toàn diện để địa phương hóa và nâng cấp video một cách dễ dàng. Công cụ tối giản nhưng mạnh mẽ này xử lý mọi thứ từ dịch thuật, lồng tiếng đến nhân bản giọng nói, định dạng – chuyển đổi liền mạch video giữa chế độ ngang và dọc để tối ưu hiển thị trên mọi nền tảng nội dung (YouTube, TikTok, Bilibili, Douyin, Kênh WeChat, RedNote, Kuaishou). Với quy trình làm việc end-to-end, Krillin AI biến footage thô thành nội dung hoàn thiện, sẵn sàng đăng tải chỉ với vài cú nhấp chuột.
22 |
23 | ## Tính năng chính:
24 | 🎯 **Khởi động một chạm** - Bắt đầu quy trình làm việc ngay lập tức, Phiên bản desktop mới - sử dụng dễ dàng hơn!
25 |
26 | 📥 **Tải video** - Hỗ trợ yt-dlp và tải file từ máy tính
27 |
28 | 📜 **Phụ đề chính xác** - Nhận diện với độ chính xác cao nhờ Whisper
29 |
30 | 🧠 **Phân đoạn thông minh** - Chia nhỏ và căn chỉnh phụ đề dựa trên LLM
31 |
32 | 🌍 **Dịch thuật chuyên nghiệp** - Dịch theo đoạn văn để đảm bảo tính nhất quán
33 |
34 | 🔄 **Thay thế thuật ngữ** - Đổi từ vựng chuyên ngành chỉ với một cú nhấp chuột
35 |
36 | 🎙️ **Lồng tiếng & Nhân bản giọng nói** - Lựa chọn giọng CosyVoice hoặc giọng nhân bản
37 |
38 | 🎬 **Tổng hợp video** - Tự động định dạng cho bố cục ngang/dọc
39 |
40 | ## Minh họa
41 | Bức ảnh dưới đây thể hiện kết quả sau khi file phụ đề - được tạo tự động chỉ bằng một cú nhấp chuột từ video local 46 phút - được chèn vào timeline. Toàn bộ quá trình không hề có bất kỳ chỉnh sửa thủ công nào. Phụ đề hiển thị đầy đủ không bị thiếu hay chồng chéo, cách phân đoạn câu tự nhiên, chất lượng bản dịch cũng rất cao.
42 | 
43 |
44 |
45 |
46 |
47 |
48 | ### Phụ đề dịch
49 | ---
50 | https://github.com/user-attachments/assets/bba1ac0a-fe6b-4947-b58d-ba99306d0339
51 |
52 | |
53 |
54 |
55 | ### Lồng tiếng
56 | ---
57 | https://github.com/user-attachments/assets/0b32fad3-c3ad-4b6a-abf0-0865f0dd2385
58 |
59 | |
60 |
61 |
62 |
63 | ### Dọc
64 | ---
65 | https://github.com/user-attachments/assets/c2c7b528-0ef8-4ba9-b8ac-f9f92f6d4e71
66 |
67 | |
68 |
69 |
70 |
71 |
72 | ## 🔍 Hỗ trợ Nhận dạng Giọng nói
73 | _**Tất cả mô hình cục bộ trong bảng dưới đây hỗ trợ cài đặt tự động file thực thi + file mô hình. Chỉ cần lựa chọn, KrillinAI sẽ tự động xử lý phần còn lại cho bạn.**_
74 |
75 | | Dịch vụ | Nền tảng hỗ trợ | Tùy chọn mô hình | Cục bộ/Đám mây | Ghi chú |
76 | |-----------------|------------------------------|-----------------------------------|-------------|----------------|
77 | | **OpenAI Whisper** | Đa nền tảng | - | Đám mây | Tốc độ nhanh với kết quả xuất sắc |
78 | | **FasterWhisper** | Windows/Linux | `tiny`/`medium`/`large-v2` (recommend medium+) | Cục bộ | Tốc độ nhanh hơn, không phụ thuộc dịch vụ đám mây |
79 | | **WhisperKit** | macOS (Apple Silicon only) | `large-v2` | Cục bộ | Tối ưu hóa riêng cho chip Apple |
80 | | **Alibaba Cloud ASR** | Đa nền tảng | - | Đám mây | Không gặp vấn đề mạng tại Trung Quốc đại lục |
81 |
82 | ## 🚀 Hỗ trợ Mô hình Ngôn ngữ Lớn
83 |
84 | ✅ Tương thích với tất cả dịch vụ LLM đám mây/cục bộ tương thích **OpenAI API** bao gồm nhưng không giới hạn:
85 | - OpenAI
86 | - DeepSeek
87 | - Qwen (Tongyi Qianwen)
88 | - Các mô hình mã nguồn mở tự triển khai
89 | - Các dịch vụ API tương thích định dạng OpenAI khác
90 |
91 | ## 🌍 Hỗ trợ Ngôn ngữ
92 | Ngôn ngữ đầu vào: Hỗ trợ tiếng Trung, Anh, Nhật, Đức, Thổ Nhĩ Kỳ (đang tiếp tục bổ sung thêm)
93 | Ngôn ngữ dịch: Hỗ trợ 101 ngôn ngữ bao gồm tiếng Anh, Trung, Nga, Tây Ban Nha, Pháp,...
94 |
95 | ## Xem trước giao diện
96 | 
97 |
98 | ## 🚀 Bắt đầu nhanh
99 | ### Các bước cơ bản
100 | Đầu tiên, tải file thực thi Release phù hợp với hệ thống thiết bị của bạn. Làm theo hướng dẫn dưới đây để chọn giữa phiên bản desktop hoặc non-desktop, sau đó đặt phần mềm vào thư mục trống. Chạy chương trình sẽ tạo ra một số thư mục, vì vậy việc đặt trong thư mục trống giúp quản lý dễ dàng hơn.
101 |
102 | [Đối với phiên bản desktop (file release có chứa "desktop" trong tên), xem hướng dẫn tại đây]
103 | Phiên bản desktop mới được phát hành để giải quyết khó khăn cho người mới trong việc chỉnh sửa file cấu hình. Phiên bản này vẫn còn một số lỗi và đang được cập nhật liên tục.
104 |
105 | Nhấp đúp vào file để bắt đầu sử dụng.
106 |
107 | [Đối với phiên bản non-desktop (file release không có "desktop" trong tên), xem hướng dẫn tại đây]
108 | Phiên bản non-desktop là bản phát hành gốc, có cấu hình phức tạp hơn nhưng chức năng ổn định. Phiên bản này cũng phù hợp để triển khai trên server, vì cung cấp giao diện web.
109 |
110 | Tạo thư mục config trong thư mục chứa phần mềm, sau đó tạo file config.toml trong đó. Sao chép nội dung từ file config-example.toml trong thư mục config của mã nguồn vào file config.toml của bạn và điền các thông tin cấu hình. (Nếu bạn muốn sử dụng các mô hình OpenAI nhưng không biết cách lấy key, có thể tham gia nhóm để được dùng thử miễn phí.)
111 |
112 | Nhấp đúp vào file thực thi hoặc chạy trong terminal để khởi động dịch vụ.
113 |
114 | Mở trình duyệt và truy cập http://127.0.0.1:8888 để bắt đầu sử dụng. (Thay 8888 bằng số cổng bạn đã chỉ định trong file config.)
115 |
116 | ### Dành cho người dùng macOS
117 | [Đối với phiên bản desktop (file bản phát hành có chứa "desktop" trong tên), làm theo hướng dẫn sau]
118 | Do vấn đề chứng thực, phiên bản desktop hiện chưa hỗ trợ chạy trực tiếp bằng double-click hoặc cài đặt qua DMG. Cần cấu hình thủ công như sau:
119 |
120 | 1. Mở Terminal và truy cập thư mục chứa file thực thi (giả sử tên file là KrillinAI_1.0.0_desktop_macOS_arm64)
121 |
122 | 2. Thực hiện lần lượt các lệnh sau:
123 |
124 | ```
125 | sudo xattr -cr ./KrillinAI_1.0.0_desktop_macOS_arm64
126 | sudo chmod +x ./KrillinAI_1.0.0_desktop_macOS_arm64
127 | ./KrillinAI_1.0.0_desktop_macOS_arm64
128 | ```
129 |
130 | [Đối với phiên bản non-desktop (file bản phát hành không có "desktop" trong tên), làm theo hướng dẫn sau]
131 | Phần mềm này chưa được chứng thực, nên sau khi hoàn thành các bước cấu hình file ở mục "Các bước cơ bản", bạn cần thủ công cấp quyền trust ứng dụng trên macOS. Thực hiện theo các bước sau:
132 | 1. Mở Terminal và điều hướng đến thư mục chứa file thực thi (giả sử tên file là KrillinAI_1.0.0_macOS_arm64)
133 | 2. Thực hiện lần lượt các lệnh sau:
134 | ```
135 | sudo xattr -rd com.apple.quarantine ./KrillinAI_1.0.0_macOS_arm64
136 | sudo chmod +x ./KrillinAI_1.0.0_macOS_arm64
137 | ./KrillinAI_1.0.0_macOS_arm64
138 | ```
139 | Thao tác này sẽ khởi động dịch vụ.
140 |
141 | ### Triển khai bằng Docker
142 | Dự án này hỗ trợ triển khai qua Docker. Vui lòng tham khảo [Docker Deployment Instructions](../docs/docker.md).
143 |
144 | ### Hướng dẫn cấu hình Cookie
145 |
146 | Nếu gặp lỗi khi tải video xuống, vui lòng tham khảo [Cookie Configuration Instructions](../docs/get_cookies.md) để thiết lập thông tin cookie của bạn.
147 |
148 | ### Hướng dẫn cấu hình
149 | Cách cấu hình nhanh chóng và tiện lợi nhất:
150 | * Chọn openai cho cả transcription_provider và llm_provider. Với cách này, bạn chỉ cần điền openai.apikey trong ba nhóm cấu hình chính sau: openai, local_model và aliyun là có thể thực hiện dịch phụ đề. (Điền app.proxy, model và openai.base_url theo tình hình thực tế của bạn.)
151 |
152 | Cách cấu hình sử dụng mô hình nhận dạng giọng nói cục bộ (tạm thời chưa hỗ trợ macOS) (lựa chọn cân bằng giữa chi phí, tốc độ và chất lượng):
153 | * Điền fasterwhisper cho transcription_provider và openai cho llm_provider. Với cách này, bạn chỉ cần điền openai.apikey và local_model.faster_whisper trong hai nhóm cấu hình openai và local_model là có thể thực hiện dịch phụ đề. Mô hình cục bộ sẽ được tải xuống tự động. (Tương tự với app.proxy và openai.base_url như đã đề cập ở trên.)
154 |
155 | Các trường hợp sử dụng sau yêu cầu cấu hình Alibaba Cloud:
156 | * Nếu llm_provider điền aliyun nghĩa là sẽ sử dụng dịch vụ mô hình lớn của Alibaba Cloud, do đó cần cấu hình mục aliyun.bailian.
157 | * Nếu transcription_provider điền aliyun, hoặc khi bật chức năng "lồng tiếng" khi bắt đầu tác vụ sẽ sử dụng dịch vụ giọng nói của Alibaba Cloud, do đó cần điền cấu hình mục aliyun.speech.
158 | * Nếu bật chức năng "lồng tiếng" đồng thời tải lên file âm thanh cục bộ để nhân bản giọng nói thì sẽ sử dụng cả dịch vụ lưu trữ đám mây OSS của Alibaba Cloud, do đó cần điền cấu hình mục aliyun.oss.
159 | Hướng dẫn cấu hình: [Alibaba Cloud Configuration Instructions](../docs/aliyun.md)
160 |
161 | ## Câu hỏi thường gặp
162 | Vui lòng tham khảo [Frequently Asked Questions](../docs/faq.md)
163 |
164 | ## Hướng dẫn đóng góp
165 |
166 | - Không gửi các file không cần thiết như .vscode, .idea,... Hãy sử dụng tốt file .gitignore để lọc chúng.
167 | - Không gửi file config.toml mà hãy gửi file config-example.toml.
168 |
169 | ## Lịch sử sao
170 |
171 | [](https://star-history.com/#krillinai/KrillinAI&Date)
172 |
173 |
--------------------------------------------------------------------------------
/docs/README_zh.md:
--------------------------------------------------------------------------------
1 |
2 |

3 |
4 | # 极简部署AI视频翻译配音工具
5 |
6 |

7 |
8 | **[English](../README.md)|[简体中文](../docs/README_zh.md)|[日本語](../docs/README_jp.md)|[한국어](../docs/README_kr.md)|[Tiếng Việt](../docs/README_vi.md)|[Français](../docs/README_fr.md)|[Deutsch](../docs/README_de.md)|[Español](../docs/README_es.md)|[Português](../docs/README_pt.md)|[Русский](../docs/README_rus.md)|[اللغة العربية](../docs/README_ar.md)**
9 |
10 | [](https://jq.qq.com/?_wv=1027&k=754069680)
11 | [](https://space.bilibili.com/242124650)
12 |
13 |
14 |
15 | ### 📢win&mac桌面端新发布 欢迎测试反馈[文档有点落后,持续更新中]
16 |
17 | ## 项目简介
18 |
19 | Krillin AI 是一款全能型音视频本地化与增强解决方案。这款简约而强大的工具,集音视频翻译、配音、语音克隆于一身,支持横竖屏格式输出,确保在所有主流平台(哔哩哔哩,小红书,抖音,视频号,快手,YouTube,TikTok等)都能完美呈现。通过端到端的工作流程,Krillin AI 仅需点击几次,就能将原始素材转化为精美即用的跨平台内容。
20 |
21 | ## 主要特点与功能:
22 | 🎯 **一键启动**:无需复杂的环境配置,自动安装依赖,立即投入使用,新增桌面版本,使用更便捷!
23 |
24 | 📥 **视频获取**:支持yt-dlp下载或本地文件上传
25 |
26 | 📜 **精准识别**:基于Whisper的高准确度语音识别
27 |
28 | 🧠 **智能分段**:使用LLM进行字幕分段和对齐
29 |
30 | 🔄 **术语替换**:一键替换专业领域词汇
31 |
32 | 🌍 **专业翻译**:基于LLM,段落级翻译保持语义连贯性
33 |
34 | 🎙️ **配音克隆**:提供CosyVoice精选音色或自定义音色克隆
35 |
36 | 🎬 **视频合成**:自动处理横竖版视频和字幕排版
37 |
38 |
39 | ## 效果展示
40 | 下图为46分钟的本地视频导入,一键执行后生成的字幕文件入轨后的效果,无任何手动调整。无缺失、重叠,断句自然,翻译质量也非常高。
41 | 
42 |
43 |
44 |
45 |
46 |
47 | ### 字幕翻译
48 | ---
49 | https://github.com/user-attachments/assets/bba1ac0a-fe6b-4947-b58d-ba99306d0339
50 |
51 | |
52 |
53 |
54 |
55 |
56 | ### 配音
57 | ---
58 | https://github.com/user-attachments/assets/0b32fad3-c3ad-4b6a-abf0-0865f0dd2385
59 |
60 | |
61 |
62 |
63 |
64 | ### 竖屏
65 | ---
66 | https://github.com/user-attachments/assets/c2c7b528-0ef8-4ba9-b8ac-f9f92f6d4e71
67 |
68 | |
69 |
70 |
71 |
72 |
73 | ## 🔍 语音识别服务支持
74 | _**下表中的本地模型全部支持自动安装可执行文件+模型文件,你只要选择,其它的KrillinAI帮你全部准备完毕。**_
75 |
76 | | 服务源 | 支持平台 | 模型可选项 | 本地/云端 | 备注 |
77 | |----------------|------------------------------|-----------------------------------|-------|-------------|
78 | | **OpenAI Whisper** | 全平台 | - | 云端 | 速度快效果好 |
79 | | **FasterWhisper** | Windows/Linux | `tiny`/`medium`/`large-v2` (推荐medium+) | 本地 | 速度更快,无云服务开销 |
80 | | **WhisperKit** | macOS (仅限M系列芯片) | `large-v2` | 本地 | Apple芯片原生优化 |
81 | | **阿里云ASR** | 全平台 | - | 云端 | 避免中国大陆网络问题 |
82 |
83 | ## 🚀 大模型支持
84 |
85 | ✅ 兼容所有符合 **OpenAI API规范** 的云端/本地大模型服务,包括但不限于:
86 | - OpenAI
87 | - DeepSeek
88 | - 通义千问
89 | - 本地部署的开源模型
90 | - 其他兼容OpenAI格式的API服务
91 |
92 | ## 语言支持
93 | 输入语言支持:中文,英文,日语,德语,土耳其,韩语,俄语,马来语(持续增加中)
94 |
95 | 翻译语言支持:英文,中文,俄语,西班牙语,法语等101种语言
96 |
97 | ## 界面预览
98 | 
99 |
100 |
101 | ## 🚀 快速开始
102 | ### 基本步骤
103 | 首先下载[Release](https://github.com/krillinai/KrillinAI/releases)中与你设备系统匹配的可执行文件,然后按照下面的教程选择桌面版还是非桌面版,再把软件放入一个空文件夹,因为运行之后会生成一些目录,放到空文件夹会好管理一些。
104 |
105 | 【如果是桌面版,即release文件带desktop的看此处】
106 | _桌面版是新发布的,为了解决新手用户难以正确编辑配置文件的问题,还有不少bug,持续更新中_
107 | 1. 双击文件即可开始使用(桌面端也是需要配置的,在软件内配置)
108 |
109 | 【如果是非桌面版,即release文件不带desktop的看此处】
110 | _非桌面版是一开始的版本,配置比较复杂,但是功能稳定,同时适合服务器部署,因为会以web的方式提供ui_
111 | 1. 在文件夹内创建`config`文件夹,然后在`config`文件夹创建`config.toml`文件,复制源代码`config`目录下的`config-example.toml`文件的内容填入`config.toml`,并对照填写你的配置信息。
112 | 2. 双击,或在终端执行可执行文件,启动服务
113 | 3. 打开浏览器,输入`http://127.0.0.1:8888`,开始使用 (8888替换成你在配置文件中填写的端口)
114 |
115 | ### To: macOS用户
116 | 【如果是桌面版,即release文件带desktop的看此处】
117 | 桌面端目前打包方式由于签名等问题,还不能够做到双击直接运行或者dmg安装,需要手动信任应用,方法如下:
118 | 1. 在终端打开可执行文件(假设文件名是KrillinAI_1.0.0_desktop_macOS_arm64)所在目录
119 | 2. 依次执行以下命令:
120 | ```
121 | sudo xattr -cr ./KrillinAI_1.0.0_desktop_macOS_arm64
122 | sudo chmod +x ./KrillinAI_1.0.0_desktop_macOS_arm64
123 | ./KrillinAI_1.0.0_desktop_macOS_arm64
124 | ```
125 |
126 | 【如果是非桌面版,即release文件不带desktop的看此处】
127 | 本软件没有做签名,因此在macOS上运行时,在完成“基本步骤”中的文件配置后,还需要手动信任应用,方法如下:
128 | 1. 在终端打开可执行文件(假设文件名是KrillinAI_1.0.0_macOS_arm64)所在目录
129 | 2. 依次执行以下命令:
130 | ```
131 | sudo xattr -rd com.apple.quarantine ./KrillinAI_1.0.0_macOS_arm64
132 | sudo chmod +x ./KrillinAI_1.0.0_macOS_arm64
133 | ./KrillinAI_1.0.0_macOS_arm64
134 | ```
135 | 即可启动服务
136 |
137 | ### Docker部署
138 | 本项目支持Docker部署,请参考[Docker部署说明](./docker.md)
139 |
140 | ### Cookie配置说明(非必选)
141 |
142 | 如果你遇到视频下载失败的情况
143 |
144 | 请参考 [Cookie 配置说明](./get_cookies.md) 配置你的Cookie信息。
145 |
146 | ### 配置帮助(必看)
147 | 最快速便捷的配置方式:
148 | * `transcription_provider`和`llm_provider`都选择`openai`,这样在下方`openai`、`local_model`、`aliyun`三个配置项大类里只需要填写`openai.apikey`就可以进行字幕翻译。(`app.proxy`、`model`和`openai.base_url`按自己情况选填)
149 |
150 | 使用本地语言识别模型(暂不支持macOS)的配置方式(兼顾成本、速度与质量的选择)
151 | * `transcription_provider`填写`fasterwhisper`,`llm_provider`填写`openai`,这样在下方`openai`、`local_model`两个配置项大类里只需要填写`openai.apikey`和`local_model.faster_whisper`就可以进行字幕翻译,本地模型会自动下载。(`app.proxy`和`openai.base_url`同上)
152 |
153 | 以下几种使用情况,需要进行阿里云的配置:
154 | * 如果`llm_provider`填写了`aliyun`,需要使用阿里云的大模型服务,因此需要配置`aliyun.bailian`项的配置
155 | * 如果`transcription_provider`填写了`aliyun`,或者在启动任务时开启了“配音”功能,都需要使用阿里云的语音服务,因此需要填写`aliyun.speech`项的配置
156 | * 如果开启了“配音”功能,同时上传了本地的音频做音色克隆,则还需要使用阿里云的OSS云存储服务,因此需要填写`aliyun.oss`项的配置
157 | 阿里云配置帮助:[阿里云配置说明](./aliyun.md)
158 |
159 | ## 常见问题
160 |
161 | 请移步[常见问题](./faq.md)
162 |
163 | ## 贡献规范
164 | 1. 不要提交无用文件,如.vscode、.idea等,请善于使用.gitignore过滤
165 | 2. 不要提交config.toml,而是使用config-example.toml提交
166 |
167 | ## 联系我们
168 | 1. 加入我们的QQ群,解答问题:754069680
169 | 2. 关注我们的社交媒体账号,[哔哩哔哩](https://space.bilibili.com/242124650),每天分享AI科技领域优质内容
170 |
171 | ## Star History
172 |
173 | [](https://star-history.com/#krillinai/KrillinAI&Date)
174 |
--------------------------------------------------------------------------------
/docs/aliyun.md:
--------------------------------------------------------------------------------
1 | ## 前提条件
2 | 需要先有[阿里云](https://www.aliyun.com)账号并经过实名认证,多数服务有免费额度
3 |
4 | ## 阿里云百炼平台密钥获取
5 | 1. 登录[阿里云百炼大模型服务平台](https://bailian.console.aliyun.com/),鼠标悬停于页面右上角的个人中心图标上,在下拉菜单中单击API-KEY
6 | 
7 | 2. 在左侧导航栏,选择全部API-KEY或我的API-KEY,然后创建或查看API Key
8 |
9 | ## 阿里云`access_key_id`和`access_key_secret`获取
10 | 1. 进入[阿里云AccessKey管理页面](https://ram.console.aliyun.com/profile/access-keys)
11 | 2. 点击创建AccessKey,如需要选择使用方式,选择“本地开发环境中使用”
12 | 
13 | 3. 妥善保管,最好复制到本地文件保存
14 |
15 | ## 阿里云语音服务开通
16 | 1. 进入[阿里云语音服务管理页面](https://nls-portal.console.aliyun.com/applist),首次进入需开通服务
17 | 2. 点击创建项目
18 | 
19 | 3. 选择功能并开通
20 | 
21 | 4. “流式文本语音合成(CosyVoice大模型)”需要升级成商业版,其它服务可以用免费体验版
22 | 
23 | 5. 复制app key即可
24 | 
25 |
26 | ## 阿里云OSS服务开通
27 | 1. 进入[阿里云对象存储服务控制台](https://oss.console.aliyun.com/overview),首次进入需开通服务
28 | 2. 左侧选择Bucket列表,然后点击创建
29 | 
30 | 3. 选择快捷创建,填写符合要求的Bucket名称并选择**上海**地域,完成创建(此处填写的名字就是配置项`aliyun.oss.bucket`的值)
31 | 
32 | 4. 创建完成后进入Bucket
33 | 
34 | 5. 将“阻止公共访问”开关关闭,并设置读写权限为“公共读”
35 | 
36 | 
--------------------------------------------------------------------------------
/docs/docker.md:
--------------------------------------------------------------------------------
1 | # Docker 部署指南
2 |
3 | ## 快速开始
4 | 先准备好配置文件,设置服务器监听端口为`8888`、服务器监听地址为`0.0.0.0`
5 |
6 | ### docker run启动
7 | ```bash
8 | docker run -d \
9 | -p 8888:8888 \
10 | -v /path/to/config.toml:/app/config/config.toml \
11 | asteria798/krillinai
12 | ```
13 |
14 | ### docker-compose启动
15 | ```yaml
16 | version: '3'
17 | services:
18 | krillin:
19 | image: asteria798/krillinai
20 | ports:
21 | - "8888:8888"
22 | volumes:
23 | - /path/to/config.toml:/app/config/config.toml
24 | ```
25 |
26 | ## 持久化模型
27 | 如果使用fasterwhisper模型, KrillinAI 会自动下载模型所需文件到`/app/models`目录和`/app/bin`目录。容器删除后,这些文件会丢失。如果需要持久化模型,可以将这两个目录映射到宿主机的目录。
28 |
29 | ### docker run启动
30 | ```bash
31 | docker run -d \
32 | -p 8888:8888 \
33 | -v /path/to/config.toml:/app/config/config.toml \
34 | -v /path/to/models:/app/models \
35 | -v /path/to/bin:/app/bin \
36 | krillinai/krillin
37 | ```
38 |
39 | ### docker-compose启动
40 | ```yaml
41 | version: '3'
42 | services:
43 | krillin:
44 | image: krillinai/krillin
45 | ports:
46 | - "8888:8888"
47 | volumes:
48 | - /path/to/config.toml:/app/config/config.toml
49 | - /path/to/models:/app/models
50 | - /path/to/bin:/app/bin
51 | ```
52 |
53 | ## 注意事项
54 | 1. 如果docker容器的网络模式不为host,建议将配置文件服务器监听地址设置为`0.0.0.0`,否则可能无法访问服务。
55 | 2. 如果容器内需要访问宿主机的网络代理,请将代理地址配置项`proxy`的`127.0.0.1`设置为`host.docker.internal`,例如`http://host.docker.internal:7890`
--------------------------------------------------------------------------------
/docs/faq.md:
--------------------------------------------------------------------------------
1 | ### 1. 看不到`app.log`配置文件,无法知道报错内容
2 | Windows用户请将本软件的工作目录放在非C盘的文件夹。
3 |
4 | ### 2. 非桌面版明明创建了配置文件,但还是报错“找不到配置文件”
5 | 确保配置文件名是`config.toml`,而不是`config.toml.txt`或其它。
6 | 配置完成后,本软件的工作文件夹的结构应该是这样的:
7 | ```
8 | /── config/
9 | │ └── config.toml
10 | ├── cookies.txt (<- 可选的cookies.txt文件)
11 | └── krillinai.exe
12 | ```
13 |
14 | ### 3. 填写了大模型配置,但是报错“xxxxx需要配置xxxxx API Key”
15 | 模型服务和语音服务虽然可以都用openai的服务,但是也有大模型单独使用非openai的场景,因此这两块配置是分开的,除了大模型配置,请往配置下方找whisper配置填写对应的密钥等信息。
16 |
17 | ### 4. 报错内含“yt-dlp error”
18 | 视频下载器的问题,目前看来无非就是网络问题或者下载器版本问题,检查下网络代理有没有打开并且配置到配置文件的代理配置项,同时建议选择香港节点。下载器是本软件自动安装的,安装的源我会更新但毕竟不是官方源,所以可能会有落后,遇到问题尝试手动更新一下,更新方法:
19 |
20 | 在软件bin目录位置打开终端,执行
21 | ```
22 | ./yt-dlp.exe -U
23 | ```
24 | 此处`yt-dlp.exe`替换为你系统实际的ytdlp软件名称。
--------------------------------------------------------------------------------
/docs/get_cookies.md:
--------------------------------------------------------------------------------
1 | # Cookie 配置说明
2 |
3 | ## 问题说明
4 | 在生成字幕的时候,可能会遇到出错的情况,例如“Sign in to confirm you are not a bot”:
5 |
6 | 这是因为:
7 | 1. 部分视频平台需要用户登录信息才能获取高质量视频
8 | 2. 您当前的代理的ip不够纯净,已被视频网站官方限制
9 |
10 | ## 解决方法
11 |
12 | ### 1. 安装浏览器扩展
13 | 根据你使用的浏览器选择安装:
14 |
15 | - Chrome浏览器: [Get CookieTxt Locally](https://chromewebstore.google.com/detail/get-cookiestxt-locally/cclelndahbckbenkjhflpdbgdldlbecc)
16 | - Edge浏览器: [Export Cookies File](https://microsoftedge.microsoft.com/addons/detail/export-cookies-file/hbglikhfdcfhdfikmocdflffaecbnedo)
17 |
18 | ### 2. 导出Cookie文件
19 | 1. 登录需要下载视频的网站(如B站、YouTube等)
20 | 2. 点击浏览器扩展图标
21 | 3. 选择"Export Cookies"选项
22 | 4. 将导出的cookies.txt文件保存到本软件所在的目录下
23 | 5. 如果导出的文件名不是cookies.txt,请将文件名改为cookies.txt
24 |
25 | 图示:
26 | 
27 |
28 | 导出后,工具的工作文件夹的结构应该是这样的:
29 | ```
30 | /── config/
31 | │ └── config.toml
32 | ├── tasks/
33 | ├── cookies.txt (<- 导出的cookies.txt文件)
34 | └── krillinai.exe
35 | ```
--------------------------------------------------------------------------------
/docs/images/alignment.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/krillinai/KrillinAI/d32920bc6b13724e0f39287bf6952cfc323b943e/docs/images/alignment.png
--------------------------------------------------------------------------------
/docs/images/aliyun_accesskey_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/krillinai/KrillinAI/d32920bc6b13724e0f39287bf6952cfc323b943e/docs/images/aliyun_accesskey_1.png
--------------------------------------------------------------------------------
/docs/images/aliyun_oss_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/krillinai/KrillinAI/d32920bc6b13724e0f39287bf6952cfc323b943e/docs/images/aliyun_oss_1.png
--------------------------------------------------------------------------------
/docs/images/aliyun_oss_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/krillinai/KrillinAI/d32920bc6b13724e0f39287bf6952cfc323b943e/docs/images/aliyun_oss_2.png
--------------------------------------------------------------------------------
/docs/images/aliyun_oss_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/krillinai/KrillinAI/d32920bc6b13724e0f39287bf6952cfc323b943e/docs/images/aliyun_oss_3.png
--------------------------------------------------------------------------------
/docs/images/aliyun_oss_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/krillinai/KrillinAI/d32920bc6b13724e0f39287bf6952cfc323b943e/docs/images/aliyun_oss_4.png
--------------------------------------------------------------------------------
/docs/images/aliyun_oss_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/krillinai/KrillinAI/d32920bc6b13724e0f39287bf6952cfc323b943e/docs/images/aliyun_oss_5.png
--------------------------------------------------------------------------------
/docs/images/aliyun_speech_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/krillinai/KrillinAI/d32920bc6b13724e0f39287bf6952cfc323b943e/docs/images/aliyun_speech_1.png
--------------------------------------------------------------------------------
/docs/images/aliyun_speech_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/krillinai/KrillinAI/d32920bc6b13724e0f39287bf6952cfc323b943e/docs/images/aliyun_speech_2.png
--------------------------------------------------------------------------------
/docs/images/aliyun_speech_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/krillinai/KrillinAI/d32920bc6b13724e0f39287bf6952cfc323b943e/docs/images/aliyun_speech_3.png
--------------------------------------------------------------------------------
/docs/images/aliyun_speech_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/krillinai/KrillinAI/d32920bc6b13724e0f39287bf6952cfc323b943e/docs/images/aliyun_speech_4.png
--------------------------------------------------------------------------------
/docs/images/bailian_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/krillinai/KrillinAI/d32920bc6b13724e0f39287bf6952cfc323b943e/docs/images/bailian_1.png
--------------------------------------------------------------------------------
/docs/images/export_cookies.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/krillinai/KrillinAI/d32920bc6b13724e0f39287bf6952cfc323b943e/docs/images/export_cookies.png
--------------------------------------------------------------------------------
/docs/images/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/krillinai/KrillinAI/d32920bc6b13724e0f39287bf6952cfc323b943e/docs/images/logo.png
--------------------------------------------------------------------------------
/docs/images/ui.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/krillinai/KrillinAI/d32920bc6b13724e0f39287bf6952cfc323b943e/docs/images/ui.jpg
--------------------------------------------------------------------------------
/docs/images/ui_desktop.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/krillinai/KrillinAI/d32920bc6b13724e0f39287bf6952cfc323b943e/docs/images/ui_desktop.png
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module krillin-ai
2 |
3 | go 1.22
4 |
5 | require (
6 | fyne.io/fyne/v2 v2.5.4
7 | github.com/BurntSushi/toml v1.4.0
8 | github.com/aliyun/alibaba-cloud-sdk-go v1.63.72
9 | github.com/aliyun/alibabacloud-oss-go-sdk-v2 v1.1.3
10 | github.com/gin-gonic/gin v1.10.0
11 | github.com/go-resty/resty/v2 v2.7.0
12 | github.com/google/uuid v1.4.0
13 | github.com/gorilla/websocket v1.5.0
14 | github.com/samber/lo v1.38.1
15 | github.com/sashabaranov/go-openai v1.36.0
16 | go.uber.org/zap v1.25.0
17 | golang.org/x/sync v0.9.0
18 | )
19 |
20 | require (
21 | fyne.io/systray v1.11.0 // indirect
22 | github.com/bytedance/sonic v1.11.6 // indirect
23 | github.com/bytedance/sonic/loader v0.1.1 // indirect
24 | github.com/cloudwego/base64x v0.1.4 // indirect
25 | github.com/cloudwego/iasm v0.2.0 // indirect
26 | github.com/davecgh/go-spew v1.1.1 // indirect
27 | github.com/fredbi/uri v1.1.0 // indirect
28 | github.com/fsnotify/fsnotify v1.7.0 // indirect
29 | github.com/fyne-io/gl-js v0.0.0-20220119005834-d2da28d9ccfe // indirect
30 | github.com/fyne-io/glfw-js v0.0.0-20241126112943-313d8a0fe1d0 // indirect
31 | github.com/fyne-io/image v0.0.0-20220602074514-4956b0afb3d2 // indirect
32 | github.com/gabriel-vasile/mimetype v1.4.3 // indirect
33 | github.com/gin-contrib/sse v0.1.0 // indirect
34 | github.com/go-gl/gl v0.0.0-20211210172815-726fda9656d6 // indirect
35 | github.com/go-gl/glfw/v3.3/glfw v0.0.0-20240506104042-037f3cc74f2a // indirect
36 | github.com/go-playground/locales v0.14.1 // indirect
37 | github.com/go-playground/universal-translator v0.18.1 // indirect
38 | github.com/go-playground/validator/v10 v10.20.0 // indirect
39 | github.com/go-text/render v0.2.0 // indirect
40 | github.com/go-text/typesetting v0.2.0 // indirect
41 | github.com/goccy/go-json v0.10.2 // indirect
42 | github.com/godbus/dbus/v5 v5.1.0 // indirect
43 | github.com/google/go-cmp v0.5.9 // indirect
44 | github.com/gopherjs/gopherjs v1.17.2 // indirect
45 | github.com/jeandeaual/go-locale v0.0.0-20240223122105-ce5225dcaa49 // indirect
46 | github.com/jmespath/go-jmespath v0.4.0 // indirect
47 | github.com/json-iterator/go v1.1.12 // indirect
48 | github.com/jsummers/gobmp v0.0.0-20151104160322-e2ba15ffa76e // indirect
49 | github.com/klauspost/cpuid/v2 v2.2.7 // indirect
50 | github.com/kr/pretty v0.3.1 // indirect
51 | github.com/leodido/go-urn v1.4.0 // indirect
52 | github.com/mattn/go-isatty v0.0.20 // indirect
53 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
54 | github.com/modern-go/reflect2 v1.0.2 // indirect
55 | github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646 // indirect
56 | github.com/nicksnyder/go-i18n/v2 v2.4.0 // indirect
57 | github.com/opentracing/opentracing-go v1.2.1-0.20220228012449-10b1cf09e00b // indirect
58 | github.com/pelletier/go-toml/v2 v2.2.2 // indirect
59 | github.com/pmezard/go-difflib v1.0.0 // indirect
60 | github.com/rymdport/portal v0.3.0 // indirect
61 | github.com/srwiley/oksvg v0.0.0-20221011165216-be6e8873101c // indirect
62 | github.com/srwiley/rasterx v0.0.0-20220730225603-2ab79fcdd4ef // indirect
63 | github.com/stretchr/testify v1.9.0 // indirect
64 | github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
65 | github.com/ugorji/go/codec v1.2.12 // indirect
66 | github.com/yuin/goldmark v1.7.1 // indirect
67 | go.uber.org/atomic v1.10.0 // indirect
68 | go.uber.org/multierr v1.11.0 // indirect
69 | golang.org/x/arch v0.8.0 // indirect
70 | golang.org/x/crypto v0.23.0 // indirect
71 | golang.org/x/exp v0.0.0-20221031165847-c99f073a8326 // indirect
72 | golang.org/x/image v0.18.0 // indirect
73 | golang.org/x/mobile v0.0.0-20231127183840-76ac6878050a // indirect
74 | golang.org/x/net v0.25.0 // indirect
75 | golang.org/x/sys v0.20.0 // indirect
76 | golang.org/x/text v0.20.0 // indirect
77 | golang.org/x/time v0.4.0 // indirect
78 | google.golang.org/protobuf v1.34.1 // indirect
79 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect
80 | gopkg.in/ini.v1 v1.67.0 // indirect
81 | gopkg.in/yaml.v3 v3.0.1 // indirect
82 | )
83 |
--------------------------------------------------------------------------------
/internal/api/subtitle.go:
--------------------------------------------------------------------------------
1 | package api
2 |
3 | import (
4 | "fmt"
5 | "os"
6 | "path/filepath"
7 | "time"
8 | )
9 |
// WordReplacement is a single find/replace rule applied to subtitle text.
type WordReplacement struct {
	From string `json:"from"`
	To   string `json:"to"`
}

// SubtitleTask is the request payload for creating a subtitle task.
type SubtitleTask struct {
	URL                     string   `json:"url"`                                    // video URL
	Language                string   `json:"language"`                               // UI language
	OriginLang              string   `json:"origin_lang"`                            // source language
	TargetLang              string   `json:"target_lang"`                            // target language
	Bilingual               int      `json:"bilingual"`                              // bilingual subtitles: 1 = yes, 2 = no
	TranslationSubtitlePos  int      `json:"translation_subtitle_pos"`               // translated-subtitle position: 1 = top, 2 = bottom
	TTS                     int      `json:"tts"`                                    // dubbing: 1 = yes, 2 = no
	TTSVoiceCode            int      `json:"tts_voice_code,omitempty"`               // dubbing voice: 1 = female, 2 = male
	TTSVoiceCloneSrcFileURL string   `json:"tts_voice_clone_src_file_url,omitempty"` // source audio file URL for voice cloning
	ModalFilter             int      `json:"modal_filter"`                           // filter filler words: 1 = yes, 2 = no
	Replace                 []string `json:"replace,omitempty"`                      // word replacement list
	EmbedSubtitleVideoType  string   `json:"embed_subtitle_video_type"`              // subtitle-embedding type: none / horizontal / vertical / all
	VerticalMajorTitle      string   `json:"vertical_major_title,omitempty"`         // main title for vertical video
	VerticalMinorTitle      string   `json:"vertical_minor_title,omitempty"`         // secondary title for vertical video
}

// SubtitleResult describes one generated subtitle artifact.
type SubtitleResult struct {
	Name        string `json:"name"`         // file name
	DownloadURL string `json:"download_url"` // download URL
}

// TaskStatus is the polled state of a subtitle task.
type TaskStatus struct {
	TaskId            string           `json:"task_id"`             // task ID
	ProcessPercent    int              `json:"process_percent"`     // progress percentage
	Status            string           `json:"status"`              // task state
	Message           string           `json:"message"`             // human-readable status message
	SubtitleInfo      []SubtitleResult `json:"subtitle_info"`       // generated subtitle files
	SpeechDownloadURL string           `json:"speech_download_url"` // dubbed-audio download URL
}
49 |
50 | // CreateSubtitleTask 创建字幕任务
51 | func CreateSubtitleTask(task *SubtitleTask) (*TaskStatus, error) {
52 | // 生成任务ID
53 | taskId := generateTaskId()
54 |
55 | // 创建任务目录
56 | taskDir := filepath.Join("tasks", taskId)
57 | if err := createTaskDirectory(taskDir); err != nil {
58 | return nil, fmt.Errorf("创建任务目录失败: %v", err)
59 | }
60 |
61 | // 启动异步任务处理
62 | go processTask(taskId, task)
63 |
64 | return &TaskStatus{
65 | TaskId: taskId,
66 | ProcessPercent: 0,
67 | Status: "created",
68 | Message: "任务已创建",
69 | }, nil
70 | }
71 |
72 | // GetSubtitleTaskStatus 获取任务状态
73 | func GetSubtitleTaskStatus(taskId string) (*TaskStatus, error) {
74 | // 获取任务状态
75 | status, err := getTaskStatus(taskId)
76 | if err != nil {
77 | return nil, fmt.Errorf("获取任务状态失败: %v", err)
78 | }
79 |
80 | // 如果任务完成,添加下载链接
81 | if status.ProcessPercent >= 100 {
82 | status.SubtitleInfo = []SubtitleResult{
83 | {
84 | Name: "字幕.srt",
85 | DownloadURL: fmt.Sprintf("/tasks/%s/output/subtitle.srt", taskId),
86 | },
87 | {
88 | Name: "字幕.ass",
89 | DownloadURL: fmt.Sprintf("/tasks/%s/output/subtitle.ass", taskId),
90 | },
91 | }
92 |
93 | // 如果启用了配音,添加配音下载链接
94 | if status.SpeechDownloadURL == "" {
95 | status.SpeechDownloadURL = fmt.Sprintf("/tasks/%s/output/speech.mp3", taskId)
96 | }
97 | }
98 |
99 | return status, nil
100 | }
101 |
// The helpers below are placeholders to be fleshed out for real use.

// generateTaskId returns a unique identifier for a new task.
//
// The previous implementation used only second-resolution wall-clock time,
// so two tasks created within the same second received the same ID.
// Appending the nanosecond timestamp keeps the human-readable prefix while
// making collisions practically impossible within a single process.
func generateTaskId() string {
	now := time.Now()
	return fmt.Sprintf("task-%s-%d", now.Format("20060102150405"), now.UnixNano())
}
107 |
108 | func createTaskDirectory(taskDir string) error {
109 | // TODO: 实现任务目录创建逻辑
110 | return os.MkdirAll(taskDir, 0755)
111 | }
112 |
// processTask runs the full subtitle pipeline for one task. It is launched
// as a goroutine by CreateSubtitleTask and is currently an empty stub.
func processTask(taskId string, task *SubtitleTask) {
	// TODO: implement the task-processing logic:
	// 1. Download the video
	// 2. Extract the audio
	// 3. Run speech recognition
	// 4. Translate the subtitles
	// 5. Generate the subtitle files
	// 6. Generate dubbed speech, if requested
	// 7. Embed the subtitles into the video, if requested
	// 8. Update the task status
}
124 |
125 | func getTaskStatus(taskId string) (*TaskStatus, error) {
126 | // TODO: 实现任务状态获取逻辑
127 | return &TaskStatus{
128 | TaskId: taskId,
129 | ProcessPercent: 50,
130 | Status: "processing",
131 | Message: "正在处理中",
132 | }, nil
133 | }
134 |
--------------------------------------------------------------------------------
/internal/desktop/components.go:
--------------------------------------------------------------------------------
1 | package desktop
2 |
3 | import (
4 | "fmt"
5 | "image/color"
6 | "time"
7 |
8 | "fyne.io/fyne/v2"
9 | "fyne.io/fyne/v2/canvas"
10 | "fyne.io/fyne/v2/container"
11 | "fyne.io/fyne/v2/layout"
12 | "fyne.io/fyne/v2/widget"
13 | )
14 |
15 | // FadeAnimation 淡入淡出动画
16 | func FadeAnimation(content fyne.CanvasObject, duration time.Duration, startOpacity, endOpacity float64) {
17 | // 使用更柔和的动画效果
18 | rect := canvas.NewRectangle(color.NRGBA{R: 240, G: 246, B: 252, A: 0})
19 | rect.FillColor = color.NRGBA{R: 240, G: 246, B: 252, A: uint8(startOpacity * 255)}
20 |
21 | anim := canvas.NewColorRGBAAnimation(
22 | color.NRGBA{R: 240, G: 246, B: 252, A: uint8(startOpacity * 255)},
23 | color.NRGBA{R: 240, G: 246, B: 252, A: uint8(endOpacity * 255)},
24 | duration,
25 | func(c color.Color) {
26 | rect.FillColor = c
27 | content.Refresh()
28 | })
29 |
30 | anim.Start()
31 | }
32 |
33 | // PrimaryButton 创建主要按钮
34 | func PrimaryButton(text string, icon fyne.Resource, action func()) *widget.Button {
35 | btn := widget.NewButtonWithIcon(text, icon, action)
36 | btn.Importance = widget.HighImportance
37 | return btn
38 | }
39 |
40 | // SecondaryButton 创建次要按钮
41 | func SecondaryButton(text string, icon fyne.Resource, action func()) *widget.Button {
42 | btn := widget.NewButtonWithIcon(text, icon, action)
43 | btn.Importance = widget.MediumImportance
44 | return btn
45 | }
46 |
47 | // TitleText 创建标题文本
48 | func TitleText(text string) *canvas.Text {
49 | title := canvas.NewText(text, color.NRGBA{R: 88, G: 157, B: 246, A: 255})
50 | title.TextSize = 22
51 | title.TextStyle = fyne.TextStyle{Bold: true}
52 | title.Alignment = fyne.TextAlignCenter
53 | return title
54 | }
55 |
56 | // SubtitleText 创建副标题文本
57 | func SubtitleText(text string) *canvas.Text {
58 | subtitle := canvas.NewText(text, color.NRGBA{R: 100, G: 120, B: 160, A: 255})
59 | subtitle.TextSize = 16
60 | subtitle.TextStyle = fyne.TextStyle{Italic: true}
61 | subtitle.Alignment = fyne.TextAlignCenter
62 | return subtitle
63 | }
64 |
65 | func createShadowRectangle(fillColor color.Color, cornerRadius float32) *canvas.Rectangle {
66 | rect := canvas.NewRectangle(fillColor)
67 | rect.CornerRadius = cornerRadius
68 | return rect
69 | }
70 |
71 | func GlassCard(title, subtitle string, content fyne.CanvasObject) *fyne.Container {
72 | glassBackground := createShadowRectangle(color.NRGBA{R: 255, G: 255, B: 255, A: 200}, 12)
73 |
74 | titleLabel := canvas.NewText(title, color.NRGBA{R: 60, G: 80, B: 120, A: 255})
75 | titleLabel.TextSize = 16
76 | titleLabel.TextStyle = fyne.TextStyle{Bold: true}
77 |
78 | // 副标题
79 | var subtitleLabel *canvas.Text
80 | if subtitle != "" {
81 | subtitleLabel = canvas.NewText(subtitle, color.NRGBA{R: 100, G: 120, B: 150, A: 200})
82 | subtitleLabel.TextSize = 12
83 | }
84 |
85 | // 标题容器
86 | var headerContainer *fyne.Container
87 | if subtitleLabel != nil {
88 | headerContainer = container.NewVBox(titleLabel, subtitleLabel)
89 | } else {
90 | headerContainer = container.NewVBox(titleLabel)
91 | }
92 |
93 | // 分隔线
94 | divider := canvas.NewLine(color.NRGBA{R: 220, G: 230, B: 240, A: 255})
95 | divider.StrokeWidth = 1
96 |
97 | contentWithPadding := container.NewPadded(content)
98 |
99 | // 布局
100 | cardContent := container.NewBorder(
101 | container.NewVBox(container.NewPadded(headerContainer), divider),
102 | nil, nil, nil,
103 | contentWithPadding,
104 | )
105 |
106 | // 阴影
107 | shadow := canvas.NewRectangle(color.NRGBA{R: 0, G: 0, B: 0, A: 20})
108 | shadow.Move(fyne.NewPos(3, 3))
109 | shadow.Resize(fyne.NewSize(cardContent.Size().Width, cardContent.Size().Height))
110 | shadow.CornerRadius = 12
111 |
112 | return container.NewStack(shadow, glassBackground, cardContent)
113 | }
114 |
115 | func StyledCard(title string, content fyne.CanvasObject) *fyne.Container {
116 | bg := createShadowRectangle(color.NRGBA{R: 250, G: 251, B: 254, A: 255}, 8)
117 |
118 | titleLabel := canvas.NewText(title, color.NRGBA{R: 60, G: 80, B: 120, A: 255})
119 | titleLabel.TextSize = 16
120 | titleLabel.TextStyle = fyne.TextStyle{Bold: true}
121 |
122 | divider := canvas.NewRectangle(color.NRGBA{R: 230, G: 235, B: 240, A: 255})
123 | divider.SetMinSize(fyne.NewSize(0, 1))
124 |
125 | // 组合
126 | contentContainer := container.NewBorder(
127 | container.NewVBox(
128 | container.NewPadded(titleLabel),
129 | divider,
130 | ),
131 | nil, nil, nil,
132 | container.NewPadded(content),
133 | )
134 |
135 | shadow := canvas.NewRectangle(color.NRGBA{R: 0, G: 0, B: 0, A: 15})
136 | shadow.Move(fyne.NewPos(2, 2))
137 | shadow.SetMinSize(fyne.NewSize(contentContainer.Size().Width+4, contentContainer.Size().Height+4))
138 | shadow.CornerRadius = 8
139 |
140 | return container.NewStack(shadow, bg, contentContainer)
141 | }
142 |
143 | func StyledSelect(options []string, selected func(string)) *widget.Select {
144 | sel := widget.NewSelect(options, selected)
145 |
146 | // 针对包含"翻译后字幕"的选项增加宽度
147 | for _, option := range options {
148 | if len(option) > 8 {
149 |
150 | extraOptions := make([]string, len(options))
151 | copy(extraOptions, options)
152 |
153 | maxOption := ""
154 | for _, opt := range options {
155 | if len(opt) > len(maxOption) {
156 | maxOption = opt
157 | }
158 | }
159 |
160 | // 添加额外空格来扩展宽度
161 | padding := " "
162 | if len(maxOption) < 20 {
163 | maxOption = maxOption + padding
164 | }
165 |
166 | sel = widget.NewSelect(extraOptions, selected)
167 | break
168 | }
169 | }
170 |
171 | return sel
172 | }
173 |
174 | func StyledEntry(placeholder string) *widget.Entry {
175 | entry := widget.NewEntry()
176 | entry.SetPlaceHolder(placeholder)
177 | return entry
178 | }
179 |
180 | func StyledPasswordEntry(placeholder string) *widget.Entry {
181 | entry := widget.NewPasswordEntry()
182 | entry.SetPlaceHolder(placeholder)
183 | return entry
184 | }
185 |
186 | func DividedContainer(vertical bool, items ...fyne.CanvasObject) *fyne.Container {
187 | if len(items) <= 1 {
188 | if len(items) == 1 {
189 | return container.NewPadded(items[0])
190 | }
191 | return container.NewPadded()
192 | }
193 |
194 | var dividers []fyne.CanvasObject
195 | for i := 0; i < len(items)-1; i++ {
196 | dividers = append(dividers, createDivider(vertical))
197 | }
198 |
199 | var objects []fyne.CanvasObject
200 | for i, item := range items {
201 | objects = append(objects, item)
202 | if i < len(dividers) {
203 | objects = append(objects, dividers[i])
204 | }
205 | }
206 |
207 | if vertical {
208 | return container.New(layout.NewVBoxLayout(), objects...)
209 | }
210 | return container.New(layout.NewHBoxLayout(), objects...)
211 | }
212 |
213 | func createDivider(vertical bool) fyne.CanvasObject {
214 | divider := canvas.NewRectangle(color.NRGBA{R: 210, G: 220, B: 240, A: 255})
215 | if vertical {
216 | divider.SetMinSize(fyne.NewSize(0, 1))
217 | } else {
218 | divider.SetMinSize(fyne.NewSize(1, 0))
219 | }
220 | return divider
221 | }
222 |
223 | func ProgressWithLabel(initial float64) (*widget.ProgressBar, *widget.Label, *fyne.Container) {
224 | progress := widget.NewProgressBar()
225 | progress.SetValue(initial)
226 |
227 | label := widget.NewLabel("0%")
228 |
229 | container := container.NewBorder(nil, nil, nil, label, progress)
230 |
231 | return progress, label, container
232 | }
233 |
234 | // UpdateProgressLabel 更新进度条标签
235 | func UpdateProgressLabel(progress *widget.ProgressBar, label *widget.Label) {
236 | percentage := int(progress.Value * 100)
237 | label.SetText(fmt.Sprintf("%d%%", percentage))
238 | }
239 |
240 | func AnimatedContainer() *fyne.Container {
241 | return container.NewStack()
242 | }
243 |
244 | func SwitchContent(container *fyne.Container, content fyne.CanvasObject, duration time.Duration) {
245 | if container == nil || content == nil {
246 | return
247 | }
248 |
249 | if len(container.Objects) > 0 {
250 | oldContent := container.Objects[0]
251 | FadeAnimation(oldContent, duration/2, 1.0, 0.0)
252 |
253 | go func() {
254 | defer func() {
255 | if r := recover(); r != nil {
256 | fmt.Println("内容切换时发生错误:", r)
257 | }
258 | }()
259 |
260 | time.Sleep(duration / 2)
261 | container.Objects = []fyne.CanvasObject{content}
262 | container.Refresh()
263 | FadeAnimation(content, duration/2, 0.0, 1.0)
264 | }()
265 | } else {
266 | container.Objects = []fyne.CanvasObject{content}
267 | container.Refresh()
268 | FadeAnimation(content, duration/2, 0.0, 1.0)
269 | }
270 | }
271 |
--------------------------------------------------------------------------------
/internal/desktop/desktop.go:
--------------------------------------------------------------------------------
1 | package desktop
2 |
3 | import (
4 | "image/color"
5 | "time"
6 |
7 | "fyne.io/fyne/v2"
8 | "fyne.io/fyne/v2/app"
9 | "fyne.io/fyne/v2/canvas"
10 | "fyne.io/fyne/v2/container"
11 | "fyne.io/fyne/v2/layout"
12 | "fyne.io/fyne/v2/theme"
13 | "fyne.io/fyne/v2/widget"
14 | )
15 |
16 | func createNavButton(text string, icon fyne.Resource, isSelected bool, onTap func()) *widget.Button {
17 | btn := widget.NewButtonWithIcon(text, icon, onTap)
18 |
19 | // 根据选中状态设置颜色
20 | if isSelected {
21 | btn.Importance = widget.HighImportance
22 | } else {
23 | btn.Importance = widget.LowImportance
24 | }
25 |
26 | return btn
27 | }
28 |
29 | // Show 展示桌面
30 | func Show() {
31 | myApp := app.New()
32 |
33 | // 自定义主题
34 | myApp.Settings().SetTheme(NewCustomTheme(false))
35 |
36 | myWindow := myApp.NewWindow("Krillin AI")
37 |
38 | logoContainer := container.NewVBox()
39 |
40 | logo := canvas.NewText("Krillin AI", color.NRGBA{R: 88, G: 157, B: 246, A: 255})
41 | logo.TextSize = 28
42 | logo.TextStyle = fyne.TextStyle{Bold: true}
43 | logo.Alignment = fyne.TextAlignCenter
44 |
45 | separator := canvas.NewRectangle(color.NRGBA{R: 210, G: 225, B: 245, A: 255})
46 | separator.SetMinSize(fyne.NewSize(0, 2))
47 |
48 | slogan := canvas.NewText("智能内容创作助手", color.NRGBA{R: 100, G: 120, B: 160, A: 255})
49 | slogan.TextSize = 12
50 | slogan.Alignment = fyne.TextAlignCenter
51 |
52 | logoContainer.Add(logo)
53 | logoContainer.Add(separator)
54 | logoContainer.Add(slogan)
55 |
56 | // 创建左侧导航栏
57 | navItems := []string{"工作台 Workbench", "配置 Config"}
58 | navIcons := []fyne.Resource{theme.DocumentIcon(), theme.SettingsIcon()}
59 |
60 | // 存储导航按钮列表
61 | var navButtons []*widget.Button
62 | navContainer := container.NewVBox()
63 |
64 | // 创建内容区域,使用Stack容器来叠放多个内容
65 | contentStack := container.NewStack()
66 |
67 | // 预先创建两个tab的内容
68 | workbenchContent := CreateSubtitleTab(myWindow)
69 | configContent := CreateConfigTab(myWindow)
70 |
71 | // 默认显示工作台内容
72 | contentStack.Add(workbenchContent)
73 | contentStack.Add(configContent)
74 |
75 | configContent.Hide()
76 |
77 | currentSelectedIndex := 0
78 |
79 | // 创建导航项
80 | for i, item := range navItems {
81 | index := i // 捕获变量
82 | isSelected := (i == currentSelectedIndex)
83 |
84 | // 创建导航按钮以及点击处理函数
85 | navBtn := createNavButton(item, navIcons[i], isSelected, func() {
86 | // 如果已经是当前选中项,不做任何操作
87 | if currentSelectedIndex == index {
88 | return
89 | }
90 |
91 | // 更新所有导航项的状态
92 | for j, btn := range navButtons {
93 | if j == index {
94 | btn.Importance = widget.HighImportance
95 | } else {
96 | btn.Importance = widget.LowImportance
97 | }
98 | }
99 |
100 | // 更新当前选中的索引
101 | currentSelectedIndex = index
102 |
103 | navContainer.Refresh()
104 |
105 | if index == 0 {
106 | workbenchContent.Show()
107 | configContent.Hide()
108 | // 确保进度条和下载区域状态正确显示
109 | workbenchContent.Refresh()
110 | FadeAnimation(workbenchContent, 300*time.Millisecond, 0.0, 1.0)
111 | } else {
112 | workbenchContent.Hide()
113 | configContent.Show()
114 | FadeAnimation(configContent, 300*time.Millisecond, 0.0, 1.0)
115 | }
116 |
117 | contentStack.Refresh()
118 | })
119 |
120 | // 将导航按钮添加到列表和容器中
121 | navButtons = append(navButtons, navBtn)
122 | navContainer.Add(container.NewPadded(navBtn))
123 | }
124 |
125 | navBackground := canvas.NewRectangle(color.NRGBA{R: 250, G: 251, B: 254, A: 255})
126 |
127 | navWithBackground := container.NewStack(
128 | navBackground,
129 | container.NewBorder(
130 | container.NewPadded(logoContainer),
131 | nil, nil, nil,
132 | container.NewPadded(navContainer),
133 | ),
134 | )
135 |
136 | // 主布局
137 | split := container.NewHSplit(navWithBackground, container.NewPadded(contentStack))
138 | split.SetOffset(0.2)
139 |
140 | mainContainer := container.NewPadded(split)
141 |
142 | // 底部状态栏
143 | statusText := canvas.NewText("就绪", color.NRGBA{R: 100, G: 120, B: 160, A: 180})
144 | statusText.TextSize = 12
145 | statusBar := container.NewHBox(
146 | layout.NewSpacer(),
147 | statusText,
148 | )
149 |
150 | finalContainer := container.NewBorder(nil, container.NewPadded(statusBar), nil, nil, mainContainer)
151 |
152 | myWindow.SetContent(finalContainer)
153 | myWindow.Resize(fyne.NewSize(1000, 700))
154 | myWindow.CenterOnScreen()
155 | myWindow.ShowAndRun()
156 | }
157 |
--------------------------------------------------------------------------------
/internal/desktop/file.go:
--------------------------------------------------------------------------------
1 | package desktop
2 |
3 | import (
4 | "bytes"
5 | "encoding/json"
6 | "fmt"
7 | "io"
8 | "mime/multipart"
9 | "net/http"
10 | "os"
11 | "path/filepath"
12 |
13 | "fyne.io/fyne/v2"
14 | "fyne.io/fyne/v2/dialog"
15 | )
16 |
// FileManager tracks files uploaded through the local backend and drives the
// upload/download dialogs for a given window.
type FileManager struct {
	// window is the parent window used for all dialogs.
	window fyne.Window
	// files holds the server-side paths returned by the upload API.
	files []string
}
21 |
22 | func NewFileManager(window fyne.Window) *FileManager {
23 | return &FileManager{
24 | window: window,
25 | files: make([]string, 0),
26 | }
27 | }
28 |
29 | func (fm *FileManager) ShowUploadDialog() {
30 | fd := dialog.NewFileOpen(func(reader fyne.URIReadCloser, err error) {
31 | if err != nil {
32 | dialog.ShowError(err, fm.window)
33 | return
34 | }
35 | if reader == nil {
36 | return
37 | }
38 |
39 | // 获取文件路径
40 | filePath := reader.URI().Path()
41 | fileName := filepath.Base(filePath)
42 |
43 | err = fm.uploadFile(filePath, fileName)
44 | if err != nil {
45 | dialog.ShowError(err, fm.window)
46 | return
47 | }
48 |
49 | dialog.ShowInformation("成功", "文件上传成功", fm.window)
50 | }, fm.window)
51 |
52 | fd.Show()
53 | }
54 |
55 | func (fm *FileManager) uploadFile(filePath, fileName string) error {
56 | file, err := os.Open(filePath)
57 | if err != nil {
58 | return err
59 | }
60 | defer file.Close()
61 |
62 | // 创建multipart form
63 | body := &bytes.Buffer{}
64 | writer := multipart.NewWriter(body)
65 | part, err := writer.CreateFormFile("file", fileName)
66 | if err != nil {
67 | return err
68 | }
69 | _, err = io.Copy(part, file)
70 | if err != nil {
71 | return err
72 | }
73 | writer.Close()
74 |
75 | // 发送请求
76 | resp, err := http.Post("http://localhost:8888/api/file", writer.FormDataContentType(), body)
77 | if err != nil {
78 | return err
79 | }
80 | defer resp.Body.Close()
81 |
82 | var result struct {
83 | Error int `json:"error"`
84 | Msg string `json:"msg"`
85 | Data struct {
86 | FilePath string `json:"file_path"`
87 | } `json:"data"`
88 | }
89 |
90 | err = json.NewDecoder(resp.Body).Decode(&result)
91 | if err != nil {
92 | return err
93 | }
94 |
95 | if result.Error != 0 && result.Error != 200 {
96 | return fmt.Errorf(result.Msg)
97 | }
98 |
99 | fm.files = append(fm.files, result.Data.FilePath)
100 | return nil
101 | }
102 |
103 | func (fm *FileManager) GetFileCount() int {
104 | return len(fm.files)
105 | }
106 |
107 | func (fm *FileManager) GetFileName(index int) string {
108 | if index < 0 || index >= len(fm.files) {
109 | return ""
110 | }
111 | return filepath.Base(fm.files[index])
112 | }
113 |
114 | func (fm *FileManager) DownloadFile(index int) {
115 | if index < 0 || index >= len(fm.files) {
116 | return
117 | }
118 |
119 | filePath := fm.files[index]
120 |
121 | dialog.ShowFileSave(func(writer fyne.URIWriteCloser, err error) {
122 | if err != nil {
123 | dialog.ShowError(err, fm.window)
124 | return
125 | }
126 | if writer == nil {
127 | return
128 | }
129 |
130 | resp, err := http.Get("http://localhost:8888" + filePath)
131 | if err != nil {
132 | dialog.ShowError(err, fm.window)
133 | return
134 | }
135 | defer resp.Body.Close()
136 |
137 | _, err = io.Copy(writer, resp.Body)
138 | if err != nil {
139 | dialog.ShowError(err, fm.window)
140 | return
141 | }
142 |
143 | writer.Close()
144 | dialog.ShowInformation("成功", "文件下载完成", fm.window)
145 | }, fm.window)
146 | }
147 |
--------------------------------------------------------------------------------
/internal/desktop/theme.go:
--------------------------------------------------------------------------------
1 | package desktop
2 |
3 | import (
4 | "image/color"
5 |
6 | "fyne.io/fyne/v2"
7 | "fyne.io/fyne/v2/theme"
8 | )
9 |
10 | // customTheme 自定义主题
// customTheme is the application theme. It delegates to baseTheme for
// anything it does not override and can be pinned to the dark palette.
type customTheme struct {
	// baseTheme supplies fallback colors, fonts, icons and sizes.
	baseTheme fyne.Theme
	// forceDark selects the dark palette regardless of the system variant.
	forceDark bool
}
15 |
16 | func NewCustomTheme(forceDark bool) fyne.Theme {
17 | if forceDark {
18 | return &customTheme{baseTheme: theme.DefaultTheme(), forceDark: true}
19 | }
20 | return &customTheme{baseTheme: theme.DefaultTheme(), forceDark: false}
21 | }
22 |
23 | func (t *customTheme) Color(name fyne.ThemeColorName, variant fyne.ThemeVariant) color.Color {
24 | if t.forceDark || variant == theme.VariantDark {
25 | return t.darkColors(name)
26 | }
27 | return t.lightColors(name)
28 | }
29 |
30 | // lightColors 浅色主题配色方案
// lightColors is the light-theme palette; unknown names fall back to the
// base theme's light variant.
func (t *customTheme) lightColors(name fyne.ThemeColorName) color.Color {
	switch name {
	// Primary accent
	case theme.ColorNamePrimary:
		return color.NRGBA{R: 100, G: 150, B: 240, A: 255}

	// Background and foreground
	case theme.ColorNameBackground:
		return color.NRGBA{R: 248, G: 249, B: 252, A: 255} // very light gray background
	case theme.ColorNameForeground:
		return color.NRGBA{R: 30, G: 35, B: 45, A: 255} // dark gray text
	case theme.ColorNameDisabled:
		return color.NRGBA{R: 180, G: 185, B: 190, A: 150} // soft disabled color

	// Button states
	case theme.ColorNameButton:
		return color.NRGBA{R: 70, G: 130, B: 230, A: 255}
	case theme.ColorNameHover:
		return color.NRGBA{R: 90, G: 150, B: 240, A: 255} // light blue hover
	case theme.ColorNamePressed:
		return color.NRGBA{R: 50, G: 110, B: 210, A: 255} // darker blue pressed

	// Input widgets
	case theme.ColorNameInputBackground:
		return color.NRGBA{R: 255, G: 255, B: 255, A: 255} // pure white input field
	case theme.ColorNameInputBorder:
		return color.NRGBA{R: 210, G: 215, B: 220, A: 255} // light gray border
	case theme.ColorNamePlaceHolder:
		return color.NRGBA{R: 160, G: 165, B: 170, A: 200} // gray placeholder

	// Miscellaneous
	case theme.ColorNameSelection:
		return color.NRGBA{R: 200, G: 225, B: 255, A: 180} // pale blue selection
	case theme.ColorNameScrollBar:
		return color.NRGBA{R: 200, G: 205, B: 210, A: 200}
	case theme.ColorNameShadow:
		return color.NRGBA{R: 0, G: 0, B: 0, A: 25} // soft shadow

	// Status colors
	case theme.ColorNameError:
		return color.NRGBA{R: 230, G: 70, B: 70, A: 255} // red error
	case theme.ColorNameWarning:
		return color.NRGBA{R: 245, G: 160, B: 50, A: 255} // orange warning
	case theme.ColorNameSuccess:
		return color.NRGBA{R: 60, G: 180, B: 120, A: 255} // green success
	case theme.ColorNameFocus:
		return color.NRGBA{R: 70, G: 130, B: 230, A: 100} // translucent focus ring

	default:
		return t.baseTheme.Color(name, theme.VariantLight)
	}
}
83 |
84 | // darkColors 深色主题配色方案
// darkColors is the dark-theme palette; unknown names fall back to the base
// theme's dark variant.
func (t *customTheme) darkColors(name fyne.ThemeColorName) color.Color {
	switch name {
	// Primary accent
	case theme.ColorNamePrimary:
		return color.NRGBA{R: 90, G: 150, B: 250, A: 255} // slightly brighter blue

	// Background and foreground
	case theme.ColorNameBackground:
		return color.NRGBA{R: 20, G: 22, B: 30, A: 255} // deeper gray-blue background
	case theme.ColorNameForeground:
		return color.NRGBA{R: 230, G: 235, B: 240, A: 255} // light gray text
	case theme.ColorNameDisabled:
		return color.NRGBA{R: 100, G: 105, B: 110, A: 150} // dark disabled color

	// Button states
	case theme.ColorNameButton:
		return color.NRGBA{R: 50, G: 55, B: 65, A: 255} // darker button background
	case theme.ColorNameHover:
		return color.NRGBA{R: 70, G: 75, B: 85, A: 255} // light gray hover
	case theme.ColorNamePressed:
		return color.NRGBA{R: 30, G: 35, B: 45, A: 255} // even darker pressed

	// Input widgets
	case theme.ColorNameInputBackground:
		return color.NRGBA{R: 35, G: 38, B: 48, A: 255} // darker input background
	case theme.ColorNameInputBorder:
		return color.NRGBA{R: 60, G: 65, B: 75, A: 255} // darker border
	case theme.ColorNamePlaceHolder:
		return color.NRGBA{R: 120, G: 125, B: 130, A: 200} // gray placeholder

	// Miscellaneous
	case theme.ColorNameSelection:
		return color.NRGBA{R: 70, G: 130, B: 230, A: 180} // blue selection
	case theme.ColorNameScrollBar:
		return color.NRGBA{R: 60, G: 65, B: 75, A: 200} // darker scroll bar
	case theme.ColorNameShadow:
		return color.NRGBA{R: 0, G: 0, B: 0, A: 50} // dark shadow

	// Status colors (more vivid than the light palette)
	case theme.ColorNameError:
		return color.NRGBA{R: 240, G: 80, B: 80, A: 255}
	case theme.ColorNameWarning:
		return color.NRGBA{R: 255, G: 170, B: 60, A: 255}
	case theme.ColorNameSuccess:
		return color.NRGBA{R: 70, G: 190, B: 130, A: 255}
	case theme.ColorNameFocus:
		return color.NRGBA{R: 80, G: 140, B: 240, A: 100}

	default:
		return t.baseTheme.Color(name, theme.VariantDark)
	}
}
137 |
138 | // Icon 主题图标
// Icon returns theme icons unchanged from the base theme.
func (t *customTheme) Icon(name fyne.ThemeIconName) fyne.Resource {
	return t.baseTheme.Icon(name)
}
142 |
143 | // Font 主题字体
// Font returns theme fonts unchanged from the base theme.
func (t *customTheme) Font(style fyne.TextStyle) fyne.Resource {
	return t.baseTheme.Font(style)
}
147 |
148 | // Size 主题尺寸设置
// Size overrides selected theme metrics (padding, icons, scroll bars, text
// and input sizing); everything else falls back to the base theme.
func (t *customTheme) Size(name fyne.ThemeSizeName) float32 {
	switch name {
	case theme.SizeNamePadding:
		return 10
	case theme.SizeNameInlineIcon:
		return 20
	case theme.SizeNameScrollBar:
		return 10
	case theme.SizeNameScrollBarSmall:
		return 4
	case theme.SizeNameSeparatorThickness:
		return 1
	case theme.SizeNameText:
		return 14
	case theme.SizeNameInputBorder:
		return 1.5
	case theme.SizeNameInputRadius:
		return 5
	default:
		return t.baseTheme.Size(name)
	}
}
171 |
--------------------------------------------------------------------------------
/internal/dto/subtitle_task.go:
--------------------------------------------------------------------------------
1 | package dto
2 |
// StartVideoSubtitleTaskReq is the JSON body for starting a subtitle task.
// NOTE(review): the uint8 fields (Bilingual, TranslationSubtitlePos,
// ModalFilter, Tts, TtsVoiceCode) appear to be numeric option codes — verify
// their exact value sets against the service layer.
type StartVideoSubtitleTaskReq struct {
	AppId                     uint32   `json:"app_id"`
	Url                       string   `json:"url"`
	OriginLanguage            string   `json:"origin_lang"`
	TargetLang                string   `json:"target_lang"`
	Bilingual                 uint8    `json:"bilingual"`
	TranslationSubtitlePos    uint8    `json:"translation_subtitle_pos"`
	ModalFilter               uint8    `json:"modal_filter"`
	Tts                       uint8    `json:"tts"`
	TtsVoiceCode              uint8    `json:"tts_voice_code"`
	TtsVoiceCloneSrcFileUrl   string   `json:"tts_voice_clone_src_file_url"`
	Replace                   []string `json:"replace"`
	Language                  string   `json:"language"`
	EmbedSubtitleVideoType    string   `json:"embed_subtitle_video_type"`
	VerticalMajorTitle        string   `json:"vertical_major_title"`
	VerticalMinorTitle        string   `json:"vertical_minor_title"`
	OriginLanguageWordOneLine int      `json:"origin_language_word_one_line"`
}
21 |
// StartVideoSubtitleTaskResData carries the identifier of a newly started task.
type StartVideoSubtitleTaskResData struct {
	TaskId string `json:"task_id"`
}

// StartVideoSubtitleTaskRes is the response envelope for task creation;
// Error is 0 on success, with Msg carrying a human-readable status.
type StartVideoSubtitleTaskRes struct {
	Error int32                          `json:"error"`
	Msg   string                         `json:"msg"`
	Data  *StartVideoSubtitleTaskResData `json:"data"`
}
31 |
// GetVideoSubtitleTaskReq is the query-string request for polling a task.
type GetVideoSubtitleTaskReq struct {
	TaskId string `form:"taskId"`
}

// VideoInfo holds the source video's metadata and its translations.
type VideoInfo struct {
	Title                 string `json:"title"`
	Description           string `json:"description"`
	TranslatedTitle       string `json:"translated_title"`
	TranslatedDescription string `json:"translated_description"`
	Language              string `json:"language"`
}

// SubtitleInfo names one generated subtitle file and where to fetch it.
type SubtitleInfo struct {
	Name        string `json:"name"`
	DownloadUrl string `json:"download_url"`
}

// GetVideoSubtitleTaskResData is the task's current status snapshot:
// progress percentage plus any already-available artifacts.
type GetVideoSubtitleTaskResData struct {
	TaskId            string          `json:"task_id"`
	ProcessPercent    uint8           `json:"process_percent"`
	VideoInfo         *VideoInfo      `json:"video_info"`
	SubtitleInfo      []*SubtitleInfo `json:"subtitle_info"`
	TargetLanguage    string          `json:"target_language"`
	SpeechDownloadUrl string          `json:"speech_download_url"`
}

// GetVideoSubtitleTaskRes is the response envelope for status polling.
type GetVideoSubtitleTaskRes struct {
	Error int32                        `json:"error"`
	Msg   string                       `json:"msg"`
	Data  *GetVideoSubtitleTaskResData `json:"data"`
}
63 |
--------------------------------------------------------------------------------
/internal/handler/init.go:
--------------------------------------------------------------------------------
1 | package handler
2 |
3 | import "krillin-ai/internal/service"
4 |
// Handler exposes the HTTP endpoints, delegating business logic to Service.
type Handler struct {
	Service *service.Service
}
8 |
9 | func NewHandler() *Handler {
10 | return &Handler{
11 | Service: service.NewService(),
12 | }
13 | }
14 |
--------------------------------------------------------------------------------
/internal/handler/middleware.go:
--------------------------------------------------------------------------------
1 | package handler
2 |
--------------------------------------------------------------------------------
/internal/handler/subtitle_task.go:
--------------------------------------------------------------------------------
1 | package handler
2 |
3 | import (
4 | "github.com/gin-gonic/gin"
5 | "krillin-ai/internal/dto"
6 | "krillin-ai/internal/response"
7 | "os"
8 | "path/filepath"
9 | )
10 |
11 | func (h Handler) StartSubtitleTask(c *gin.Context) {
12 | var req dto.StartVideoSubtitleTaskReq
13 | if err := c.ShouldBindJSON(&req); err != nil {
14 | response.R(c, response.Response{
15 | Error: -1,
16 | Msg: "参数错误",
17 | Data: nil,
18 | })
19 | return
20 | }
21 |
22 | svc := h.Service
23 |
24 | data, err := svc.StartSubtitleTask(req)
25 | if err != nil {
26 | response.R(c, response.Response{
27 | Error: -1,
28 | Msg: err.Error(),
29 | Data: nil,
30 | })
31 | return
32 | }
33 | response.R(c, response.Response{
34 | Error: 0,
35 | Msg: "成功",
36 | Data: data,
37 | })
38 | }
39 |
40 | func (h Handler) GetSubtitleTask(c *gin.Context) {
41 | var req dto.GetVideoSubtitleTaskReq
42 | if err := c.ShouldBindQuery(&req); err != nil {
43 | response.R(c, response.Response{
44 | Error: -1,
45 | Msg: "参数错误",
46 | Data: nil,
47 | })
48 | return
49 | }
50 | svc := h.Service
51 | data, err := svc.GetTaskStatus(req)
52 | if err != nil {
53 | response.R(c, response.Response{
54 | Error: -1,
55 | Msg: err.Error(),
56 | Data: nil,
57 | })
58 | return
59 | }
60 | response.R(c, response.Response{
61 | Error: 0,
62 | Msg: "成功",
63 | Data: data,
64 | })
65 | }
66 |
67 | func (h Handler) UploadFile(c *gin.Context) {
68 | file, err := c.FormFile("file")
69 | if err != nil {
70 | response.R(c, response.Response{
71 | Error: -1,
72 | Msg: "未能获取文件",
73 | Data: nil,
74 | })
75 | return
76 | }
77 |
78 | savePath := "./uploads/" + file.Filename
79 | if err = c.SaveUploadedFile(file, savePath); err != nil {
80 | response.R(c, response.Response{
81 | Error: -1,
82 | Msg: "文件保存失败",
83 | Data: nil,
84 | })
85 | return
86 | }
87 |
88 | response.R(c, response.Response{
89 | Error: 0,
90 | Msg: "文件上传成功",
91 | Data: gin.H{"file_path": "local:" + savePath},
92 | })
93 | }
94 |
95 | func (h Handler) DownloadFile(c *gin.Context) {
96 | requestedFile := c.Param("filepath")
97 | if requestedFile == "" {
98 | response.R(c, response.Response{
99 | Error: -1,
100 | Msg: "文件路径为空",
101 | Data: nil,
102 | })
103 | return
104 | }
105 |
106 | localFilePath := filepath.Join(".", requestedFile)
107 | if _, err := os.Stat(localFilePath); os.IsNotExist(err) {
108 | response.R(c, response.Response{
109 | Error: -1,
110 | Msg: "文件不存在",
111 | Data: nil,
112 | })
113 | return
114 | }
115 | c.FileAttachment(localFilePath, filepath.Base(localFilePath))
116 | }
117 |
--------------------------------------------------------------------------------
/internal/response/response.go:
--------------------------------------------------------------------------------
1 | package response
2 |
3 | import "github.com/gin-gonic/gin"
4 |
// Response is the uniform JSON envelope for every API reply: Error is 0 on
// success (negative on failure), Msg is human-readable, Data is the payload.
type Response struct {
	Error int32  `json:"error"`
	Msg   string `json:"msg"`
	Data  any    `json:"data"`
}
10 |
// R writes data as JSON with HTTP status 200; business-level success or
// failure is conveyed by the Response.Error field, not the HTTP status.
func R(c *gin.Context, data any) {
	c.JSON(200, data)
}
14 |
--------------------------------------------------------------------------------
/internal/router/router.go:
--------------------------------------------------------------------------------
1 | package router
2 |
3 | import (
4 | "krillin-ai/internal/handler"
5 | "krillin-ai/static"
6 | "net/http"
7 |
8 | "github.com/gin-gonic/gin"
9 | )
10 |
// SetupRouter registers the API routes (subtitle task start/poll, file
// upload/download) and mounts the embedded static frontend on the engine.
func SetupRouter(r *gin.Engine) {
	api := r.Group("/api")

	hdl := handler.NewHandler()
	{
		api.POST("/capability/subtitleTask", hdl.StartSubtitleTask)
		api.GET("/capability/subtitleTask", hdl.GetSubtitleTask)
		api.POST("/file", hdl.UploadFile)
		api.GET("/file/*filepath", hdl.DownloadFile)
	}

	// Redirect the root path to the embedded static UI.
	r.GET("/", func(c *gin.Context) {
		c.Redirect(http.StatusMovedPermanently, "/static")
	})
	r.StaticFS("/static", http.FS(static.EmbeddedFiles))
}
27 |
--------------------------------------------------------------------------------
/internal/server/server.go:
--------------------------------------------------------------------------------
1 | package server
2 |
3 | import (
4 | "fmt"
5 | "krillin-ai/config"
6 | "krillin-ai/internal/router"
7 | "krillin-ai/log"
8 |
9 | "github.com/gin-gonic/gin"
10 | "go.uber.org/zap"
11 | )
12 |
// StartBackend boots the HTTP API in gin release mode and blocks serving on
// the configured host:port; the returned error comes from engine.Run.
func StartBackend() error {
	gin.SetMode(gin.ReleaseMode)
	engine := gin.Default()
	router.SetupRouter(engine)
	log.GetLogger().Info("服务启动", zap.String("host", config.Conf.Server.Host), zap.Int("port", config.Conf.Server.Port))
	return engine.Run(fmt.Sprintf("%s:%d", config.Conf.Server.Host, config.Conf.Server.Port))
}
20 |
--------------------------------------------------------------------------------
/internal/service/audio2subtitle_test.go:
--------------------------------------------------------------------------------
1 | package service
2 |
3 | import (
4 | "os"
5 | "testing"
6 | )
7 |
8 | func Test_isValidSplitContent(t *testing.T) {
9 | // 固定的测试文件路径
10 | splitContentFile := "g:\\bin\\AI\\tasks\\gdQRrtQP\\srt_no_ts_1.srt"
11 | originalTextFile := "g:\\bin\\AI\\tasks\\gdQRrtQP\\output\\origin_1.txt"
12 |
13 | // 读取分割内容文件
14 | splitContent, err := os.ReadFile(splitContentFile)
15 | if err != nil {
16 | t.Fatalf("读取分割内容文件失败: %v", err)
17 | }
18 |
19 | // 读取原始文本文件
20 | originalText, err := os.ReadFile(originalTextFile)
21 | if err != nil {
22 | t.Fatalf("读取原始文本文件失败: %v", err)
23 | }
24 |
25 | // 执行测试
26 | if got := isValidSplitContent(string(splitContent), string(originalText)); !got {
27 | t.Errorf("isValidSplitContent() = %v, want true", got)
28 | }
29 | }
30 |
--------------------------------------------------------------------------------
/internal/service/get_video_info.go:
--------------------------------------------------------------------------------
1 | package service
2 |
3 | import (
4 | "context"
5 | "fmt"
6 | "go.uber.org/zap"
7 | "krillin-ai/config"
8 | "krillin-ai/internal/storage"
9 | "krillin-ai/internal/types"
10 | "krillin-ai/log"
11 | "os/exec"
12 | "strings"
13 | )
14 |
// getVideoInfo fetches the title and description of a YouTube or Bilibili
// link via yt-dlp, asks the LLM to translate both, and records the results
// on the task. Non-link inputs are a no-op. All errors are logged but never
// returned, so the pipeline keeps going; the function always returns nil.
func (s Service) getVideoInfo(ctx context.Context, stepParam *types.SubtitleTaskStepParam) error {
	link := stepParam.Link
	if strings.Contains(link, "youtube.com") || strings.Contains(link, "bilibili.com") {
		var (
			err                error
			title, description string
		)
		// Build the yt-dlp invocations for title and description.
		titleCmdArgs := []string{"--skip-download", "--encoding", "utf-8", "--get-title", stepParam.Link}
		descriptionCmdArgs := []string{"--skip-download", "--encoding", "utf-8", "--get-description", stepParam.Link}
		titleCmdArgs = append(titleCmdArgs, "--cookies", "./cookies.txt")
		descriptionCmdArgs = append(descriptionCmdArgs, "--cookies", "./cookies.txt")
		if config.Conf.App.Proxy != "" {
			titleCmdArgs = append(titleCmdArgs, "--proxy", config.Conf.App.Proxy)
			descriptionCmdArgs = append(descriptionCmdArgs, "--proxy", config.Conf.App.Proxy)
		}
		if storage.FfmpegPath != "ffmpeg" {
			titleCmdArgs = append(titleCmdArgs, "--ffmpeg-location", storage.FfmpegPath)
			descriptionCmdArgs = append(descriptionCmdArgs, "--ffmpeg-location", storage.FfmpegPath)
		}
		cmd := exec.Command(storage.YtdlpPath, titleCmdArgs...)
		var output []byte
		output, err = cmd.CombinedOutput()
		if err != nil {
			log.GetLogger().Error("getVideoInfo yt-dlp error", zap.Any("stepParam", stepParam), zap.String("output", string(output)), zap.Error(err))
			output = []byte{}
			// Deliberately non-fatal: the whole pipeline should not abort here.
		}
		title = string(output)
		cmd = exec.Command(storage.YtdlpPath, descriptionCmdArgs...)
		output, err = cmd.CombinedOutput()
		if err != nil {
			log.GetLogger().Error("getVideoInfo yt-dlp error", zap.Any("stepParam", stepParam), zap.String("output", string(output)), zap.Error(err))
			output = []byte{}
		}
		description = string(output)
		log.GetLogger().Debug("getVideoInfo title and description", zap.String("title", title), zap.String("description", description))
		// Translate title and description in one LLM call, "####"-separated.
		var result string
		result, err = s.ChatCompleter.ChatCompletion(fmt.Sprintf(types.TranslateVideoTitleAndDescriptionPrompt, types.GetStandardLanguageName(stepParam.TargetLanguage), title+"####"+description))
		if err != nil {
			log.GetLogger().Error("getVideoInfo openai chat completion error", zap.Any("stepParam", stepParam), zap.Error(err))
		}
		log.GetLogger().Debug("getVideoInfo translate video info result", zap.String("result", result))

		taskPtr := stepParam.TaskPtr

		taskPtr.Title = title
		taskPtr.Description = description
		taskPtr.OriginLanguage = string(stepParam.OriginLanguage)
		taskPtr.TargetLanguage = string(stepParam.TargetLanguage)
		taskPtr.ProcessPct = 10
		// The model is expected to return "title####description".
		splitResult := strings.Split(result, "####")
		if len(splitResult) == 1 {
			taskPtr.TranslatedTitle = splitResult[0]
		} else if len(splitResult) == 2 {
			taskPtr.TranslatedTitle = splitResult[0]
			taskPtr.TranslatedDescription = splitResult[1]
		} else {
			log.GetLogger().Error("getVideoInfo translate video info error split result length != 1 and 2", zap.Any("stepParam", stepParam), zap.Any("translate result", result), zap.Error(err))
		}
	}
	return nil
}
79 |
--------------------------------------------------------------------------------
/internal/service/init.go:
--------------------------------------------------------------------------------
1 | package service
2 |
3 | import (
4 | "krillin-ai/config"
5 | "krillin-ai/internal/types"
6 | "krillin-ai/log"
7 | "krillin-ai/pkg/aliyun"
8 | "krillin-ai/pkg/fasterwhisper"
9 | "krillin-ai/pkg/whispercpp"
10 | "krillin-ai/pkg/openai"
11 | "krillin-ai/pkg/whisper"
12 | "krillin-ai/pkg/whisperkit"
13 |
14 | "go.uber.org/zap"
15 | )
16 |
// Service bundles the pluggable transcription and LLM clients with the
// Aliyun TTS, OSS and voice-clone clients used by the subtitle pipeline.
type Service struct {
	// Transcriber converts audio to text; implementation chosen by config.
	Transcriber types.Transcriber
	// ChatCompleter performs LLM chat completions (translation etc.).
	ChatCompleter types.ChatCompleter
	TtsClient     *aliyun.TtsClient
	OssClient     *aliyun.OssClient
	VoiceCloneClient *aliyun.VoiceCloneClient
}
24 |
25 | func NewService() *Service {
26 | var transcriber types.Transcriber
27 | var chatCompleter types.ChatCompleter
28 |
29 | switch config.Conf.App.TranscribeProvider {
30 | case "openai":
31 | transcriber = whisper.NewClient(config.Conf.Openai.Whisper.BaseUrl, config.Conf.Openai.Whisper.ApiKey, config.Conf.App.Proxy)
32 | case "aliyun":
33 | transcriber = aliyun.NewAsrClient(config.Conf.Aliyun.Bailian.ApiKey)
34 | case "fasterwhisper":
35 | transcriber = fasterwhisper.NewFastwhisperProcessor(config.Conf.LocalModel.Fasterwhisper)
36 | case "whispercpp":
37 | transcriber = whispercpp.NewWhispercppProcessor(config.Conf.LocalModel.Whispercpp)
38 | case "whisperkit":
39 | transcriber = whisperkit.NewWhisperKitProcessor(config.Conf.LocalModel.Whisperkit)
40 | }
41 | log.GetLogger().Info("当前选择的转录源: ", zap.String("transcriber", config.Conf.App.TranscribeProvider))
42 |
43 | switch config.Conf.App.LlmProvider {
44 | case "openai":
45 | chatCompleter = openai.NewClient(config.Conf.Openai.BaseUrl, config.Conf.Openai.ApiKey, config.Conf.App.Proxy)
46 | case "aliyun":
47 | chatCompleter = aliyun.NewChatClient(config.Conf.Aliyun.Bailian.ApiKey)
48 | }
49 | log.GetLogger().Info("当前选择的LLM源: ", zap.String("llm", config.Conf.App.LlmProvider))
50 |
51 | return &Service{
52 | Transcriber: transcriber,
53 | ChatCompleter: chatCompleter,
54 | TtsClient: aliyun.NewTtsClient(config.Conf.Aliyun.Speech.AccessKeyId, config.Conf.Aliyun.Speech.AccessKeySecret, config.Conf.Aliyun.Speech.AppKey),
55 | OssClient: aliyun.NewOssClient(config.Conf.Aliyun.Oss.AccessKeyId, config.Conf.Aliyun.Oss.AccessKeySecret, config.Conf.Aliyun.Oss.Bucket),
56 | VoiceCloneClient: aliyun.NewVoiceCloneClient(config.Conf.Aliyun.Speech.AccessKeyId, config.Conf.Aliyun.Speech.AccessKeySecret, config.Conf.Aliyun.Speech.AppKey),
57 | }
58 | }
59 |
--------------------------------------------------------------------------------
/internal/service/link2file.go:
--------------------------------------------------------------------------------
1 | package service
2 |
3 | import (
4 | "context"
5 | "errors"
6 | "fmt"
7 | "go.uber.org/zap"
8 | "krillin-ai/config"
9 | "krillin-ai/internal/storage"
10 | "krillin-ai/internal/types"
11 | "krillin-ai/log"
12 | "krillin-ai/pkg/util"
13 | "os/exec"
14 | "strings"
15 | )
16 |
17 | func (s Service) linkToFile(ctx context.Context, stepParam *types.SubtitleTaskStepParam) error {
18 | var (
19 | err error
20 | output []byte
21 | )
22 | link := stepParam.Link
23 | audioPath := fmt.Sprintf("%s/%s", stepParam.TaskBasePath, types.SubtitleTaskAudioFileName)
24 | videoPath := fmt.Sprintf("%s/%s", stepParam.TaskBasePath, types.SubtitleTaskVideoFileName)
25 | stepParam.TaskPtr.ProcessPct = 3
26 | if strings.Contains(link, "local:") {
27 | // 本地文件
28 | videoPath = strings.ReplaceAll(link, "local:", "")
29 | stepParam.InputVideoPath = videoPath
30 | cmd := exec.Command(storage.FfmpegPath, "-i", videoPath, "-vn", "-ar", "44100", "-ac", "2", "-ab", "192k", "-f", "mp3", audioPath)
31 | output, err = cmd.CombinedOutput()
32 | if err != nil {
33 | log.GetLogger().Error("generateAudioSubtitles.linkToFile ffmpeg error", zap.Any("step param", stepParam), zap.String("output", string(output)), zap.Error(err))
34 | return fmt.Errorf("generateAudioSubtitles.linkToFile ffmpeg error: %w", err)
35 | }
36 | } else if strings.Contains(link, "youtube.com") {
37 | var videoId string
38 | videoId, err = util.GetYouTubeID(link)
39 | if err != nil {
40 | log.GetLogger().Error("linkToFile.GetYouTubeID error", zap.Any("step param", stepParam), zap.Error(err))
41 | return fmt.Errorf("linkToFile.GetYouTubeID error: %w", err)
42 | }
43 | stepParam.Link = "https://www.youtube.com/watch?v=" + videoId
44 | cmdArgs := []string{"-f", "bestaudio", "--extract-audio", "--audio-format", "mp3", "--audio-quality", "192K", "-o", audioPath, stepParam.Link}
45 | if config.Conf.App.Proxy != "" {
46 | cmdArgs = append(cmdArgs, "--proxy", config.Conf.App.Proxy)
47 | }
48 | cmdArgs = append(cmdArgs, "--cookies", "./cookies.txt")
49 | if storage.FfmpegPath != "ffmpeg" {
50 | cmdArgs = append(cmdArgs, "--ffmpeg-location", storage.FfmpegPath)
51 | }
52 | cmd := exec.Command(storage.YtdlpPath, cmdArgs...)
53 | output, err = cmd.CombinedOutput()
54 | if err != nil {
55 | log.GetLogger().Error("linkToFile download audio yt-dlp error", zap.Any("step param", stepParam), zap.String("output", string(output)), zap.Error(err))
56 | return fmt.Errorf("linkToFile download audio yt-dlp error: %w", err)
57 | }
58 | } else if strings.Contains(link, "bilibili.com") {
59 | videoId := util.GetBilibiliVideoId(link)
60 | if videoId == "" {
61 | return errors.New("linkToFile error: invalid link")
62 | }
63 | stepParam.Link = "https://www.bilibili.com/video/" + videoId
64 | cmdArgs := []string{"-f", "bestaudio[ext=m4a]", "-x", "--audio-format", "mp3", "-o", audioPath, stepParam.Link}
65 | if config.Conf.App.Proxy != "" {
66 | cmdArgs = append(cmdArgs, "--proxy", config.Conf.App.Proxy)
67 | }
68 | if storage.FfmpegPath != "ffmpeg" {
69 | cmdArgs = append(cmdArgs, "--ffmpeg-location", storage.FfmpegPath)
70 | }
71 | cmd := exec.Command(storage.YtdlpPath, cmdArgs...)
72 | output, err = cmd.CombinedOutput()
73 | if err != nil {
74 | log.GetLogger().Error("linkToFile download audio yt-dlp error", zap.Any("step param", stepParam), zap.String("output", string(output)), zap.Error(err))
75 | return fmt.Errorf("linkToFile download audio yt-dlp error: %w", err)
76 | }
77 | } else {
78 | log.GetLogger().Info("linkToFile.unsupported link type", zap.Any("step param", stepParam))
79 | return errors.New("linkToFile error: unsupported link, only support youtube, bilibili and local file")
80 | }
81 | stepParam.TaskPtr.ProcessPct = 6
82 | stepParam.AudioFilePath = audioPath
83 |
84 | if !strings.HasPrefix(link, "local:") && stepParam.EmbedSubtitleVideoType != "none" {
85 | // 需要下载原视频
86 | cmdArgs := []string{"-f", "bestvideo[height<=1080][ext=mp4]+bestaudio[ext=m4a]/bestvideo[height<=720][ext=mp4]+bestaudio[ext=m4a]/bestvideo[height<=480][ext=mp4]+bestaudio[ext=m4a]", "-o", videoPath, stepParam.Link}
87 | if config.Conf.App.Proxy != "" {
88 | cmdArgs = append(cmdArgs, "--proxy", config.Conf.App.Proxy)
89 | }
90 | if storage.FfmpegPath != "" {
91 | cmdArgs = append(cmdArgs, "--ffmpeg-location", storage.FfmpegPath)
92 | }
93 | cmd := exec.Command(storage.YtdlpPath, cmdArgs...)
94 | output, err = cmd.CombinedOutput()
95 | if err != nil {
96 | log.GetLogger().Error("linkToFile download video yt-dlp error", zap.Any("step param", stepParam), zap.String("output", string(output)), zap.Error(err))
97 | return fmt.Errorf("linkToFile download video yt-dlp error: %w", err)
98 | }
99 | stepParam.InputVideoPath = videoPath
100 | }
101 |
102 | // 更新字幕任务信息
103 | stepParam.TaskPtr.ProcessPct = 10
104 | return nil
105 | }
106 |
--------------------------------------------------------------------------------
/internal/service/subtitle_service.go:
--------------------------------------------------------------------------------
1 | package service
2 |
3 | import (
4 | "context"
5 | "errors"
6 | "fmt"
7 | "github.com/samber/lo"
8 | "go.uber.org/zap"
9 | "krillin-ai/internal/dto"
10 | "krillin-ai/internal/storage"
11 | "krillin-ai/internal/types"
12 | "krillin-ai/log"
13 | "krillin-ai/pkg/util"
14 | "os"
15 | "path/filepath"
16 | "runtime"
17 | "strings"
18 | )
19 |
20 | func (s Service) StartSubtitleTask(req dto.StartVideoSubtitleTaskReq) (*dto.StartVideoSubtitleTaskResData, error) {
21 | // 校验链接
22 | if strings.Contains(req.Url, "youtube.com") {
23 | videoId, _ := util.GetYouTubeID(req.Url)
24 | if videoId == "" {
25 | return nil, fmt.Errorf("链接不合法")
26 | }
27 | }
28 | if strings.Contains(req.Url, "bilibili.com") {
29 | videoId := util.GetBilibiliVideoId(req.Url)
30 | if videoId == "" {
31 | return nil, fmt.Errorf("链接不合法")
32 | }
33 | }
34 | // 生成任务id
35 | taskId := util.GenerateRandStringWithUpperLowerNum(8)
36 | // 构造任务所需参数
37 | var resultType types.SubtitleResultType
38 | // 根据入参选项确定要返回的字幕类型
39 | if req.TargetLang == "none" {
40 | resultType = types.SubtitleResultTypeOriginOnly
41 | } else {
42 | if req.Bilingual == types.SubtitleTaskBilingualYes {
43 | if req.TranslationSubtitlePos == types.SubtitleTaskTranslationSubtitlePosTop {
44 | resultType = types.SubtitleResultTypeBilingualTranslationOnTop
45 | } else {
46 | resultType = types.SubtitleResultTypeBilingualTranslationOnBottom
47 | }
48 | } else {
49 | resultType = types.SubtitleResultTypeTargetOnly
50 | }
51 | }
52 | // 文字替换map
53 | replaceWordsMap := make(map[string]string)
54 | if len(req.Replace) > 0 {
55 | for _, replace := range req.Replace {
56 | beforeAfter := strings.Split(replace, "|")
57 | if len(beforeAfter) == 2 {
58 | replaceWordsMap[beforeAfter[0]] = beforeAfter[1]
59 | } else {
60 | log.GetLogger().Info("generateAudioSubtitles replace param length err", zap.Any("replace", replace), zap.Any("taskId", taskId))
61 | }
62 | }
63 | }
64 | var err error
65 | ctx := context.Background()
66 | // 创建字幕任务文件夹
67 | taskBasePath := filepath.Join("./tasks", taskId)
68 | if _, err = os.Stat(taskBasePath); os.IsNotExist(err) {
69 | // 不存在则创建
70 | err = os.MkdirAll(filepath.Join(taskBasePath, "output"), os.ModePerm)
71 | if err != nil {
72 | log.GetLogger().Error("StartVideoSubtitleTask MkdirAll err", zap.Any("req", req), zap.Error(err))
73 | }
74 | }
75 |
76 | // 创建任务
77 | taskPtr := &types.SubtitleTask{
78 | TaskId: taskId,
79 | VideoSrc: req.Url,
80 | Status: types.SubtitleTaskStatusProcessing,
81 | }
82 | storage.SubtitleTasks.Store(taskId, taskPtr)
83 |
84 | var ttsVoiceCode string
85 | if req.TtsVoiceCode == types.SubtitleTaskTtsVoiceCodeLongyu {
86 | ttsVoiceCode = "longyu"
87 | } else {
88 | ttsVoiceCode = "longchen"
89 | }
90 |
91 | // 处理声音克隆源
92 | var voiceCloneAudioUrl string
93 | if req.TtsVoiceCloneSrcFileUrl != "" {
94 | localFileUrl := strings.TrimPrefix(req.TtsVoiceCloneSrcFileUrl, "local:")
95 | fileKey := util.GenerateRandStringWithUpperLowerNum(5) + filepath.Ext(localFileUrl) // 防止url encode的问题,这里统一处理
96 | err = s.OssClient.UploadFile(context.Background(), fileKey, localFileUrl, s.OssClient.Bucket)
97 | if err != nil {
98 | log.GetLogger().Error("StartVideoSubtitleTask UploadFile err", zap.Any("req", req), zap.Error(err))
99 | return nil, errors.New("上传声音克隆源失败")
100 | }
101 | voiceCloneAudioUrl = fmt.Sprintf("https://%s.oss-cn-shanghai.aliyuncs.com/%s", s.OssClient.Bucket, fileKey)
102 | log.GetLogger().Info("StartVideoSubtitleTask 上传声音克隆源成功", zap.Any("oss url", voiceCloneAudioUrl))
103 | }
104 |
105 | stepParam := types.SubtitleTaskStepParam{
106 | TaskId: taskId,
107 | TaskPtr: taskPtr,
108 | TaskBasePath: taskBasePath,
109 | Link: req.Url,
110 | SubtitleResultType: resultType,
111 | EnableModalFilter: req.ModalFilter == types.SubtitleTaskModalFilterYes,
112 | EnableTts: req.Tts == types.SubtitleTaskTtsYes,
113 | TtsVoiceCode: ttsVoiceCode,
114 | VoiceCloneAudioUrl: voiceCloneAudioUrl,
115 | ReplaceWordsMap: replaceWordsMap,
116 | OriginLanguage: types.StandardLanguageCode(req.OriginLanguage),
117 | TargetLanguage: types.StandardLanguageCode(req.TargetLang),
118 | UserUILanguage: types.StandardLanguageCode(req.Language),
119 | EmbedSubtitleVideoType: req.EmbedSubtitleVideoType,
120 | VerticalVideoMajorTitle: req.VerticalMajorTitle,
121 | VerticalVideoMinorTitle: req.VerticalMinorTitle,
122 | MaxWordOneLine: 12, // 默认值
123 | }
124 | if req.OriginLanguageWordOneLine != 0 {
125 | stepParam.MaxWordOneLine = req.OriginLanguageWordOneLine
126 | }
127 |
128 | log.GetLogger().Info("current task info", zap.String("taskId", taskId), zap.Any("param", stepParam))
129 |
130 | go func() {
131 | defer func() {
132 | if r := recover(); r != nil {
133 | const size = 64 << 10
134 | buf := make([]byte, size)
135 | buf = buf[:runtime.Stack(buf, false)]
136 | log.GetLogger().Error("autoVideoSubtitle panic", zap.Any("panic:", r), zap.Any("stack:", buf))
137 | stepParam.TaskPtr.Status = types.SubtitleTaskStatusFailed
138 | }
139 | }()
140 | // 新版流程:链接->本地音频文件->视频信息获取(若有)->本地字幕文件->语言合成->视频合成->字幕文件链接生成
141 | log.GetLogger().Info("video subtitle start task", zap.String("taskId", taskId))
142 | err = s.linkToFile(ctx, &stepParam)
143 | if err != nil {
144 | log.GetLogger().Error("StartVideoSubtitleTask linkToFile err", zap.Any("req", req), zap.Error(err))
145 | stepParam.TaskPtr.Status = types.SubtitleTaskStatusFailed
146 | stepParam.TaskPtr.FailReason = err.Error()
147 | return
148 | }
149 | // 暂时不加视频信息
150 | //err = s.getVideoInfo(ctx, &stepParam)
151 | //if err != nil {
152 | // log.GetLogger().Error("StartVideoSubtitleTask getVideoInfo err", zap.Any("req", req), zap.Error(err))
153 | // stepParam.TaskPtr.Status = types.SubtitleTaskStatusFailed
154 | // stepParam.TaskPtr.FailReason = "get video info error"
155 | // return
156 | //}
157 | err = s.audioToSubtitle(ctx, &stepParam)
158 | if err != nil {
159 | log.GetLogger().Error("StartVideoSubtitleTask audioToSubtitle err", zap.Any("req", req), zap.Error(err))
160 | stepParam.TaskPtr.Status = types.SubtitleTaskStatusFailed
161 | stepParam.TaskPtr.FailReason = err.Error()
162 | return
163 | }
164 | err = s.srtFileToSpeech(ctx, &stepParam)
165 | if err != nil {
166 | log.GetLogger().Error("StartVideoSubtitleTask srtFileToSpeech err", zap.Any("req", req), zap.Error(err))
167 | stepParam.TaskPtr.Status = types.SubtitleTaskStatusFailed
168 | stepParam.TaskPtr.FailReason = err.Error()
169 | return
170 | }
171 | err = s.embedSubtitles(ctx, &stepParam)
172 | if err != nil {
173 | log.GetLogger().Error("StartVideoSubtitleTask embedSubtitles err", zap.Any("req", req), zap.Error(err))
174 | stepParam.TaskPtr.Status = types.SubtitleTaskStatusFailed
175 | stepParam.TaskPtr.FailReason = err.Error()
176 | return
177 | }
178 | err = s.uploadSubtitles(ctx, &stepParam)
179 | if err != nil {
180 | log.GetLogger().Error("StartVideoSubtitleTask uploadSubtitles err", zap.Any("req", req), zap.Error(err))
181 | stepParam.TaskPtr.Status = types.SubtitleTaskStatusFailed
182 | stepParam.TaskPtr.FailReason = err.Error()
183 | return
184 | }
185 |
186 | log.GetLogger().Info("video subtitle task end", zap.String("taskId", taskId))
187 | }()
188 |
189 | return &dto.StartVideoSubtitleTaskResData{
190 | TaskId: taskId,
191 | }, nil
192 | }
193 |
194 | func (s Service) GetTaskStatus(req dto.GetVideoSubtitleTaskReq) (*dto.GetVideoSubtitleTaskResData, error) {
195 | task, ok := storage.SubtitleTasks.Load(req.TaskId)
196 | if !ok || task == nil {
197 | return nil, errors.New("任务不存在")
198 | }
199 | taskPtr := task.(*types.SubtitleTask)
200 | if taskPtr.Status == types.SubtitleTaskStatusFailed {
201 | return nil, fmt.Errorf("任务失败,原因:%s", taskPtr.FailReason)
202 | }
203 | return &dto.GetVideoSubtitleTaskResData{
204 | TaskId: taskPtr.TaskId,
205 | ProcessPercent: taskPtr.ProcessPct,
206 | VideoInfo: &dto.VideoInfo{
207 | Title: taskPtr.Title,
208 | Description: taskPtr.Description,
209 | TranslatedTitle: taskPtr.TranslatedTitle,
210 | TranslatedDescription: taskPtr.TranslatedDescription,
211 | },
212 | SubtitleInfo: lo.Map(taskPtr.SubtitleInfos, func(item types.SubtitleInfo, _ int) *dto.SubtitleInfo {
213 | return &dto.SubtitleInfo{
214 | Name: item.Name,
215 | DownloadUrl: item.DownloadUrl,
216 | }
217 | }),
218 | TargetLanguage: taskPtr.TargetLanguage,
219 | SpeechDownloadUrl: taskPtr.SpeechDownloadUrl,
220 | }, nil
221 | }
222 |
--------------------------------------------------------------------------------
/internal/service/upload_subtitle.go:
--------------------------------------------------------------------------------
1 | package service
2 |
3 | import (
4 | "context"
5 | "fmt"
6 | "go.uber.org/zap"
7 | "krillin-ai/internal/types"
8 | "krillin-ai/log"
9 | "krillin-ai/pkg/util"
10 | )
11 |
12 | func (s Service) uploadSubtitles(ctx context.Context, stepParam *types.SubtitleTaskStepParam) error {
13 | subtitleInfos := make([]types.SubtitleInfo, 0)
14 | var err error
15 | for _, info := range stepParam.SubtitleInfos {
16 | resultPath := info.Path
17 | if len(stepParam.ReplaceWordsMap) > 0 { // 需要进行替换
18 | replacedSrcFile := util.AddSuffixToFileName(resultPath, "_replaced")
19 | err = util.ReplaceFileContent(resultPath, replacedSrcFile, stepParam.ReplaceWordsMap)
20 | if err != nil {
21 | log.GetLogger().Error("uploadSubtitles ReplaceFileContent err", zap.Any("stepParam", stepParam), zap.Error(err))
22 | return fmt.Errorf("uploadSubtitles ReplaceFileContent err: %w", err)
23 | }
24 | resultPath = replacedSrcFile
25 | }
26 | subtitleInfos = append(subtitleInfos, types.SubtitleInfo{
27 | TaskId: stepParam.TaskId,
28 | Name: info.Name,
29 | DownloadUrl: "/api/file/" + resultPath,
30 | })
31 | }
32 | // 更新字幕任务信息
33 | taskPtr := stepParam.TaskPtr
34 | taskPtr.SubtitleInfos = subtitleInfos
35 | taskPtr.Status = types.SubtitleTaskStatusSuccess
36 | taskPtr.ProcessPct = 100
37 | // 配音文件
38 | if stepParam.TtsResultFilePath != "" {
39 | taskPtr.SpeechDownloadUrl = "/api/file/" + stepParam.TtsResultFilePath
40 | }
41 | return nil
42 | }
43 |
--------------------------------------------------------------------------------
/internal/storage/bin.go:
--------------------------------------------------------------------------------
1 | package storage
2 |
// Absolute paths (or bare command names) of the external binaries the app
// shells out to. Presumably populated during startup dependency checking
// (see internal/deps) — TODO confirm; empty string means "not resolved yet".
var (
	FfmpegPath        string // ffmpeg executable
	FfprobePath       string // ffprobe executable
	YtdlpPath         string // yt-dlp executable
	FasterwhisperPath string // faster-whisper executable
	WhisperKitPath    string // WhisperKit executable
	WhispercppPath    string // whisper.cpp executable
)
11 |
--------------------------------------------------------------------------------
/internal/storage/subtitle_task.go:
--------------------------------------------------------------------------------
1 | package storage
2 |
3 | import (
4 | "sync"
5 | )
6 |
// SubtitleTasks maps task id (string) -> *types.SubtitleTask; used by the
// status-query API to look up task data.
var SubtitleTasks = sync.Map{}
8 |
--------------------------------------------------------------------------------
/internal/types/embed_subtitle.go:
--------------------------------------------------------------------------------
1 | package types
2 |
// AssHeaderHorizontal is the ASS (Advanced SubStation Alpha) header used when
// embedding subtitles into horizontal (landscape) video: a larger "Major"
// style and a smaller "Minor" style.
const AssHeaderHorizontal = `[Script Info]
Title: Example
Original Script:
ScriptType: v4.00+
PlayDepth: 0

[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: Major,Arial,18,&H00BFFF,&H000000FF,&H00000000,&H64000000,-1,0,0,0,100,100,0,0,1,2.5,1.5,2,10,10,20,1
Style: Minor,Arial,12,&H00BFFF,&H000000FF,&H00000000,&H64000000,-1,0,0,0,100,100,0,0,1,2.5,1.5,2,10,10,30,1


[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
`

// AssHeaderVertical is the ASS header used when embedding subtitles into
// vertical (portrait) video: smaller fonts, negative spacing and larger
// vertical margins than the horizontal variant.
const AssHeaderVertical = `[Script Info]
Title: Example
Original Script:
ScriptType: v4.00+
PlayDepth: 0

[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: Major,Arial,15,&H00BFFF,&H000000FF,&H00000000,&H64000000,-1,0,0,0,100,100,-10,0,1,2.5,1.5,2,10,10,80,1
Style: Minor,Arial,8,&H00BFFF,&H000000FF,&H00000000,&H64000000,-1,0,0,0,100,100,-10,0,1,2.5,1.5,2,10,10,100,1


[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
`
33 |
--------------------------------------------------------------------------------
/internal/types/fasterwhisper.go:
--------------------------------------------------------------------------------
1 | package types
2 |
// FasterWhisperOutput mirrors the JSON transcription result produced by the
// faster-whisper CLI: segment-level text with optional word-level timestamps.
type FasterWhisperOutput struct {
	Segments []struct {
		Id               int     `json:"id"`
		Seek             int     `json:"seek"`
		Start            float64 `json:"start"`
		End              float64 `json:"end"`
		Text             string  `json:"text"`
		Tokens           []int   `json:"tokens"`
		Temperature      float64 `json:"temperature"`
		AvgLogprob       float64 `json:"avg_logprob"`
		CompressionRatio float64 `json:"compression_ratio"`
		NoSpeechProb     float64 `json:"no_speech_prob"`
		Words            []struct {
			Start       float64 `json:"start"`
			End         float64 `json:"end"`
			Word        string  `json:"word"`
			Probability float64 `json:"probability"`
		} `json:"words"`
	} `json:"segments"`
	Language string `json:"language"`
	Text     string `json:"text"`
}
25 |
--------------------------------------------------------------------------------
/internal/types/interface.go:
--------------------------------------------------------------------------------
1 | package types
2 |
3 | type ChatCompleter interface {
4 | ChatCompletion(query string) (string, error)
5 | }
6 |
7 | type Transcriber interface {
8 | Transcription(audioFile, language, wordDir string) (*TranscriptionData, error)
9 | }
10 |
--------------------------------------------------------------------------------
/internal/types/subtitle_task.go:
--------------------------------------------------------------------------------
1 | package types
2 |
3 | // var SplitTextPrompt = `你是一个英语处理专家,擅长翻译成%s和处理英文文本,根据句意和标点对句子进行拆分。
4 |
5 | // - 不要漏掉原英文任何一个单词
6 | // - 翻译一定要流畅,完整表达原文意思
7 | // - 优先根据标点符号进行拆分,遇到逗号、句号、问号,一定要拆分,必须把句子拆短些。
8 | // - 遇到定语从句、并列句等复杂句式,根据连词(如and, but, which, when)进行拆分。
9 | // - 拆分后的单行句子英文不能超过15个单词。
10 | // - 翻译的时候确保每个原始字幕块单独存在且编号和格式正确。
11 | // - 不需要任何额外的话语,直接按下面格式输出结果。
12 |
13 | // 1
14 | // [中文翻译]
15 | // [英文句子]
16 |
17 | // 2
18 | // [中文翻译]
19 | // [英文句子]
20 |
21 | // 内容如下:`
22 |
// SplitTextPrompt instructs the LLM to split transcribed text into short
// sentences and translate each into the target language (%s is the target
// language name). Expected output: numbered blocks of [translation] /
// [original], or the literal "[无文本]" when there is nothing to translate.
var SplitTextPrompt = `你是一个语言处理专家,专注于自然语言处理和翻译任务。按照以下步骤和要求,以最大程度实现准确和高质量翻译:

1. 将原句翻译为%s,确保译文流畅、自然,达到专业翻译水平。
2. 严格依据标点符号(逗号、句号、问号等)将内容拆分成单独的句子,并依据以下规则确保拆分粒度合理:
- 每个句子在保证句意完整的情况下尽可能短,长度尽量不得超过15个字。
- 可以根据连词(例如 "and", "but", "which", "when", "so", "所以", "但是", "因此", "考虑到" 等)进一步拆分句子,避免语句太长。
3. 对每个拆分的句子分别翻译,确保不遗漏或修改任何字词。
4. 将每对翻译后的句子与原句用独立编号表示,并分别以方括号[]包裹内容。
5. 输出的翻译与原文应保持对应,严格按照原文顺序呈现,不得有错位,且原文尽可能使用原文。
6. 不管内容是正式还是非正式,都要翻译。

翻译输出应采用如下格式:
**正常翻译的示例(注意每块3部分,每个部分都独占一行,空格分块)**:
1
[翻译后的句子1]
[原句子1]

2
[翻译后的句子2]
[原句子2]

**无文本需要翻译的输出示例**:
[无文本]

确保高效、精确地完成上述翻译任务,输入内容如下:
`

// SplitTextPromptWithModalFilter is the same split-and-translate prompt as
// SplitTextPrompt, with an extra rule telling the model to drop filler /
// modal words (e.g. "Oh", "Ah", "Wow").
var SplitTextPromptWithModalFilter = `你是一个语言处理专家,专注于自然语言处理和翻译任务。按照以下步骤和要求,以最大程度实现准确和高质量翻译:

1. 将原句翻译为%s,确保译文流畅、自然,达到专业翻译水平。
2. 严格依据标点符号(逗号、句号、问号等)将内容拆分成单独的句子,并依据以下规则确保拆分粒度合理:
- 每个句子在保证句意完整的情况下尽可能短,长度尽量不得超过15个字。
- 可以根据连词(例如 "and", "but", "which", "when", "so", "所以", "但是", "因此", "考虑到" 等)进一步拆分句子,避免语句太长。
3. 对每个拆分的句子分别翻译,确保不遗漏或修改任何字词。
4. 将每对翻译后的句子与原句用独立编号表示,并分别以方括号[]包裹内容。
5. 输出的翻译与原文应保持对应,严格按照原文顺序呈现,不得有错位,且原文尽可能使用原文。
6. 忽略文本中的语气词,比如"Oh" "Ah" "Wow"等等。
7. 不管内容是正式还是非正式,都要翻译。

翻译输出应采用如下格式:
**正常翻译的示例(注意每块3部分,每个部分都独占一行,空格分块)**:
1
[翻译后的句子1]
[原句子1]

2
[翻译后的句子2]
[原句子2]

**无文本需要翻译的输出示例**:
[无文本]

确保高效、精确地完成上述翻译任务,输入内容如下:
`

// TranslateVideoTitleAndDescriptionPrompt asks the LLM to translate a video
// title and description (separated by ####) into the target language; the
// first %s is the target language, the second the "title####description" text.
var TranslateVideoTitleAndDescriptionPrompt = `你是一个专业的翻译专家,请翻译下面给出的标题和描述信息(两者用####来分隔),要求如下:
- 将内容翻译成 %s
- 翻译后的内容仍然用####来分隔标题和描述两部分
以下全部是源内容,请完整按要求翻译:
%s
`
85 |
// SmallAudio is one chunk of the task audio after splitting, together with
// its transcription artifacts.
type SmallAudio struct {
	AudioFile         string             // path of this audio chunk
	Num               int                // chunk sequence number
	TranscriptionData *TranscriptionData // transcription result for this chunk
	SrtNoTsFile       string             // srt file without timestamps for this chunk
}
92 |
// SubtitleResultType selects which subtitle variants a task should produce.
type SubtitleResultType int

const (
	SubtitleResultTypeOriginOnly                   SubtitleResultType = iota + 1 // origin-language subtitles only
	SubtitleResultTypeTargetOnly                                                 // translated-language subtitles only
	SubtitleResultTypeBilingualTranslationOnTop                                  // bilingual subtitles, translation on top
	SubtitleResultTypeBilingualTranslationOnBottom                               // bilingual subtitles, translation at bottom
)
101 |
// Request enums below start at 1 so the zero value means "unset".

// Whether to produce bilingual subtitles.
const (
	SubtitleTaskBilingualYes uint8 = iota + 1
	SubtitleTaskBilingualNo
)

// Position of the translated line in bilingual subtitles.
const (
	SubtitleTaskTranslationSubtitlePosTop uint8 = iota + 1
	SubtitleTaskTranslationSubtitlePosBelow
)

// Whether to filter out modal/filler words during translation.
const (
	SubtitleTaskModalFilterYes uint8 = iota + 1
	SubtitleTaskModalFilterNo
)

// Whether to synthesize speech (TTS) from the subtitles.
const (
	SubtitleTaskTtsYes uint8 = iota + 1
	SubtitleTaskTtsNo
)

// TTS voice selection.
const (
	SubtitleTaskTtsVoiceCodeLongyu uint8 = iota + 1
	SubtitleTaskTtsVoiceCodeLongchen
)

// Task lifecycle status.
const (
	SubtitleTaskStatusProcessing uint8 = iota + 1 // 1 - processing
	SubtitleTaskStatusSuccess                     // 2 - success
	SubtitleTaskStatusFailed                      // 3 - failed
)
132 |
// Well-known file names created inside a task's working directory.
const (
	SubtitleTaskAudioFileName                           = "origin_audio.mp3"
	SubtitleTaskVideoFileName                           = "origin_video.mp4"
	SubtitleTaskSplitAudioFileNamePrefix                = "split_audio"
	SubtitleTaskSplitAudioFileNamePattern               = SubtitleTaskSplitAudioFileNamePrefix + "_%03d.mp3"
	SubtitleTaskSplitAudioTxtFileNamePattern            = "split_audio_txt_%d.txt"
	SubtitleTaskSplitAudioWordsFileNamePattern          = "split_audio_words_%d.txt"
	SubtitleTaskSplitSrtNoTimestampFileNamePattern      = "srt_no_ts_%d.srt"
	SubtitleTaskSrtNoTimestampFileName                  = "srt_no_ts.srt"
	SubtitleTaskSplitBilingualSrtFileNamePattern        = "split_bilingual_srt_%d.srt"
	SubtitleTaskSplitShortOriginMixedSrtFileNamePattern = "split_short_origin_mixed_srt_%d.srt" // long Chinese + short English
	SubtitleTaskSplitShortOriginSrtFileNamePattern      = "split_short_origin_srt_%d.srt"       // short English
	SubtitleTaskBilingualSrtFileName                    = "bilingual_srt.srt"
	SubtitleTaskShortOriginMixedSrtFileName             = "short_origin_mixed_srt.srt" // long Chinese + short English
	SubtitleTaskShortOriginSrtFileName                  = "short_origin_srt.srt"       // short English
	SubtitleTaskOriginLanguageSrtFileName               = "origin_language_srt.srt"
	SubtitleTaskOriginLanguageTextFileName              = "origin_language.txt"
	SubtitleTaskTargetLanguageSrtFileName               = "target_language_srt.srt"
	SubtitleTaskTargetLanguageTextFileName              = "target_language.txt"
	SubtitleTaskStepParamGobPersistenceFileName         = "step_param.gob"
	SubtitleTaskTransferredVerticalVideoFileName        = "transferred_vertical_video.mp4"
	SubtitleTaskHorizontalEmbedVideoFileName            = "horizontal_embed.mp4"
	SubtitleTaskVerticalEmbedVideoFileName              = "vertical_embed.mp4"
)

// File names used by the TTS (speech synthesis) step.
const (
	TtsAudioDurationDetailsFileName = "audio_duration_details.txt"
	TtsResultAudioFileName          = "tts_final_audio.wav"
)

// File names used by the ASR (speech recognition) step.
const (
	AsrMono16kAudioFileName = "mono_16k_audio.mp3"
)
166 |
// SubtitleFileInfo describes one generated subtitle file.
type SubtitleFileInfo struct {
	Name               string
	Path               string
	LanguageIdentifier string // language tag in the final downloaded file name, e.g. zh_cn, en, bilingual
}

// SubtitleTaskStepParam carries all the state shared between the pipeline
// steps of a single subtitle task.
type SubtitleTaskStepParam struct {
	TaskId                      string
	TaskPtr                     *SubtitleTask // same pointer as the record kept in storage
	TaskBasePath                string
	Link                        string
	AudioFilePath               string
	SmallAudios                 []*SmallAudio
	SubtitleResultType          SubtitleResultType
	EnableModalFilter           bool
	EnableTts                   bool
	TtsVoiceCode                string // TTS voice code
	VoiceCloneAudioUrl          string // OSS URL of the voice-clone source audio
	ReplaceWordsMap             map[string]string
	OriginLanguage              StandardLanguageCode // source language of the video
	TargetLanguage              StandardLanguageCode // target translation language requested by the user
	UserUILanguage              StandardLanguageCode // language the user's UI runs in
	BilingualSrtFilePath        string
	ShortOriginMixedSrtFilePath string
	SubtitleInfos               []SubtitleFileInfo
	TtsSourceFilePath           string
	TtsResultFilePath           string
	InputVideoPath              string // path of the source video
	EmbedSubtitleVideoType      string // subtitle embed target: none (no embed), horizontal, vertical
	VerticalVideoMajorTitle     string // major title of the generated vertical video
	VerticalVideoMinorTitle     string
	MaxWordOneLine              int // maximum number of words shown per subtitle line
}

// SrtSentence is one subtitle sentence with timestamps in seconds.
type SrtSentence struct {
	Text  string
	Start float64
	End   float64
}

// SrtSentenceWithStrTime is one subtitle sentence with SRT-formatted
// timestamp strings.
type SrtSentenceWithStrTime struct {
	Text  string
	Start string
	End   string
}
212 |
// SubtitleInfo is one downloadable subtitle artifact of a task.
type SubtitleInfo struct {
	Id          uint64 `json:"id" gorm:"column:id"`                                  // auto-increment id
	TaskId      string `json:"task_id" gorm:"column:task_id"`                        // task id
	Uid         uint32 `json:"uid" gorm:"column:uid"`                                // user id
	Name        string `json:"name" gorm:"column:name"`                              // subtitle name
	DownloadUrl string `json:"download_url" gorm:"column:download_url"`              // subtitle download url
	CreateTime  int64  `json:"create_time" gorm:"column:create_time;autoCreateTime"` // creation time
}

// SubtitleTask is the task record exposed to the status-query API (and shaped
// for gorm persistence).
type SubtitleTask struct {
	Id                    uint64         `json:"id" gorm:"column:id"`                                             // auto-increment id
	TaskId                string         `json:"task_id" gorm:"column:task_id"`                                   // task id
	Title                 string         `json:"title" gorm:"column:title"`                                       // title
	Description           string         `json:"description" gorm:"column:description"`                           // description
	TranslatedTitle       string         `json:"translated_title" gorm:"column:translated_title"`                 // translated title
	TranslatedDescription string         `json:"translated_description" gorm:"column:translated_description"`     // translated description
	OriginLanguage        string         `json:"origin_language" gorm:"column:origin_language"`                   // source language of the video
	TargetLanguage        string         `json:"target_language" gorm:"column:target_language"`                   // target translation language
	VideoSrc              string         `json:"video_src" gorm:"column:video_src"`                               // video source link
	Status                uint8          `json:"status" gorm:"column:status"`                                     // 1-processing, 2-success, 3-failed
	LastSuccessStepNum    uint8          `json:"last_success_step_num" gorm:"column:last_success_step_num"`       // last successful sub-step, for task recovery
	FailReason            string         `json:"fail_reason" gorm:"column:fail_reason"`                           // failure reason
	ProcessPct            uint8          `json:"process_percent" gorm:"column:process_percent"`                   // progress percentage
	Duration              uint32         `json:"duration" gorm:"column:duration"`                                 // video duration
	SrtNum                int            `json:"srt_num" gorm:"column:srt_num"`                                   // number of subtitles
	SubtitleInfos         []SubtitleInfo `gorm:"foreignKey:TaskId;references:TaskId"`
	Cover                 string         `json:"cover" gorm:"column:cover"`                                       // cover image
	SpeechDownloadUrl     string         `json:"speech_download_url" gorm:"column:speech_download_url"`           // speech file download url
	CreateTime            int64          `json:"create_time" gorm:"column:create_time;autoCreateTime"`            // creation time
	UpdateTime            int64          `json:"update_time" gorm:"column:update_time;autoUpdateTime"`            // update time
}

// Word is one recognized word with its timing (seconds) and sequence number.
type Word struct {
	Num   int
	Text  string
	Start float64
	End   float64
}

// TranscriptionData is the normalized result every Transcriber returns:
// detected language, full text, and word-level timing.
type TranscriptionData struct {
	Language string
	Text     string
	Words    []Word
}
257 |
--------------------------------------------------------------------------------
/internal/types/whispercpp.go:
--------------------------------------------------------------------------------
1 | package types
2 |
// WhispercppOutput mirrors the JSON report produced by the whisper.cpp CLI:
// system/model/params metadata plus the transcription with per-token timing.
type WhispercppOutput struct {
	SystemInfo string `json:"systeminfo"`
	Model      struct {
		Type         string `json:"type"`
		Multilingual bool   `json:"multilingual"`
		Vocab        int    `json:"vocab"`
		Audio        struct {
			Ctx   int `json:"ctx"`
			State int `json:"state"`
			Head  int `json:"head"`
			Layer int `json:"layer"`
		} `json:"audio"`
		Text struct {
			Ctx   int `json:"ctx"`
			State int `json:"state"`
			Head  int `json:"head"`
			Layer int `json:"layer"`
		} `json:"text"`
		Mels  int `json:"mels"`
		Ftype int `json:"ftype"`
	} `json:"model"`
	Params struct {
		Model     string `json:"model"`
		Language  string `json:"language"`
		Translate bool   `json:"translate"`
	} `json:"params"`
	Result struct {
		Language string `json:"language"`
	} `json:"result"`
	Transcription []struct {
		Timestamps struct {
			From string `json:"from"`
			To   string `json:"to"`
		} `json:"timestamps"`
		Offsets struct {
			From int `json:"from"`
			To   int `json:"to"`
		} `json:"offsets"`
		Text   string `json:"text"`
		Tokens []struct {
			Text       string `json:"text"`
			Timestamps struct {
				From string `json:"from"`
				To   string `json:"to"`
			} `json:"timestamps"`
			Offsets struct {
				From int `json:"from"`
				To   int `json:"to"`
			} `json:"offsets"`
			ID   int     `json:"id"`
			P    float64 `json:"p"`
			TDtw int     `json:"t_dtw"`
		} `json:"tokens"`
	} `json:"transcription"`
}
58 |
--------------------------------------------------------------------------------
/internal/types/whisperkit.go:
--------------------------------------------------------------------------------
1 | package types
2 |
// WhisperKitOutput mirrors the JSON transcription report emitted by the
// WhisperKit CLI, including segment- and word-level timing.
type WhisperKitOutput struct {
	Text     string `json:"text"`
	Language string `json:"language"`
	Segments []struct {
		Seek             int                  `json:"seek"`
		Tokens           []int                `json:"tokens"`
		CompressionRatio float64              `json:"compressionRatio"`
		Temperature      float64              `json:"temperature"`
		AvgLogprob       float64              `json:"avgLogprob"`
		NoSpeechProb     float64              `json:"noSpeechProb"`
		Id               int                  `json:"id"`
		TokenLogProbs    []map[string]float64 `json:"tokenLogProbs"`
		Start            float64              `json:"start"`
		Words            []struct {
			Start       float64 `json:"start"`
			End         float64 `json:"end"`
			Word        string  `json:"word"`
			Probability float64 `json:"probability"`
			Tokens      []int   `json:"tokens"`
		} `json:"words"`
		Text string  `json:"text"`
		End  float64 `json:"end"`
	} `json:"segments"`
}
27 |
--------------------------------------------------------------------------------
/log/zap.go:
--------------------------------------------------------------------------------
1 | package log
2 |
3 | import (
4 | "go.uber.org/zap"
5 | "go.uber.org/zap/zapcore"
6 | "os"
7 | )
8 |
// Logger is the process-wide zap logger; set by InitLogger before first use.
var Logger *zap.Logger
10 |
11 | func InitLogger() {
12 | file, err := os.OpenFile("app.log", os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
13 | if err != nil {
14 | panic("无法打开日志文件: " + err.Error())
15 | }
16 |
17 | fileSyncer := zapcore.AddSync(file)
18 | consoleSyncer := zapcore.AddSync(os.Stdout)
19 |
20 | encoderConfig := zap.NewProductionEncoderConfig()
21 | encoderConfig.EncodeTime = zapcore.ISO8601TimeEncoder
22 |
23 | core := zapcore.NewTee(
24 | zapcore.NewCore(zapcore.NewJSONEncoder(encoderConfig), fileSyncer, zap.DebugLevel), // 写入文件(JSON 格式)
25 | zapcore.NewCore(zapcore.NewConsoleEncoder(encoderConfig), consoleSyncer, zap.InfoLevel), // 输出到终端
26 | )
27 |
28 | Logger = zap.New(core, zap.AddCaller())
29 | }
30 |
// GetLogger returns the global logger configured by InitLogger.
func GetLogger() *zap.Logger {
	return Logger
}
34 |
--------------------------------------------------------------------------------
/pkg/aliyun/asr.go:
--------------------------------------------------------------------------------
1 | package aliyun
2 |
3 | import (
4 | "encoding/json"
5 | "fmt"
6 | "github.com/google/uuid"
7 | "github.com/gorilla/websocket"
8 | "go.uber.org/zap"
9 | "io"
10 | "krillin-ai/internal/storage"
11 | "krillin-ai/internal/types"
12 | "krillin-ai/log"
13 | "net/http"
14 | "os"
15 | "os/exec"
16 | "path/filepath"
17 | "strings"
18 | "time"
19 | )
20 |
// AsrClient performs speech recognition against Aliyun Bailian's realtime
// WebSocket ASR service.
type AsrClient struct {
	BailianApiKey string // DashScope/Bailian API key used as bearer token
}
24 |
25 | func NewAsrClient(bailianApiKey string) *AsrClient {
26 | return &AsrClient{
27 | BailianApiKey: bailianApiKey,
28 | }
29 | }
30 |
const (
	wsURL = "wss://dashscope.aliyuncs.com/api-ws/v1/inference/" // WebSocket server endpoint for realtime inference
)

// dialer is the shared WebSocket dialer used by connectWebSocket.
var dialer = websocket.DefaultDialer
36 |
37 | func (c AsrClient) Transcription(audioFile, language, workDir string) (*types.TranscriptionData, error) {
38 | // 处理音频
39 | processedAudioFile, err := processAudio(audioFile)
40 | if err != nil {
41 | log.GetLogger().Error("处理音频失败", zap.Error(err), zap.String("audio file", audioFile))
42 | return nil, err
43 | }
44 |
45 | // 连接WebSocket服务
46 | conn, err := connectWebSocket(c.BailianApiKey)
47 | if err != nil {
48 | log.GetLogger().Error("连接WebSocket失败", zap.Error(err), zap.String("audio file", audioFile))
49 | return nil, err
50 | }
51 | defer closeConnection(conn)
52 |
53 | // 启动一个goroutine来接收结果
54 | taskStarted := make(chan bool)
55 | taskDone := make(chan bool)
56 |
57 | words := make([]types.Word, 0)
58 | text := ""
59 | startResultReceiver(conn, &words, &text, taskStarted, taskDone)
60 |
61 | // 发送run-task指令
62 | taskID, err := sendRunTaskCmd(conn, language)
63 | if err != nil {
64 | log.GetLogger().Error("发送run-task指令失败", zap.Error(err), zap.String("audio file", audioFile))
65 | }
66 |
67 | // 等待task-started事件
68 | waitForTaskStarted(taskStarted)
69 |
70 | // 发送待识别音频文件流
71 | if err := sendAudioData(conn, processedAudioFile); err != nil {
72 | log.GetLogger().Error("发送音频数据失败", zap.Error(err))
73 | }
74 |
75 | // 发送finish-task指令
76 | if err := sendFinishTaskCmd(conn, taskID); err != nil {
77 | log.GetLogger().Error("发送finish-task指令失败", zap.Error(err), zap.String("audio file", audioFile))
78 | }
79 |
80 | // 等待任务完成或失败
81 | <-taskDone
82 |
83 | if len(words) == 0 {
84 | log.GetLogger().Info("识别结果为空", zap.String("audio file", audioFile))
85 | }
86 | log.GetLogger().Debug("识别结果", zap.Any("words", words), zap.String("text", text), zap.String("audio file", audioFile))
87 |
88 | transcriptionData := &types.TranscriptionData{
89 | Text: text,
90 | Words: words,
91 | }
92 |
93 | return transcriptionData, nil
94 | }
95 |
// Structures mirroring the JSON protocol of the DashScope ASR WebSocket.

// AsrHeader is the common message header for client actions and server events.
type AsrHeader struct {
	Action       string                 `json:"action"`
	TaskID       string                 `json:"task_id"`
	Streaming    string                 `json:"streaming"`
	Event        string                 `json:"event"`
	ErrorCode    string                 `json:"error_code,omitempty"`
	ErrorMessage string                 `json:"error_message,omitempty"`
	Attributes   map[string]interface{} `json:"attributes"`
}

// Output carries one recognized sentence. The EndTime pointers are nil while
// the sentence/word is still in progress — callers use that to detect that a
// sentence has finished.
type Output struct {
	Sentence struct {
		BeginTime int64  `json:"begin_time"`
		EndTime   *int64 `json:"end_time"`
		Text      string `json:"text"`
		Words     []struct {
			BeginTime   int64  `json:"begin_time"`
			EndTime     *int64 `json:"end_time"`
			Text        string `json:"text"`
			Punctuation string `json:"punctuation"`
		} `json:"words"`
	} `json:"sentence"`
	Usage interface{} `json:"usage"`
}

// Payload describes the task in outgoing commands and carries results in
// incoming events.
type Payload struct {
	TaskGroup  string     `json:"task_group"`
	Task       string     `json:"task"`
	Function   string     `json:"function"`
	Model      string     `json:"model"`
	Parameters Params     `json:"parameters"`
	Resources  []Resource `json:"resources"`
	Input      Input      `json:"input"`
	Output     Output     `json:"output,omitempty"`
}

// Params are the recognition parameters sent with the run-task command.
type Params struct {
	Format                   string   `json:"format"`
	SampleRate               int      `json:"sample_rate"`
	VocabularyID             string   `json:"vocabulary_id"`
	DisfluencyRemovalEnabled bool     `json:"disfluency_removal_enabled"`
	LanguageHints            []string `json:"language_hints"`
}

// Resource references an auxiliary service resource (e.g. a custom vocabulary).
type Resource struct {
	ResourceID   string `json:"resource_id"`
	ResourceType string `json:"resource_type"`
}

// Input is intentionally empty: audio is streamed as binary WebSocket frames,
// not embedded in the JSON command.
type Input struct {
}

// Event is the top-level message envelope for both directions.
type Event struct {
	Header  AsrHeader `json:"header"`
	Payload Payload   `json:"payload"`
}
153 |
154 | // 把音频处理成单声道、16k采样率
155 | func processAudio(filePath string) (string, error) {
156 | dest := strings.ReplaceAll(filePath, filepath.Ext(filePath), "_mono_16K.mp3")
157 | cmdArgs := []string{"-i", filePath, "-ac", "1", "-ar", "16000", "-b:a", "192k", dest}
158 | cmd := exec.Command(storage.FfmpegPath, cmdArgs...)
159 | output, err := cmd.CombinedOutput()
160 | if err != nil {
161 | log.GetLogger().Error("处理音频失败", zap.Error(err), zap.String("audio file", filePath), zap.String("output", string(output)))
162 | return "", err
163 | }
164 | return dest, nil
165 | }
166 |
167 | // 连接WebSocket服务
168 | func connectWebSocket(apiKey string) (*websocket.Conn, error) {
169 | header := make(http.Header)
170 | header.Add("X-DashScope-DataInspection", "enable")
171 | header.Add("Authorization", fmt.Sprintf("bearer %s", apiKey))
172 | conn, _, err := dialer.Dial(wsURL, header)
173 | return conn, err
174 | }
175 |
176 | // 启动一个goroutine异步接收WebSocket消息
177 | func startResultReceiver(conn *websocket.Conn, words *[]types.Word, text *string, taskStarted chan<- bool, taskDone chan<- bool) {
178 | go func() {
179 | for {
180 | _, message, err := conn.ReadMessage()
181 | if err != nil {
182 | log.GetLogger().Error("解析服务器消息失败:", zap.Error(err))
183 | continue
184 | }
185 | currentEvent := Event{}
186 | err = json.Unmarshal(message, ¤tEvent)
187 | if err != nil {
188 | log.GetLogger().Error("解析服务器消息失败:", zap.Error(err))
189 | continue
190 | }
191 | if currentEvent.Payload.Output.Sentence.EndTime != nil {
192 | // 本句结束,添加当前的words和text
193 | *text += currentEvent.Payload.Output.Sentence.Text
194 | currentNum := 0
195 | if len(*words) > 0 {
196 | currentNum = (*words)[len(*words)-1].Num + 1
197 | }
198 | for _, word := range currentEvent.Payload.Output.Sentence.Words {
199 | *words = append(*words, types.Word{
200 | Num: currentNum,
201 | Text: strings.TrimSpace(word.Text), // 阿里云这边的word后面会有空格
202 | Start: float64(word.BeginTime) / 1000,
203 | End: float64(*word.EndTime) / 1000,
204 | })
205 | currentNum++
206 | }
207 | }
208 | if handleEvent(conn, ¤tEvent, taskStarted, taskDone) {
209 | return
210 | }
211 | }
212 | }()
213 | }
214 |
215 | // 发送run-task指令
216 | func sendRunTaskCmd(conn *websocket.Conn, language string) (string, error) {
217 | runTaskCmd, taskID, err := generateRunTaskCmd(language)
218 | if err != nil {
219 | return "", err
220 | }
221 | err = conn.WriteMessage(websocket.TextMessage, []byte(runTaskCmd))
222 | return taskID, err
223 | }
224 |
225 | // 生成run-task指令
226 | func generateRunTaskCmd(language string) (string, string, error) {
227 | taskID := uuid.New().String()
228 | runTaskCmd := Event{
229 | Header: AsrHeader{
230 | Action: "run-task",
231 | TaskID: taskID,
232 | Streaming: "duplex",
233 | },
234 | Payload: Payload{
235 | TaskGroup: "audio",
236 | Task: "asr",
237 | Function: "recognition",
238 | Model: "paraformer-realtime-v2",
239 | Parameters: Params{
240 | Format: "mp3",
241 | SampleRate: 16000,
242 | LanguageHints: []string{language},
243 | },
244 | Input: Input{},
245 | },
246 | }
247 | runTaskCmdJSON, err := json.Marshal(runTaskCmd)
248 | return string(runTaskCmdJSON), taskID, err
249 | }
250 |
251 | // 等待task-started事件
252 | func waitForTaskStarted(taskStarted chan bool) {
253 | select {
254 | case <-taskStarted:
255 | log.GetLogger().Info("阿里云语音识别任务开启成功")
256 | case <-time.After(10 * time.Second):
257 | log.GetLogger().Error("等待task-started超时,任务开启失败")
258 | }
259 | }
260 |
261 | // 发送音频数据
262 | func sendAudioData(conn *websocket.Conn, filePath string) error {
263 | file, err := os.Open(filePath)
264 | if err != nil {
265 | return err
266 | }
267 | defer file.Close()
268 |
269 | buf := make([]byte, 1024) // 100ms的音频大约1024字节
270 | for {
271 | n, err := file.Read(buf)
272 | if n == 0 {
273 | break
274 | }
275 | if err != nil && err != io.EOF {
276 | return err
277 | }
278 | err = conn.WriteMessage(websocket.BinaryMessage, buf[:n])
279 | if err != nil {
280 | return err
281 | }
282 | time.Sleep(100 * time.Millisecond)
283 | }
284 | return nil
285 | }
286 |
287 | // 发送finish-task指令
288 | func sendFinishTaskCmd(conn *websocket.Conn, taskID string) error {
289 | finishTaskCmd, err := generateFinishTaskCmd(taskID)
290 | if err != nil {
291 | return err
292 | }
293 | err = conn.WriteMessage(websocket.TextMessage, []byte(finishTaskCmd))
294 | return err
295 | }
296 |
297 | // 生成finish-task指令
298 | func generateFinishTaskCmd(taskID string) (string, error) {
299 | finishTaskCmd := Event{
300 | Header: AsrHeader{
301 | Action: "finish-task",
302 | TaskID: taskID,
303 | Streaming: "duplex",
304 | },
305 | Payload: Payload{
306 | Input: Input{},
307 | },
308 | }
309 | finishTaskCmdJSON, err := json.Marshal(finishTaskCmd)
310 | return string(finishTaskCmdJSON), err
311 | }
312 |
313 | // 处理事件
314 | func handleEvent(conn *websocket.Conn, event *Event, taskStarted chan<- bool, taskDone chan<- bool) bool {
315 | switch event.Header.Event {
316 | case "task-started":
317 | log.GetLogger().Info("收到task-started事件", zap.String("taskID", event.Header.TaskID))
318 | taskStarted <- true
319 | case "result-generated":
320 | log.GetLogger().Info("收到result-generated事件", zap.String("当前text", event.Payload.Output.Sentence.Text))
321 | case "task-finished":
322 | log.GetLogger().Info("收到task-finished事件,任务完成", zap.String("taskID", event.Header.TaskID))
323 | taskDone <- true
324 | return true
325 | case "task-failed":
326 | log.GetLogger().Info("收到task-failed事件", zap.String("taskID", event.Header.TaskID))
327 | handleTaskFailed(event, conn)
328 | taskDone <- true
329 | return true
330 | default:
331 | log.GetLogger().Info("未知事件:", zap.String("event", event.Header.Event))
332 | }
333 | return false
334 | }
335 |
// handleTaskFailed logs the server-reported failure reason.
// NOTE(review): conn is unused here — kept only for signature stability;
// confirm whether the connection should be torn down on failure.
func handleTaskFailed(event *Event, conn *websocket.Conn) {
	log.GetLogger().Error("任务失败:", zap.String("error", event.Header.ErrorMessage))
}
340 |
341 | // 关闭连接
342 | func closeConnection(conn *websocket.Conn) {
343 | if conn != nil {
344 | conn.Close()
345 | }
346 | }
347 |
--------------------------------------------------------------------------------
/pkg/aliyun/base.go:
--------------------------------------------------------------------------------
1 | package aliyun
2 |
3 | import (
4 | "encoding/json"
5 | "github.com/aliyun/alibaba-cloud-sdk-go/sdk"
6 | "github.com/aliyun/alibaba-cloud-sdk-go/sdk/requests"
7 | "go.uber.org/zap"
8 | "krillin-ai/config"
9 | "krillin-ai/log"
10 | )
11 |
// TokenResult mirrors the JSON response of the NLS CreateToken API.
type TokenResult struct {
	ErrMsg string // error description — presumably empty on success; confirm against API docs
	Token  struct {
		UserId     string
		Id         string // the temporary access token value returned to callers
		ExpireTime int64  // expiry timestamp — NOTE(review): units (seconds?) not visible here, confirm
	}
}
20 |
21 | func CreateToken(ak, sk string) (string, error) {
22 | client, err := sdk.NewClientWithAccessKey("cn-shanghai", ak, sk)
23 | if err != nil {
24 | return "", err
25 | }
26 | if config.Conf.App.Proxy != "" {
27 | client.SetHttpProxy(config.Conf.App.Proxy)
28 | }
29 | request := requests.NewCommonRequest()
30 | request.Method = "POST"
31 | request.Domain = "nls-meta.cn-shanghai.aliyuncs.com"
32 | request.ApiName = "CreateToken"
33 | request.Version = "2019-02-28"
34 | response, err := client.ProcessCommonRequest(request)
35 | if err != nil {
36 | log.GetLogger().Error("aliyun sdk create token request error:", zap.Error(err))
37 | return "", err
38 | }
39 |
40 | var tr TokenResult
41 | err = json.Unmarshal([]byte(response.GetHttpContentString()), &tr)
42 | if err != nil {
43 | log.GetLogger().Error("aliyun sdk json unmarshal error:", zap.Error(err))
44 | return "", err
45 | }
46 | return tr.Token.Id, nil
47 | }
48 |
--------------------------------------------------------------------------------
/pkg/aliyun/chat.go:
--------------------------------------------------------------------------------
1 | package aliyun
2 |
import (
	"context"
	"errors"

	goopenai "github.com/sashabaranov/go-openai"
	"go.uber.org/zap"

	"krillin-ai/log"
)
9 |
// ChatClient embeds a go-openai client configured for Aliyun DashScope's
// OpenAI-compatible endpoint.
type ChatClient struct {
	*goopenai.Client
}
13 |
14 | func NewChatClient(apiKey string) *ChatClient {
15 | cfg := goopenai.DefaultConfig(apiKey)
16 | cfg.BaseURL = "https://dashscope.aliyuncs.com/compatible-mode/v1" // 使用阿里云的openai兼容模式调用
17 | return &ChatClient{
18 | Client: goopenai.NewClientWithConfig(cfg),
19 | }
20 | }
21 |
22 | func (c ChatClient) ChatCompletion(query string) (string, error) {
23 | req := goopenai.ChatCompletionRequest{
24 | Model: "qwen-plus",
25 | Messages: []goopenai.ChatCompletionMessage{
26 | {
27 | Role: goopenai.ChatMessageRoleSystem,
28 | Content: "You are an assistant that helps with subtitle translation.",
29 | },
30 | {
31 | Role: goopenai.ChatMessageRoleUser,
32 | Content: query,
33 | },
34 | },
35 | }
36 |
37 | resp, err := c.CreateChatCompletion(context.Background(), req)
38 | if err != nil {
39 | log.GetLogger().Error("aliyun openai create chat completion failed", zap.Error(err))
40 | return "", err
41 | }
42 |
43 | resContent := resp.Choices[0].Message.Content
44 |
45 | return resContent, nil
46 | }
47 |
--------------------------------------------------------------------------------
/pkg/aliyun/oss.go:
--------------------------------------------------------------------------------
1 | package aliyun
2 |
3 | import (
4 | "context"
5 | "fmt"
6 | "github.com/aliyun/alibabacloud-oss-go-sdk-v2/oss"
7 | "github.com/aliyun/alibabacloud-oss-go-sdk-v2/oss/credentials"
8 | "os"
9 | )
10 |
// OssClient wraps the Aliyun OSS v2 client plus a default bucket name.
// NOTE(review): UploadFile takes an explicit bucket argument and never reads
// the Bucket field — confirm which one is intended to win.
type OssClient struct {
	*oss.Client
	Bucket string
}
15 |
16 | func NewOssClient(accessKeyID, accessKeySecret, bucket string) *OssClient {
17 | credProvider := credentials.NewStaticCredentialsProvider(accessKeyID, accessKeySecret)
18 |
19 | cfg := oss.LoadDefaultConfig().
20 | WithCredentialsProvider(credProvider).
21 | WithRegion("cn-shanghai")
22 |
23 | client := oss.NewClient(cfg)
24 |
25 | return &OssClient{client, bucket}
26 | }
27 |
28 | func (o *OssClient) UploadFile(ctx context.Context, objectKey, filePath, bucket string) error {
29 | file, err := os.Open(filePath)
30 | if err != nil {
31 | return fmt.Errorf("failed to open file: %v", err)
32 | }
33 | defer file.Close()
34 |
35 | _, err = o.PutObject(ctx, &oss.PutObjectRequest{
36 | Bucket: &bucket,
37 | Key: &objectKey,
38 | Body: file,
39 | })
40 | if err != nil {
41 | return fmt.Errorf("failed to upload file to OSS: %v", err)
42 | }
43 |
44 | fmt.Printf("File %s uploaded successfully to bucket %s as %s\n", filePath, bucket, objectKey)
45 | return nil
46 | }
47 |
--------------------------------------------------------------------------------
/pkg/aliyun/tts.go:
--------------------------------------------------------------------------------
1 | package aliyun
2 |
3 | import (
4 | "encoding/json"
5 | "fmt"
6 | "github.com/gorilla/websocket"
7 | "go.uber.org/zap"
8 | "krillin-ai/config"
9 | "krillin-ai/log"
10 | "krillin-ai/pkg/util"
11 | "net/http"
12 | "os"
13 | "time"
14 | )
15 |
// TtsClient holds the Aliyun credentials and NLS appkey used by the
// flowing-TTS WebSocket API.
type TtsClient struct {
	AccessKeyID     string
	AccessKeySecret string
	Appkey          string
}

// TtsHeader is the envelope header of every flowing-TTS WebSocket message.
type TtsHeader struct {
	Appkey    string `json:"appkey"`
	MessageID string `json:"message_id"`
	TaskID    string `json:"task_id"`
	Namespace string `json:"namespace"`
	Name      string `json:"name"`
}

// StartSynthesisPayload configures a StartSynthesis command.
type StartSynthesisPayload struct {
	Voice                  string `json:"voice,omitempty"`
	Format                 string `json:"format,omitempty"`
	SampleRate             int    `json:"sample_rate,omitempty"`
	Volume                 int    `json:"volume,omitempty"`
	SpeechRate             int    `json:"speech_rate,omitempty"`
	PitchRate              int    `json:"pitch_rate,omitempty"`
	EnableSubtitle         bool   `json:"enable_subtitle,omitempty"`
	EnablePhonemeTimestamp bool   `json:"enable_phoneme_timestamp,omitempty"`
}

// RunSynthesisPayload carries the text for a RunSynthesis command.
type RunSynthesisPayload struct {
	Text string `json:"text"`
}

// Message is the generic header+payload envelope for TTS traffic.
type Message struct {
	Header  TtsHeader   `json:"header"`
	Payload interface{} `json:"payload,omitempty"`
}
49 |
50 | func NewTtsClient(accessKeyId, accessKeySecret, appkey string) *TtsClient {
51 | return &TtsClient{
52 | AccessKeyID: accessKeyId,
53 | AccessKeySecret: accessKeySecret,
54 | Appkey: appkey,
55 | }
56 | }
57 |
58 | func (c *TtsClient) Text2Speech(text, voice, outputFile string) error {
59 | file, err := os.OpenFile(outputFile, os.O_CREATE|os.O_WRONLY, 0666)
60 | if err != nil {
61 | return fmt.Errorf("failed to create file: %w", err)
62 | }
63 | defer file.Close()
64 |
65 | var conn *websocket.Conn
66 | token, _ := CreateToken(c.AccessKeyID, c.AccessKeySecret)
67 | fullURL := "wss://nls-gateway-cn-beijing.aliyuncs.com/ws/v1?token=" + token
68 | dialer := websocket.DefaultDialer
69 | if config.Conf.App.Proxy != "" {
70 | dialer.Proxy = http.ProxyURL(config.Conf.App.ParsedProxy)
71 | }
72 | dialer.HandshakeTimeout = 10 * time.Second
73 | conn, _, err = dialer.Dial(fullURL, nil)
74 | if err != nil {
75 | return err
76 | }
77 | _ = conn.SetReadDeadline(time.Now().Add(time.Second * 60))
78 | defer c.Close(conn)
79 |
80 | onTextMessage := func(message string) {
81 | log.GetLogger().Info("Received text message", zap.String("Message", message))
82 | }
83 |
84 | onBinaryMessage := func(data []byte) {
85 | if _, err := file.Write(data); err != nil {
86 | log.GetLogger().Error("Failed to write data to file", zap.Error(err))
87 | }
88 | }
89 |
90 | var (
91 | synthesisStarted = make(chan struct{})
92 | synthesisComplete = make(chan struct{})
93 | )
94 |
95 | startPayload := StartSynthesisPayload{
96 | Voice: voice,
97 | Format: "wav",
98 | SampleRate: 44100,
99 | Volume: 50,
100 | SpeechRate: 0,
101 | PitchRate: 0,
102 | }
103 |
104 | go c.receiveMessages(conn, onTextMessage, onBinaryMessage, synthesisStarted, synthesisComplete)
105 |
106 | taskId := util.GenerateID()
107 | log.GetLogger().Info("SpeechClient StartSynthesis", zap.String("taskId", taskId), zap.Any("payload", startPayload))
108 | if err := c.StartSynthesis(conn, taskId, startPayload, synthesisStarted); err != nil {
109 | return fmt.Errorf("failed to start synthesis: %w", err)
110 | }
111 |
112 | if err := c.RunSynthesis(conn, taskId, text); err != nil {
113 | return fmt.Errorf("failed to run synthesis: %w", err)
114 | }
115 |
116 | if err := c.StopSynthesis(conn, taskId, synthesisComplete); err != nil {
117 | return fmt.Errorf("failed to stop synthesis: %w", err)
118 | }
119 |
120 | return nil
121 | }
122 |
123 | func (c *TtsClient) sendMessage(conn *websocket.Conn, taskId, name string, payload interface{}) error {
124 | message := Message{
125 | Header: TtsHeader{
126 | Appkey: c.Appkey,
127 | MessageID: util.GenerateID(),
128 | TaskID: taskId,
129 | Namespace: "FlowingSpeechSynthesizer",
130 | Name: name,
131 | },
132 | Payload: payload,
133 | }
134 | jsonData, _ := json.Marshal(message)
135 | log.GetLogger().Debug("SpeechClient sendMessage", zap.String("message", string(jsonData)))
136 | return conn.WriteJSON(message)
137 | }
138 |
139 | func (c *TtsClient) StartSynthesis(conn *websocket.Conn, taskId string, payload StartSynthesisPayload, synthesisStarted chan struct{}) error {
140 | err := c.sendMessage(conn, taskId, "StartSynthesis", payload)
141 | if err != nil {
142 | return err
143 | }
144 |
145 | // 阻塞等待 SynthesisStarted 事件
146 | <-synthesisStarted
147 |
148 | return nil
149 | }
150 |
// RunSynthesis submits the text to synthesize for an already-started task.
func (c *TtsClient) RunSynthesis(conn *websocket.Conn, taskId, text string) error {
	return c.sendMessage(conn, taskId, "RunSynthesis", RunSynthesisPayload{Text: text})
}
154 |
155 | func (c *TtsClient) StopSynthesis(conn *websocket.Conn, taskId string, synthesisComplete chan struct{}) error {
156 | err := c.sendMessage(conn, taskId, "StopSynthesis", nil)
157 | if err != nil {
158 | return err
159 | }
160 |
161 | // 阻塞等待 SynthesisCompleted 事件
162 | <-synthesisComplete
163 |
164 | return nil
165 | }
166 |
167 | func (c *TtsClient) Close(conn *websocket.Conn) error {
168 | err := conn.WriteMessage(websocket.CloseMessage, websocket.FormatCloseMessage(websocket.CloseNormalClosure, ""))
169 | if err != nil {
170 | return err
171 | }
172 | return conn.Close()
173 | }
174 |
175 | func (c *TtsClient) receiveMessages(conn *websocket.Conn, onTextMessage func(string), onBinaryMessage func([]byte), synthesisStarted, synthesisComplete chan struct{}) {
176 | defer close(synthesisComplete)
177 | for {
178 | messageType, message, err := conn.ReadMessage()
179 | if err != nil {
180 | if websocket.IsCloseError(err, websocket.CloseNormalClosure) {
181 | log.GetLogger().Error("SpeechClient receiveMessages websocket非正常关闭", zap.Error(err))
182 | return
183 | }
184 | return
185 | }
186 | if messageType == websocket.TextMessage {
187 | var msg Message
188 | if err := json.Unmarshal(message, &msg); err != nil {
189 | log.GetLogger().Error("SpeechClient receiveMessages json解析失败", zap.Error(err))
190 | return
191 | }
192 | if msg.Header.Name == "SynthesisCompleted" {
193 | log.GetLogger().Info("SynthesisCompleted event received")
194 | // 收到结束消息退出
195 | break
196 | } else if msg.Header.Name == "SynthesisStarted" {
197 | log.GetLogger().Info("SynthesisStarted event received")
198 | close(synthesisStarted)
199 | } else {
200 | onTextMessage(string(message))
201 | }
202 | } else if messageType == websocket.BinaryMessage {
203 | onBinaryMessage(message)
204 | }
205 | }
206 | }
207 |
--------------------------------------------------------------------------------
/pkg/aliyun/voice_clone.go:
--------------------------------------------------------------------------------
1 | package aliyun
2 |
3 | import (
4 | "crypto/hmac"
5 | "crypto/sha1"
6 | "encoding/base64"
7 | "fmt"
8 | "go.uber.org/zap"
9 | "krillin-ai/log"
10 | "net/url"
11 | "sort"
12 | "strings"
13 | "time"
14 |
15 | "github.com/go-resty/resty/v2"
16 | "github.com/google/uuid"
17 | )
18 |
// _encodeText percent-encodes text per Aliyun's POP signing rules (RFC 3986
// style): space becomes %20 rather than '+', '*' becomes %2A, and '~' stays
// literal.
func _encodeText(text string) string {
	s := url.QueryEscape(text)
	s = strings.ReplaceAll(s, "+", "%20")
	s = strings.ReplaceAll(s, "*", "%2A")
	return strings.ReplaceAll(s, "%7E", "~")
}
25 |
// _encodeDict encodes a map as the canonical query string used for signing.
// url.Values.Encode already emits pairs sorted by key, so the previous manual
// key-sorting pass was redundant and has been removed; output is unchanged.
func _encodeDict(dic map[string]string) string {
	values := url.Values{}
	for k, v := range dic {
		values.Add(k, v)
	}
	encoded := values.Encode()
	// Apply the same RFC 3986 normalization as _encodeText.
	encoded = strings.ReplaceAll(encoded, "+", "%20")
	encoded = strings.ReplaceAll(encoded, "*", "%2A")
	return strings.ReplaceAll(encoded, "%7E", "~")
}
42 |
43 | // 生成签名
44 | func GenerateSignature(secret, stringToSign string) string {
45 | key := []byte(secret + "&")
46 | data := []byte(stringToSign)
47 | hash := hmac.New(sha1.New, key)
48 | hash.Write(data)
49 | signature := base64.StdEncoding.EncodeToString(hash.Sum(nil))
50 | // 对签名进行URL编码
51 | return _encodeText(signature)
52 | }
53 |
// VoiceCloneResp mirrors the CosyVoiceClone API response; Message is
// "SUCCESS" on success and VoiceName is the service-assigned voice id.
type VoiceCloneResp struct {
	RequestId string `json:"RequestId"`
	Message   string `json:"Message"`
	Code      int    `json:"Code"`
	VoiceName string `json:"VoiceName"`
}
60 |
// VoiceCloneClient signs and sends CosyVoice clone requests to the nls-slp
// endpoint with its own resty HTTP client.
type VoiceCloneClient struct {
	restyClient     *resty.Client
	accessKeyID     string
	accessKeySecret string
	appkey          string // NOTE(review): not referenced by the visible methods — confirm it is still needed
}
67 |
68 | func NewVoiceCloneClient(accessKeyID, accessKeySecret, appkey string) *VoiceCloneClient {
69 | return &VoiceCloneClient{
70 | restyClient: resty.New(),
71 | accessKeyID: accessKeyID,
72 | accessKeySecret: accessKeySecret,
73 | appkey: appkey,
74 | }
75 | }
76 |
77 | func (c *VoiceCloneClient) CosyVoiceClone(voicePrefix, audioURL string) (string, error) {
78 | log.GetLogger().Info("CosyVoiceClone请求开始", zap.String("voicePrefix", voicePrefix), zap.String("audioURL", audioURL))
79 | parameters := map[string]string{
80 | "AccessKeyId": c.accessKeyID,
81 | "Action": "CosyVoiceClone",
82 | "Format": "JSON",
83 | "RegionId": "cn-shanghai",
84 | "SignatureMethod": "HMAC-SHA1",
85 | "SignatureNonce": uuid.New().String(),
86 | "SignatureVersion": "1.0",
87 | "Timestamp": time.Now().UTC().Format("2006-01-02T15:04:05Z"),
88 | "Version": "2019-08-19",
89 | "VoicePrefix": voicePrefix,
90 | "Url": audioURL,
91 | }
92 |
93 | queryString := _encodeDict(parameters)
94 | stringToSign := "POST" + "&" + _encodeText("/") + "&" + _encodeText(queryString)
95 | signature := GenerateSignature(c.accessKeySecret, stringToSign)
96 | fullURL := fmt.Sprintf("https://nls-slp.cn-shanghai.aliyuncs.com/?Signature=%s&%s", signature, queryString)
97 |
98 | values := url.Values{}
99 | for key, value := range parameters {
100 | values.Add(key, value)
101 | }
102 | var res VoiceCloneResp
103 | resp, err := c.restyClient.R().SetResult(&res).Post(fullURL)
104 | if err != nil {
105 | log.GetLogger().Error("CosyVoiceClone post error", zap.Error(err))
106 | return "", fmt.Errorf("CosyVoiceClone post error: %w: ", err)
107 | }
108 | log.GetLogger().Info("CosyVoiceClone请求完毕", zap.String("Response", resp.String()))
109 | if res.Message != "SUCCESS" {
110 | log.GetLogger().Error("CosyVoiceClone res message is not success", zap.String("Request Id", res.RequestId), zap.Int("Code", res.Code), zap.String("Message", res.Message))
111 | return "", fmt.Errorf("CosyVoiceClone res message is not success, message: %s", res.Message)
112 | }
113 | return res.VoiceName, nil
114 | }
115 |
116 | func (c *VoiceCloneClient) CosyCloneList(voicePrefix string, pageIndex, pageSize int) {
117 | parameters := map[string]string{
118 | "AccessKeyId": c.accessKeyID,
119 | "Action": "ListCosyVoice",
120 | "Format": "JSON",
121 | "RegionId": "cn-shanghai",
122 | "SignatureMethod": "HMAC-SHA1",
123 | "SignatureNonce": uuid.New().String(),
124 | "SignatureVersion": "1.0",
125 | "Timestamp": time.Now().UTC().Format("2006-01-02T15:04:05Z"),
126 | "Version": "2019-08-19",
127 | "VoicePrefix": voicePrefix,
128 | "PageIndex": fmt.Sprintf("%d", pageIndex),
129 | "PageSize": fmt.Sprintf("%d", pageSize),
130 | }
131 |
132 | queryString := _encodeDict(parameters)
133 | stringToSign := "POST" + "&" + _encodeText("/") + "&" + _encodeText(queryString)
134 | signature := GenerateSignature(c.accessKeySecret, stringToSign)
135 | fullURL := fmt.Sprintf("https://nls-slp.cn-shanghai.aliyuncs.com/?Signature=%s&%s", signature, queryString)
136 |
137 | values := url.Values{}
138 | for key, value := range parameters {
139 | values.Add(key, value)
140 | }
141 | resp, err := c.restyClient.R().Post(fullURL)
142 | if err != nil {
143 | log.GetLogger().Error("CosyCloneList请求失败", zap.Error(err))
144 | return
145 | }
146 | log.GetLogger().Info("CosyCloneList请求成功", zap.String("Response", resp.String()))
147 | }
148 |
--------------------------------------------------------------------------------
/pkg/fasterwhisper/init.go:
--------------------------------------------------------------------------------
1 | package fasterwhisper
2 |
// FastwhisperProcessor drives the faster-whisper CLI binary.
type FastwhisperProcessor struct {
	WorkDir string // directory for generated intermediate files
	Model   string // model name passed to the CLI via --model
}
7 |
8 | func NewFastwhisperProcessor(model string) *FastwhisperProcessor {
9 | return &FastwhisperProcessor{
10 | Model: model,
11 | }
12 | }
13 |
--------------------------------------------------------------------------------
/pkg/fasterwhisper/transcription.go:
--------------------------------------------------------------------------------
1 | package fasterwhisper
2 |
3 | import (
4 | "encoding/json"
5 | "go.uber.org/zap"
6 | "krillin-ai/internal/storage"
7 | "krillin-ai/internal/types"
8 | "krillin-ai/log"
9 | "krillin-ai/pkg/util"
10 | "os"
11 | "os/exec"
12 | "strings"
13 | )
14 |
15 | func (c *FastwhisperProcessor) Transcription(audioFile, language, workDir string) (*types.TranscriptionData, error) {
16 | cmdArgs := []string{
17 | "--model_dir", "./models/",
18 | "--model", c.Model,
19 | "--one_word", "2",
20 | "--output_format", "json",
21 | "--language", language,
22 | "--output_dir", workDir,
23 | audioFile,
24 | }
25 | cmd := exec.Command(storage.FasterwhisperPath, cmdArgs...)
26 | log.GetLogger().Info("FastwhisperProcessor转录开始", zap.String("cmd", cmd.String()))
27 | output, err := cmd.CombinedOutput()
28 | if err != nil && !strings.Contains(string(output), "Subtitles are written to") {
29 | log.GetLogger().Error("FastwhisperProcessor cmd 执行失败", zap.String("output", string(output)), zap.Error(err))
30 | return nil, err
31 | }
32 | log.GetLogger().Info("FastwhisperProcessor转录json生成完毕", zap.String("audio file", audioFile))
33 |
34 | var result types.FasterWhisperOutput
35 | fileData, err := os.Open(util.ChangeFileExtension(audioFile, ".json"))
36 | if err != nil {
37 | log.GetLogger().Error("FastwhisperProcessor 打开json文件失败", zap.Error(err))
38 | return nil, err
39 | }
40 | defer fileData.Close()
41 | decoder := json.NewDecoder(fileData)
42 | if err = decoder.Decode(&result); err != nil {
43 | log.GetLogger().Error("FastwhisperProcessor 解析json文件失败", zap.Error(err))
44 | return nil, err
45 | }
46 |
47 | var (
48 | transcriptionData types.TranscriptionData
49 | num int
50 | )
51 | for _, segment := range result.Segments {
52 | transcriptionData.Text += strings.ReplaceAll(segment.Text, "—", " ") // 连字符处理,因为模型存在很多错误添加到连字符
53 | for _, word := range segment.Words {
54 | if strings.Contains(word.Word, "—") {
55 | // 对称切分
56 | mid := (word.Start + word.End) / 2
57 | seperatedWords := strings.Split(word.Word, "—")
58 | transcriptionData.Words = append(transcriptionData.Words, []types.Word{
59 | {
60 | Num: num,
61 | Text: util.CleanPunction(strings.TrimSpace(seperatedWords[0])),
62 | Start: word.Start,
63 | End: mid,
64 | },
65 | {
66 | Num: num + 1,
67 | Text: util.CleanPunction(strings.TrimSpace(seperatedWords[1])),
68 | Start: mid,
69 | End: word.End,
70 | },
71 | }...)
72 | num += 2
73 | } else {
74 | transcriptionData.Words = append(transcriptionData.Words, types.Word{
75 | Num: num,
76 | Text: util.CleanPunction(strings.TrimSpace(word.Word)),
77 | Start: word.Start,
78 | End: word.End,
79 | })
80 | num++
81 | }
82 | }
83 | }
84 | log.GetLogger().Info("FastwhisperProcessor转录成功")
85 | return &transcriptionData, nil
86 | }
87 |
--------------------------------------------------------------------------------
/pkg/openai/init.go:
--------------------------------------------------------------------------------
1 | package openai
2 |
import (
	"net/http"
	"net/url"

	"github.com/sashabaranov/go-openai"

	"krillin-ai/config"
)
8 |
// Client wraps the go-openai client used for chat completions.
type Client struct {
	client *openai.Client
}
12 |
13 | func NewClient(baseUrl, apiKey, proxyAddr string) *Client {
14 | cfg := openai.DefaultConfig(apiKey)
15 | if baseUrl != "" {
16 | cfg.BaseURL = baseUrl
17 | }
18 |
19 | if proxyAddr != "" {
20 | transport := &http.Transport{
21 | Proxy: http.ProxyURL(config.Conf.App.ParsedProxy),
22 | }
23 | cfg.HTTPClient = &http.Client{
24 | Transport: transport,
25 | }
26 | }
27 |
28 | client := openai.NewClientWithConfig(cfg)
29 | return &Client{client: client}
30 | }
31 |
--------------------------------------------------------------------------------
/pkg/openai/openai.go:
--------------------------------------------------------------------------------
1 | package openai
2 |
3 | import (
4 | "context"
5 | openai "github.com/sashabaranov/go-openai"
6 | "go.uber.org/zap"
7 | "io"
8 | "krillin-ai/config"
9 | "krillin-ai/log"
10 | )
11 |
12 | func (c *Client) ChatCompletion(query string) (string, error) {
13 | req := openai.ChatCompletionRequest{
14 | Model: openai.GPT4oMini20240718,
15 | Messages: []openai.ChatCompletionMessage{
16 | {
17 | Role: openai.ChatMessageRoleSystem,
18 | Content: "You are an assistant that helps with subtitle translation.",
19 | },
20 | {
21 | Role: openai.ChatMessageRoleUser,
22 | Content: query,
23 | },
24 | },
25 | Stream: true,
26 | MaxTokens: 8192,
27 | }
28 | if config.Conf.Openai.Model != "" {
29 | req.Model = config.Conf.Openai.Model
30 | }
31 |
32 | stream, err := c.client.CreateChatCompletionStream(context.Background(), req)
33 | if err != nil {
34 | log.GetLogger().Error("openai create chat completion stream failed", zap.Error(err))
35 | return "", err
36 | }
37 | defer stream.Close()
38 |
39 | var resContent string
40 | for {
41 | response, err := stream.Recv()
42 | if err == io.EOF {
43 | break
44 | }
45 | if err != nil {
46 | log.GetLogger().Error("openai stream receive failed", zap.Error(err))
47 | return "", err
48 | }
49 | if len(response.Choices) == 0 {
50 | log.GetLogger().Info("openai stream receive no choices", zap.Any("response", response))
51 | continue
52 | }
53 |
54 | resContent += response.Choices[0].Delta.Content
55 | }
56 |
57 | return resContent, nil
58 | }
59 |
--------------------------------------------------------------------------------
/pkg/util/base.go:
--------------------------------------------------------------------------------
1 | package util
2 |
3 | import (
4 | "archive/zip"
5 | "fmt"
6 | "github.com/google/uuid"
7 | "io"
8 | "math"
9 | "math/rand"
10 | "net/url"
11 | "os"
12 | "path/filepath"
13 | "regexp"
14 | "strconv"
15 | "strings"
16 | "unicode"
17 | )
18 |
19 | var strWithUpperLowerNum = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ123456789")
20 |
21 | func GenerateRandStringWithUpperLowerNum(n int) string {
22 | b := make([]rune, n)
23 | for i := range b {
24 | b[i] = strWithUpperLowerNum[rand.Intn(len(strWithUpperLowerNum))]
25 | }
26 | return string(b)
27 | }
28 |
// GetYouTubeID extracts the video ID from a YouTube URL.
//
// Supports both "watch?v=<id>" URLs and short/path forms (youtu.be/<id>,
// /shorts/<id>, ...). Fix: an empty ID (e.g. "https://youtu.be/" or
// "watch?v=") previously returned ("", nil) — success with no ID; it now
// returns an error.
func GetYouTubeID(youtubeURL string) (string, error) {
	parsedURL, err := url.Parse(youtubeURL)
	if err != nil {
		return "", err
	}

	if strings.Contains(parsedURL.Path, "watch") {
		if id := parsedURL.Query().Get("v"); id != "" {
			return id, nil
		}
		return "", fmt.Errorf("no video ID found")
	}

	// Short/path form: the ID is the last path segment.
	pathSegments := strings.Split(parsedURL.Path, "/")
	if id := pathSegments[len(pathSegments)-1]; id != "" {
		return id, nil
	}
	return "", fmt.Errorf("no video ID found")
}
47 |
// GetBilibiliVideoId extracts the BV identifier from a bilibili video URL.
// Returns "" when the URL does not match the expected pattern.
func GetBilibiliVideoId(url string) string {
	pattern := regexp.MustCompile(`https://(?:www\.)?bilibili\.com/(?:video/|video/av\d+/)(BV[a-zA-Z0-9]+)`)
	if m := pattern.FindStringSubmatch(url); len(m) > 1 {
		return m[1] // the captured BV id
	}
	return ""
}
57 |
// FormatTime converts a float seconds value into an SRT timestamp of the
// form "HH:MM:SS,mmm" (milliseconds truncated, not rounded).
func FormatTime(seconds float32) string {
	whole := int(math.Floor(float64(seconds)))       // whole seconds
	millis := int((seconds - float32(whole)) * 1000) // fractional part as ms

	hh := whole / 3600
	rem := whole % 3600
	return fmt.Sprintf("%02d:%02d:%02d,%03d", hh, rem/60, rem%60, millis)
}
68 |
// IsNumber reports whether s parses as a base-10 integer — used to detect
// SRT block index lines. Note: Atoi also accepts signed values like "-7".
func IsNumber(s string) bool {
	if _, err := strconv.Atoi(s); err != nil {
		return false
	}
	return true
}
74 |
75 | func Unzip(zipFile, destDir string) error {
76 | zipReader, err := zip.OpenReader(zipFile)
77 | if err != nil {
78 | return fmt.Errorf("打开zip文件失败: %v", err)
79 | }
80 | defer zipReader.Close()
81 |
82 | err = os.MkdirAll(destDir, 0755)
83 | if err != nil {
84 | return fmt.Errorf("创建目标目录失败: %v", err)
85 | }
86 |
87 | for _, file := range zipReader.File {
88 | filePath := filepath.Join(destDir, file.Name)
89 |
90 | if file.FileInfo().IsDir() {
91 | err := os.MkdirAll(filePath, file.Mode())
92 | if err != nil {
93 | return fmt.Errorf("创建目录失败: %v", err)
94 | }
95 | continue
96 | }
97 |
98 | destFile, err := os.Create(filePath)
99 | if err != nil {
100 | return fmt.Errorf("创建文件失败: %v", err)
101 | }
102 | defer destFile.Close()
103 |
104 | zipFileReader, err := file.Open()
105 | if err != nil {
106 | return fmt.Errorf("打开zip文件内容失败: %v", err)
107 | }
108 | defer zipFileReader.Close()
109 |
110 | _, err = io.Copy(destFile, zipFileReader)
111 | if err != nil {
112 | return fmt.Errorf("复制文件内容失败: %v", err)
113 | }
114 | }
115 |
116 | return nil
117 | }
118 |
119 | func GenerateID() string {
120 | return strings.ReplaceAll(uuid.New().String(), "-", "")
121 | }
122 |
// ChangeFileExtension replaces path's extension with newExt
// (pass "" to strip the extension entirely).
func ChangeFileExtension(path string, newExt string) string {
	return strings.TrimSuffix(path, filepath.Ext(path)) + newExt
}
128 |
// CleanPunction strips leading and trailing Unicode punctuation from word.
// (Exported name kept as-is; "Punction" is a historical typo in the API.)
func CleanPunction(word string) string {
	return strings.TrimFunc(word, unicode.IsPunct)
}
134 |
// IsAlphabetic reports whether r is a letter from the Latin (incl. extended),
// Greek, or Cyrillic ranges. CJK characters pass unicode.IsLetter but are
// deliberately excluded here.
func IsAlphabetic(r rune) bool {
	if !unicode.IsLetter(r) {
		return false
	}
	basicLatin := (r >= 'A' && r <= 'Z') || (r >= 'a' && r <= 'z')
	extendedLatin := r >= '\u00C0' && r <= '\u024F' // accented letters (French, Spanish, ...)
	greek := r >= '\u0370' && r <= '\u03FF'
	cyrillic := r >= '\u0400' && r <= '\u04FF' // Russian etc.
	return basicLatin || extendedLatin || greek || cyrillic
}
156 |
157 | func ContainsAlphabetic(text string) bool {
158 | for _, r := range text {
159 | if IsAlphabetic(r) {
160 | return true
161 | }
162 | }
163 | return false
164 | }
165 |
166 | // CopyFile 复制文件
167 | func CopyFile(src, dst string) error {
168 | sourceFile, err := os.Open(src)
169 | if err != nil {
170 | return err
171 | }
172 | defer sourceFile.Close()
173 |
174 | destinationFile, err := os.Create(dst)
175 | if err != nil {
176 | return err
177 | }
178 | defer destinationFile.Close()
179 |
180 | _, err = io.Copy(destinationFile, sourceFile)
181 | if err != nil {
182 | return err
183 | }
184 |
185 | return destinationFile.Sync()
186 | }
187 |
--------------------------------------------------------------------------------
/pkg/util/download.go:
--------------------------------------------------------------------------------
1 | package util
2 |
3 | import (
4 | "fmt"
5 | "go.uber.org/zap"
6 | "io"
7 | "krillin-ai/config"
8 | "krillin-ai/log"
9 | "net/http"
10 | "os"
11 | "time"
12 | )
13 |
// progressWriter prints download progress to stdout; implements io.Writer
// so it can sit on the tee side of an io.TeeReader.
type progressWriter struct {
	Total      uint64 // expected total bytes; 0 (or a wrapped -1) means unknown
	Downloaded uint64 // bytes seen so far
	StartTime  time.Time
}

// Write records n bytes of progress and redraws the progress line.
//
// Fixes: the first call previously divided by a zero elapsed time (speed
// printed as +Inf), and an unknown Total produced a meaningless percentage;
// both are now guarded.
func (pw *progressWriter) Write(p []byte) (int, error) {
	n := len(p)
	pw.Downloaded += uint64(n)

	// Lazily initialize the start time on the first write.
	if pw.StartTime.IsZero() {
		pw.StartTime = time.Now()
	}

	percent := 0.0
	if pw.Total > 0 {
		percent = float64(pw.Downloaded) / float64(pw.Total) * 100
	}
	speed := 0.0
	if elapsed := time.Since(pw.StartTime).Seconds(); elapsed > 0 {
		speed = float64(pw.Downloaded) / 1024 / 1024 / elapsed
	}

	fmt.Printf("\r下载进度: %.2f%% (%.2f MB / %.2f MB) | 速度: %.2f MB/s",
		percent,
		float64(pw.Downloaded)/1024/1024,
		float64(pw.Total)/1024/1024,
		speed)

	return n, nil
}
42 |
43 | // DownloadFile 下载文件并保存到指定路径,支持代理
44 | func DownloadFile(urlStr, filepath, proxyAddr string) error {
45 | log.GetLogger().Info("开始下载文件", zap.String("url", urlStr))
46 | client := &http.Client{}
47 | if proxyAddr != "" {
48 | client.Transport = &http.Transport{
49 | Proxy: http.ProxyURL(config.Conf.App.ParsedProxy),
50 | }
51 | }
52 |
53 | resp, err := client.Get(urlStr)
54 | if err != nil {
55 | return err
56 | }
57 | defer resp.Body.Close()
58 |
59 | size := resp.ContentLength
60 | fmt.Printf("文件大小: %.2f MB\n", float64(size)/1024/1024)
61 |
62 | out, err := os.Create(filepath)
63 | if err != nil {
64 | return err
65 | }
66 | defer out.Close()
67 |
68 | // 带有进度的 Reader
69 | progress := &progressWriter{
70 | Total: uint64(size),
71 | }
72 | reader := io.TeeReader(resp.Body, progress)
73 |
74 | _, err = io.Copy(out, reader)
75 | if err != nil {
76 | return err
77 | }
78 | fmt.Printf("\n") // 进度信息结束,换新行
79 |
80 | log.GetLogger().Info("文件下载完成", zap.String("路径", filepath))
81 | return nil
82 | }
83 |
--------------------------------------------------------------------------------
/pkg/util/language.go:
--------------------------------------------------------------------------------
1 | package util
2 |
--------------------------------------------------------------------------------
/pkg/util/subtitle.go:
--------------------------------------------------------------------------------
1 | package util
2 |
3 | import (
4 | "bufio"
5 | "fmt"
6 | "krillin-ai/internal/storage"
7 | "os"
8 | "os/exec"
9 | "path/filepath"
10 | "regexp"
11 | "strconv"
12 | "strings"
13 | "unicode"
14 | )
15 |
// ProcessBlock distributes one bilingual SRT block (index line, timestamp
// line, then one or two text lines) across four output files:
//   - targetLanguageFile / originLanguageFile: full SRT (index + timestamp + text)
//   - targetLanguageTextFile / originLanguageTextFile: text-only transcripts
//
// isTargetOnTop says whether the first text line of the block is the target
// language (true) or the origin language (false).
//
// NOTE(review): transcript lines are written without any separator
// (WriteString(line), no "\n") — confirm that concatenation is intended.
func ProcessBlock(block []string, targetLanguageFile, targetLanguageTextFile, originLanguageFile, originLanguageTextFile *os.File, isTargetOnTop bool) {
	var targetLines, originLines []string
	// Matches an SRT timestamp line, e.g. "00:00:01,000 --> 00:00:02,000".
	timePattern := regexp.MustCompile(`\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}`)
	for _, line := range block {
		if timePattern.MatchString(line) || IsNumber(line) {
			// Index and timestamp lines are kept in both SRT outputs.
			targetLines = append(targetLines, line)
			originLines = append(originLines, line)
			continue
		}
		// Exactly 2 collected lines (index + timestamp) means this is the
		// FIRST text line of the block — the one on top.
		if len(targetLines) == 2 && len(originLines) == 2 {
			if isTargetOnTop {
				targetLines = append(targetLines, line)
				targetLanguageTextFile.WriteString(line) // transcript file
			} else {
				originLines = append(originLines, line)
				originLanguageTextFile.WriteString(line)
			}
			continue
		}
		// Any further text line is the bottom-language line.
		if isTargetOnTop {
			originLines = append(originLines, line)
			originLanguageTextFile.WriteString(line)
		} else {
			targetLines = append(targetLines, line)
			targetLanguageTextFile.WriteString(line)
		}
	}

	// Only emit a block if it actually contained text (> index + timestamp).
	if len(targetLines) > 2 {
		// Write the target-language SRT block.
		for _, line := range targetLines {
			targetLanguageFile.WriteString(line + "\n")
		}
		targetLanguageFile.WriteString("\n")
	}

	if len(originLines) > 2 {
		// Write the origin-language SRT block.
		for _, line := range originLines {
			originLanguageFile.WriteString(line + "\n")
		}
		originLanguageFile.WriteString("\n")
	}
}
64 |
// IsSubtitleText reports whether line is actual subtitle text — i.e. not
// empty, not an SRT index number, and not a timestamp line.
func IsSubtitleText(line string) bool {
	if line == "" {
		return false
	}
	// Pure integers are SRT block index lines, not text.
	if _, err := strconv.Atoi(line); err == nil {
		return false
	}
	timeline := regexp.MustCompile(`\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}`)
	return !timeline.MatchString(line)
}
76 |
// Format mirrors the "format" object of ffprobe's JSON output.
type Format struct {
	Duration string `json:"duration"` // duration in seconds, as a decimal string
}

// ProbeData is the top-level shape of ffprobe's JSON output.
type ProbeData struct {
	Format Format `json:"format"`
}

// SrtBlock is one parsed entry of a bilingual "no-timestamp" SRT file
// (see ParseSrtNoTsToSrtBlock).
type SrtBlock struct {
	Index                  int    // subtitle sequence number (0 means "unset")
	Timestamp              string // NOTE(review): never populated by ParseSrtNoTsToSrtBlock — confirm
	TargetLanguageSentence string // translated (target-language) line
	OriginLanguageSentence string // original-language line
}
91 |
// TrimString cleans one line of LLM-produced subtitle text: removes the
// prompt markers "[中文翻译]" / "[英文句子]", strips surrounding spaces and
// brackets, and normalizes curly apostrophes to ASCII.
func TrimString(s string) string {
	for _, marker := range []string{"[中文翻译]", "[英文句子]"} {
		s = strings.ReplaceAll(s, marker, "")
	}
	// Strip any leading spaces / '[' and trailing spaces / ']'.
	s = strings.TrimLeft(s, " [")
	s = strings.TrimRight(s, " ]")
	// Normalize the Chinese-style apostrophe.
	return strings.ReplaceAll(s, "’", "'")
}
106 |
// ParseSrtNoTsToSrtBlock parses a "no-timestamp" SRT file into blocks.
// Each block is expected to be: index line, target-language line,
// origin-language line, blank separator. Leading non-numeric description
// lines are skipped. The Timestamp field is never filled here.
//
// Quirk (deliberate per the original comment): when an index line fails to
// parse, the blocks collected so far are returned with a nil error.
func ParseSrtNoTsToSrtBlock(srtNoTsFile string) ([]*SrtBlock, error) {
	file, err := os.Open(srtNoTsFile)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	var blocks []*SrtBlock
	var currentBlock SrtBlock
	scanner := bufio.NewScanner(file)
	start := true

	for scanner.Scan() {
		line := TrimString(scanner.Text())
		// Skip any leading description lines until the first numeric index.
		if start && !IsNumber(line) {
			continue
		} else {
			start = false
		}
		if line == "" { // a blank line ends the current block
			if currentBlock.Index != 0 {
				cur := currentBlock
				blocks = append(blocks, &cur)
				currentBlock = SrtBlock{} // reset for the next block
			}
			continue
		}

		// Fill the block's fields in file order: index, target, origin.
		if currentBlock.Index == 0 {
			var index int
			_, err = fmt.Sscanf(line, "%d", &index)
			if err != nil {
				return blocks, nil
			} // may be empty speech etc.; ignore the rest
			currentBlock.Index = index
		} else if currentBlock.TargetLanguageSentence == "" {
			currentBlock.TargetLanguageSentence = line
		} else if currentBlock.OriginLanguageSentence == "" {
			currentBlock.OriginLanguageSentence = line
		}
	}
	// Flush the final block (no trailing blank line needed).
	if currentBlock.Index != 0 {
		cur := currentBlock
		blocks = append(blocks, &cur)
	}

	if err = scanner.Err(); err != nil {
		return nil, err
	}
	return blocks, nil
}
160 |
161 | func SplitSentence(sentence string) []string {
162 | // 使用正则表达式移除标点符号和特殊字符(保留各语言字母、数字和空格)
163 | re := regexp.MustCompile(`[^\p{L}\p{N}\s']+`)
164 | cleanedSentence := re.ReplaceAllString(sentence, " ")
165 |
166 | // 使用 strings.Fields 按空格拆分成单词
167 | words := strings.Fields(cleanedSentence)
168 |
169 | return words
170 | }
171 |
172 | func MergeFile(finalFile string, files ...string) error {
173 | // 创建最终文件
174 | final, err := os.Create(finalFile)
175 | if err != nil {
176 | return err
177 | }
178 |
179 | // 逐个读取文件并写入最终文件
180 | for _, file := range files {
181 | f, err := os.Open(file)
182 | if err != nil {
183 | return err
184 | }
185 | defer f.Close()
186 |
187 | scanner := bufio.NewScanner(f)
188 | for scanner.Scan() {
189 | line := scanner.Text()
190 | final.WriteString(line + "\n")
191 | }
192 | }
193 |
194 | return nil
195 | }
196 |
197 | func MergeSrtFiles(finalFile string, files ...string) error {
198 | output, err := os.Create(finalFile)
199 | if err != nil {
200 | return err
201 | }
202 | defer output.Close()
203 | writer := bufio.NewWriter(output)
204 | lineNumber := 0
205 | for _, file := range files {
206 | // 不存在某一个file就跳过
207 | if _, err = os.Stat(file); os.IsNotExist(err) {
208 | continue
209 | }
210 | // 打开当前字幕文件
211 | f, err := os.Open(file)
212 | if err != nil {
213 | return err
214 | }
215 | defer f.Close()
216 | // 处理当前字幕文件
217 | scanner := bufio.NewScanner(f)
218 | for scanner.Scan() {
219 | line := scanner.Text()
220 |
221 | if strings.Contains(line, "```") {
222 | continue
223 | }
224 |
225 | if IsNumber(line) {
226 | lineNumber++
227 | line = strconv.Itoa(lineNumber)
228 | }
229 |
230 | writer.WriteString(line + "\n")
231 | }
232 | }
233 | writer.Flush()
234 |
235 | return nil
236 | }
237 |
238 | // 给定文件和替换map,将文件中所有的key替换成value
239 | func ReplaceFileContent(srcFile, dstFile string, replacements map[string]string) error {
240 | file, err := os.Open(srcFile)
241 | if err != nil {
242 | return err
243 | }
244 | defer file.Close()
245 |
246 | outFile, err := os.Create(dstFile)
247 | if err != nil {
248 | return err
249 | }
250 | defer outFile.Close()
251 |
252 | scanner := bufio.NewScanner(file)
253 | writer := bufio.NewWriter(outFile) // 提高性能
254 | defer writer.Flush()
255 |
256 | for scanner.Scan() {
257 | line := scanner.Text()
258 | for before, after := range replacements {
259 | line = strings.ReplaceAll(line, before, after)
260 | }
261 | _, _ = writer.WriteString(line + "\n")
262 | }
263 |
264 | if err = scanner.Err(); err != nil {
265 | return err
266 | }
267 |
268 | return nil
269 | }
270 |
271 | // 获得文件名后加上后缀的新文件名,不改变扩展名,例如:/home/ubuntu/abc.srt变成/home/ubuntu/abc_tmp.srt
272 | func AddSuffixToFileName(filePath, suffix string) string {
273 | dir := filepath.Dir(filePath)
274 | ext := filepath.Ext(filePath)
275 | name := strings.TrimSuffix(filepath.Base(filePath), ext)
276 | newName := fmt.Sprintf("%s%s%s", name, suffix, ext)
277 | return filepath.Join(dir, newName)
278 | }
279 |
// GetRecognizableString keeps only runes a whisper model reliably emits —
// Latin letters, digits, Han, Hangul, Hiragana and Katakana — so the result
// can be aligned against transcribed timestamps. Everything else
// (punctuation, spaces, ...) is dropped.
func GetRecognizableString(s string) string {
	var b strings.Builder
	for _, ch := range s {
		// Each script test appends independently, mirroring the original
		// sequence of checks.
		if unicode.Is(unicode.Latin, ch) || unicode.Is(unicode.Number, ch) {
			b.WriteRune(ch)
		}
		if unicode.Is(unicode.Han, ch) {
			b.WriteRune(ch)
		}
		if unicode.Is(unicode.Hangul, ch) {
			b.WriteRune(ch)
		}
		if unicode.Is(unicode.Hiragana, ch) || unicode.Is(unicode.Katakana, ch) {
			b.WriteRune(ch)
		}
	}
	return b.String()
}
303 |
304 | func GetAudioDuration(inputFile string) (float64, error) {
305 | // 使用 ffprobe 获取精确时长
306 | cmd := exec.Command(storage.FfprobePath, "-i", inputFile, "-show_entries", "format=duration", "-v", "quiet", "-of", "csv=p=0")
307 | cmdOutput, err := cmd.Output()
308 | if err != nil {
309 | return 0, fmt.Errorf("GetAudioDuration failed to get audio duration: %w", err)
310 | }
311 |
312 | // 解析时长
313 | duration, err := strconv.ParseFloat(strings.TrimSpace(string(cmdOutput)), 64)
314 | if err != nil {
315 | return 0, fmt.Errorf("GetAudioDuration failed to parse audio duration: %w", err)
316 | }
317 |
318 | return duration, nil
319 | }
320 |
--------------------------------------------------------------------------------
/pkg/whisper/init.go:
--------------------------------------------------------------------------------
1 | package whisper
2 |
import (
	"net/http"
	"net/url"

	"github.com/sashabaranov/go-openai"

	"krillin-ai/config"
)
8 |
// Client wraps the go-openai SDK client used for Whisper transcription.
type Client struct {
	client *openai.Client // underlying SDK client, configured by NewClient
}
12 |
13 | func NewClient(baseUrl, apiKey, proxyAddr string) *Client {
14 | cfg := openai.DefaultConfig(apiKey)
15 | if baseUrl != "" {
16 | cfg.BaseURL = baseUrl
17 | }
18 |
19 | if proxyAddr != "" {
20 | transport := &http.Transport{
21 | Proxy: http.ProxyURL(config.Conf.App.ParsedProxy),
22 | }
23 | cfg.HTTPClient = &http.Client{
24 | Transport: transport,
25 | }
26 | }
27 |
28 | client := openai.NewClientWithConfig(cfg)
29 | return &Client{client: client}
30 | }
31 |
--------------------------------------------------------------------------------
/pkg/whisper/whisper.go:
--------------------------------------------------------------------------------
1 | package whisper
2 |
3 | import (
4 | "context"
5 | "github.com/sashabaranov/go-openai"
6 | "go.uber.org/zap"
7 | "krillin-ai/internal/types"
8 | "krillin-ai/log"
9 | "strings"
10 | )
11 |
12 | func (c *Client) Transcription(audioFile, language, workDir string) (*types.TranscriptionData, error) {
13 | resp, err := c.client.CreateTranscription(
14 | context.Background(),
15 | openai.AudioRequest{
16 | Model: openai.Whisper1,
17 | FilePath: audioFile,
18 | Format: openai.AudioResponseFormatVerboseJSON,
19 | TimestampGranularities: []openai.TranscriptionTimestampGranularity{
20 | openai.TranscriptionTimestampGranularityWord,
21 | },
22 | Language: language,
23 | },
24 | )
25 | if err != nil {
26 | log.GetLogger().Error("openai create transcription failed", zap.Error(err))
27 | return nil, err
28 | }
29 |
30 | transcriptionData := &types.TranscriptionData{
31 | Language: resp.Language,
32 | Text: strings.ReplaceAll(resp.Text, "-", " "), // 连字符处理,因为模型存在很多错误添加到连字符
33 | Words: make([]types.Word, 0),
34 | }
35 | num := 0
36 | for _, word := range resp.Words {
37 | if strings.Contains(word.Word, "—") {
38 | // 对称切分
39 | mid := (word.Start + word.End) / 2
40 | seperatedWords := strings.Split(word.Word, "—")
41 | transcriptionData.Words = append(transcriptionData.Words, []types.Word{
42 | {
43 | Num: num,
44 | Text: seperatedWords[0],
45 | Start: word.Start,
46 | End: mid,
47 | },
48 | {
49 | Num: num + 1,
50 | Text: seperatedWords[1],
51 | Start: mid,
52 | End: word.End,
53 | },
54 | }...)
55 | num += 2
56 | } else {
57 | transcriptionData.Words = append(transcriptionData.Words, types.Word{
58 | Num: num,
59 | Text: word.Word,
60 | Start: word.Start,
61 | End: word.End,
62 | })
63 | num++
64 | }
65 | }
66 |
67 | return transcriptionData, nil
68 | }
69 |
--------------------------------------------------------------------------------
/pkg/whispercpp/init.go:
--------------------------------------------------------------------------------
1 | package whispercpp
2 |
// WhispercppProcessor transcribes audio by shelling out to a local
// whisper.cpp binary.
type WhispercppProcessor struct {
	WorkDir string // directory for intermediate files
	Model   string // model name; resolved to ./models/whispercpp/ggml-<Model>.bin
}

// NewWhispercppProcessor returns a processor for the given model name.
// NOTE(review): WorkDir is left empty and nothing in this package sets it.
func NewWhispercppProcessor(model string) *WhispercppProcessor {
	return &WhispercppProcessor{
		Model: model,
	}
}
13 |
--------------------------------------------------------------------------------
/pkg/whispercpp/transcription.go:
--------------------------------------------------------------------------------
1 | package whispercpp
2 |
3 | import (
4 | "encoding/json"
5 | "fmt"
6 | "krillin-ai/internal/storage"
7 | "krillin-ai/internal/types"
8 | "krillin-ai/log"
9 | "krillin-ai/pkg/util"
10 | "os"
11 | "os/exec"
12 | "regexp"
13 | "strconv"
14 | "strings"
15 |
16 | "go.uber.org/zap"
17 | )
18 |
19 | func (c *WhispercppProcessor) Transcription(audioFile, language, workDir string) (*types.TranscriptionData, error) {
20 | name := util.ChangeFileExtension(audioFile, "")
21 | cmdArgs := []string{
22 | "-m", fmt.Sprintf("./models/whispercpp/ggml-%s.bin", c.Model),
23 | "--output-json-full",
24 | "--flash-attn",
25 | "--split-on-word",
26 | "--language", language,
27 | "--output-file", name,
28 | "--file", audioFile,
29 | }
30 | cmd := exec.Command(storage.WhispercppPath, cmdArgs...)
31 | log.GetLogger().Info("WhispercppProcessor转录开始", zap.String("cmd", cmd.String()))
32 | output, err := cmd.CombinedOutput()
33 | if err != nil && !strings.Contains(string(output), "output_json: saving output to") {
34 | log.GetLogger().Error("WhispercppProcessor cmd 执行失败", zap.String("output", string(output)), zap.Error(err))
35 | return nil, err
36 | }
37 | log.GetLogger().Info("WhispercppProcessor转录json生成完毕", zap.String("audio file", audioFile))
38 |
39 | var result types.WhispercppOutput
40 | fileData, err := os.Open(util.ChangeFileExtension(audioFile, ".json"))
41 | if err != nil {
42 | log.GetLogger().Error("WhispercppProcessor 打开json文件失败", zap.Error(err))
43 | return nil, err
44 | }
45 | defer fileData.Close()
46 | decoder := json.NewDecoder(fileData)
47 | if err = decoder.Decode(&result); err != nil {
48 | log.GetLogger().Error("WhispercppProcessor 解析json文件失败", zap.Error(err))
49 | return nil, err
50 | }
51 |
52 | var (
53 | transcriptionData types.TranscriptionData
54 | num int
55 | )
56 | for _, segment := range result.Transcription {
57 | transcriptionData.Text += strings.ReplaceAll(segment.Text, "—", " ") // 连字符处理,因为模型存在很多错误添加到连字符
58 | for _, word := range segment.Tokens {
59 | fromSec, err := parseTimestampToSeconds(word.Timestamps.From)
60 | if err != nil {
61 | log.GetLogger().Error("解析开始时间失败", zap.Error(err))
62 | return nil, err
63 | }
64 |
65 | toSec, err := parseTimestampToSeconds(word.Timestamps.To)
66 | if err != nil {
67 | log.GetLogger().Error("解析结束时间失败", zap.Error(err))
68 | return nil, err
69 | }
70 | regex := regexp.MustCompile(`^\[.*\]$`)
71 | if regex.MatchString(word.Text) {
72 | continue
73 | } else if strings.Contains(word.Text, "—") {
74 | // 对称切分
75 | mid := (fromSec + toSec) / 2
76 | seperatedWords := strings.Split(word.Text, "—")
77 | transcriptionData.Words = append(transcriptionData.Words, []types.Word{
78 | {
79 | Num: num,
80 | Text: util.CleanPunction(strings.TrimSpace(seperatedWords[0])),
81 | Start: fromSec,
82 | End: mid,
83 | },
84 | {
85 | Num: num + 1,
86 | Text: util.CleanPunction(strings.TrimSpace(seperatedWords[1])),
87 | Start: mid,
88 | End: toSec,
89 | },
90 | }...)
91 | num += 2
92 | } else {
93 | transcriptionData.Words = append(transcriptionData.Words, types.Word{
94 | Num: num,
95 | Text: util.CleanPunction(strings.TrimSpace(word.Text)),
96 | Start: fromSec,
97 | End: toSec,
98 | })
99 | num++
100 | }
101 | }
102 | }
103 | log.GetLogger().Info("WhispercppProcessor转录成功")
104 | return &transcriptionData, nil
105 | }
106 |
// parseTimestampToSeconds converts a whisper.cpp "HH:MM:SS,mmm" timestamp
// into seconds.
//
// Fix: the strconv.Atoi results were previously ignored, so non-numeric
// fields silently parsed as 0 with a nil error; they now return an error.
func parseTimestampToSeconds(timeStr string) (float64, error) {
	parts := strings.Split(timeStr, ",")
	if len(parts) != 2 {
		return 0, fmt.Errorf("invalid timestamp format: %s", timeStr)
	}

	timePart := strings.Split(parts[0], ":")
	if len(timePart) != 3 {
		return 0, fmt.Errorf("invalid time format: %s", parts[0])
	}

	hours, err := strconv.Atoi(timePart[0])
	if err != nil {
		return 0, fmt.Errorf("invalid hours in timestamp %q: %w", timeStr, err)
	}
	minutes, err := strconv.Atoi(timePart[1])
	if err != nil {
		return 0, fmt.Errorf("invalid minutes in timestamp %q: %w", timeStr, err)
	}
	seconds, err := strconv.Atoi(timePart[2])
	if err != nil {
		return 0, fmt.Errorf("invalid seconds in timestamp %q: %w", timeStr, err)
	}
	milliseconds, err := strconv.Atoi(parts[1])
	if err != nil {
		return 0, fmt.Errorf("invalid milliseconds in timestamp %q: %w", timeStr, err)
	}

	return float64(hours*3600+minutes*60+seconds) + float64(milliseconds)/1000, nil
}
126 |
--------------------------------------------------------------------------------
/pkg/whisperkit/init.go:
--------------------------------------------------------------------------------
1 | package whisperkit
2 |
// WhisperKitProcessor transcribes audio by shelling out to a local
// WhisperKit CLI binary (macOS).
type WhisperKitProcessor struct {
	WorkDir string // directory for intermediate files
	Model   string // NOTE(review): Transcription hardcodes openai_whisper-large-v2 and never reads this — confirm
}

// NewWhisperKitProcessor returns a processor for the given model name.
// NOTE(review): WorkDir is left empty and nothing in this package sets it.
func NewWhisperKitProcessor(model string) *WhisperKitProcessor {
	return &WhisperKitProcessor{
		Model: model,
	}
}
13 |
--------------------------------------------------------------------------------
/pkg/whisperkit/transcription.go:
--------------------------------------------------------------------------------
1 | package whisperkit
2 |
3 | import (
4 | "encoding/json"
5 | "krillin-ai/internal/storage"
6 | "krillin-ai/internal/types"
7 | "krillin-ai/log"
8 | "krillin-ai/pkg/util"
9 | "os"
10 | "os/exec"
11 | "strings"
12 |
13 | "go.uber.org/zap"
14 | )
15 |
// Transcription runs the WhisperKit CLI on audioFile, parses the JSON report
// it produces, and converts it into TranscriptionData with word-level
// timestamps. Em-dash-glued words are split symmetrically across their
// time span.
//
// NOTE(review): --report-path points at workDir, but the JSON is opened next
// to audioFile (ChangeFileExtension). This only works if audioFile lives in
// workDir — confirm with callers.
func (c *WhisperKitProcessor) Transcription(audioFile, language, workDir string) (*types.TranscriptionData, error) {
	cmdArgs := []string{
		"transcribe",
		// Model path is hardcoded; c.Model is not consulted here.
		"--model-path", "./models/whisperkit/openai_whisper-large-v2",
		"--audio-encoder-compute-units", "all",
		"--text-decoder-compute-units", "all",
		"--language", language,
		"--report",
		"--report-path", workDir,
		"--word-timestamps",
		"--skip-special-tokens",
		"--audio-path", audioFile,
	}
	cmd := exec.Command(storage.WhisperKitPath, cmdArgs...)
	log.GetLogger().Info("WhisperKitProcessor转录开始", zap.String("cmd", cmd.String()))
	output, err := cmd.CombinedOutput()
	if err != nil {
		log.GetLogger().Error("WhisperKitProcessor cmd 执行失败", zap.String("output", string(output)), zap.Error(err))
		return nil, err
	}
	log.GetLogger().Info("WhisperKitProcessor转录json生成完毕", zap.String("audio file", audioFile))

	// Decode the JSON report written by the CLI.
	var result types.WhisperKitOutput
	fileData, err := os.Open(util.ChangeFileExtension(audioFile, ".json"))
	if err != nil {
		log.GetLogger().Error("WhisperKitProcessor 打开json文件失败", zap.Error(err))
		return nil, err
	}
	defer fileData.Close()
	decoder := json.NewDecoder(fileData)
	if err = decoder.Decode(&result); err != nil {
		log.GetLogger().Error("WhisperKitProcessor 解析json文件失败", zap.Error(err))
		return nil, err
	}

	var (
		transcriptionData types.TranscriptionData
		num               int
	)
	for _, segment := range result.Segments {
		// The model inserts spurious em dashes; replace them with spaces.
		transcriptionData.Text += strings.ReplaceAll(segment.Text, "—", " ")
		for _, word := range segment.Words {
			if strings.Contains(word.Word, "—") {
				// Split symmetrically around the midpoint of the time span.
				mid := (word.Start + word.End) / 2
				seperatedWords := strings.Split(word.Word, "—")
				transcriptionData.Words = append(transcriptionData.Words, []types.Word{
					{
						Num:   num,
						Text:  util.CleanPunction(strings.TrimSpace(seperatedWords[0])),
						Start: word.Start,
						End:   mid,
					},
					{
						Num:   num + 1,
						Text:  util.CleanPunction(strings.TrimSpace(seperatedWords[1])),
						Start: mid,
						End:   word.End,
					},
				}...)
				num += 2
			} else {
				transcriptionData.Words = append(transcriptionData.Words, types.Word{
					Num:   num,
					Text:  util.CleanPunction(strings.TrimSpace(word.Word)),
					Start: word.Start,
					End:   word.End,
				})
				num++
			}
		}
	}
	log.GetLogger().Info("WhisperKitProcessor转录成功")
	return &transcriptionData, nil
}
91 |
--------------------------------------------------------------------------------
/static/background.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/krillinai/KrillinAI/d32920bc6b13724e0f39287bf6952cfc323b943e/static/background.jpg
--------------------------------------------------------------------------------
/static/embed.go:
--------------------------------------------------------------------------------
package static

import "embed"

// EmbeddedFiles bundles the static assets (index.html, background.jpg)
// into the binary at compile time via go:embed.
//
//go:embed index.html background.jpg
var EmbeddedFiles embed.FS
7 |
--------------------------------------------------------------------------------