├── .github └── workflows │ └── release.yml ├── .gitignore ├── .goreleaser.yaml ├── Dockerfile ├── LICENSE ├── README.md ├── cmd ├── desktop │ └── main.go └── server │ └── main.go ├── config ├── config-example.toml └── config.go ├── docs ├── README_ar.md ├── README_de.md ├── README_es.md ├── README_fr.md ├── README_jp.md ├── README_kr.md ├── README_pt.md ├── README_rus.md ├── README_vi.md ├── README_zh.md ├── aliyun.md ├── docker.md ├── faq.md ├── get_cookies.md └── images │ ├── alignment.png │ ├── aliyun_accesskey_1.png │ ├── aliyun_oss_1.png │ ├── aliyun_oss_2.png │ ├── aliyun_oss_3.png │ ├── aliyun_oss_4.png │ ├── aliyun_oss_5.png │ ├── aliyun_speech_1.png │ ├── aliyun_speech_2.png │ ├── aliyun_speech_3.png │ ├── aliyun_speech_4.png │ ├── bailian_1.png │ ├── export_cookies.png │ ├── logo.png │ ├── ui.jpg │ └── ui_desktop.png ├── go.mod ├── go.sum ├── internal ├── api │ └── subtitle.go ├── deps │ └── checker.go ├── desktop │ ├── components.go │ ├── desktop.go │ ├── file.go │ ├── subtitle.go │ ├── theme.go │ └── ui.go ├── dto │ └── subtitle_task.go ├── handler │ ├── init.go │ ├── middleware.go │ └── subtitle_task.go ├── response │ └── response.go ├── router │ └── router.go ├── server │ └── server.go ├── service │ ├── audio2subtitle.go │ ├── audio2subtitle_test.go │ ├── get_video_info.go │ ├── init.go │ ├── link2file.go │ ├── srt2speech.go │ ├── srt_embed.go │ ├── subtitle_service.go │ └── upload_subtitle.go ├── storage │ ├── bin.go │ └── subtitle_task.go └── types │ ├── embed_subtitle.go │ ├── fasterwhisper.go │ ├── interface.go │ ├── language.go │ ├── subtitle_task.go │ ├── whispercpp.go │ └── whisperkit.go ├── log └── zap.go ├── pkg ├── aliyun │ ├── asr.go │ ├── base.go │ ├── chat.go │ ├── oss.go │ ├── tts.go │ └── voice_clone.go ├── fasterwhisper │ ├── init.go │ └── transcription.go ├── openai │ ├── init.go │ └── openai.go ├── util │ ├── base.go │ ├── download.go │ ├── language.go │ └── subtitle.go ├── whisper │ ├── init.go │ └── whisper.go ├── whispercpp │ ├── init.go │ └── transcription.go └── whisperkit │ ├── init.go │ └── transcription.go └── static ├── background.jpg ├── embed.go └── index.html /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: release 2 | 3 | on: 4 | push: 5 | tags: 6 | - "v*" 7 | - "v*-*" 8 | 9 | permissions: 10 | contents: write 11 | 12 | jobs: 13 | build-desktop: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: Checkout code 17 | uses: actions/checkout@v4 18 | 19 | - name: Set up Go 20 | uses: actions/setup-go@v5 21 | with: 22 | go-version-file: "go.mod" 23 | 24 | - name: Install XGO 25 | run: | 26 | go install src.techknowlogick.com/xgo@latest 27 | 28 | - name: Pull Docker Image 29 | run: | 30 | docker pull ghcr.io/techknowlogick/xgo:latest 31 | 32 | - name: Get Version 33 | id: version 34 | uses: actions/github-script@v7 35 | with: 36 | script: | 37 | const rawTag = '${{ github.ref_name }}'; 38 | const version = rawTag.replace(/^v/, ''); // Remove the leading 'v' if present 39 | console.log(`Version: ${version}`); 40 | core.setOutput('version', version); 41 | 42 | - name: Build Binary 43 | run: | 44 | targets=( 45 | # macOS (amd64) 46 | "darwin amd64 _amd64 macOS" 47 | # macOS (arm64) 48 | "darwin arm64 _arm64 macOS" 49 | # Windows (amd64) 50 | "windows amd64 .exe Windows" 51 | # Windows (386) 52 | "windows 386 _i386.exe Windows" 53 | ) 54 | mkdir -p build 55 | 56 | # 遍历所有平台 57 | for entry in "${targets[@]}"; do 58 | ( 59 | # 拆分字符串 60 | IFS=' ' read -r -a parts <<< "$entry" 61 | 
os="${parts[0]}" 62 | arch="${parts[1]}" 63 | suffix="${parts[2]}" 64 | display_os="${parts[3]}" 65 | log_prefix="[${os}-${arch}]" 66 | # 构建目标目录 67 | target_dir="dist/${os}_${arch}" 68 | mkdir -p "$target_dir" 69 | # 使用 xgo 构建 70 | echo "${log_prefix} 🚀 Building for $os/$arch..." 71 | xgo \ 72 | --targets="$os/$arch" \ 73 | --out "krillinai_desktop" \ 74 | --dest "$target_dir" \ 75 | ./cmd/desktop 2>&1 | sed "s/^/${log_prefix} /" 76 | # 生成最终二进制文件名日志输 77 | binary_name="KrillinAI_${{ steps.version.outputs.version }}_Desktop_${display_os}${suffix}" 78 | # 移动并重命名文件 79 | mv "$target_dir"/krillinai_desktop* "build/$binary_name" 80 | echo "${log_prefix} ✅ Built: build/$binary_name" 81 | ) & 82 | done 83 | 84 | wait 85 | echo "✨ All concurrent tasks completed!" 86 | 87 | - name: Upload artifacts 88 | uses: actions/upload-artifact@v4 89 | with: 90 | path: build/* 91 | retention-days: 1 92 | 93 | goreleaser: 94 | needs: build-desktop 95 | if: always() 96 | runs-on: ubuntu-latest 97 | steps: 98 | - name: Set up QEMU 99 | uses: docker/setup-qemu-action@v3 100 | 101 | - name: Set up Docker Buildx 102 | uses: docker/setup-buildx-action@v3 103 | 104 | - name: Checkout code 105 | uses: actions/checkout@v4 106 | with: 107 | fetch-depth: 20 108 | 109 | - name: Download artifacts 110 | uses: actions/download-artifact@v4 111 | with: 112 | path: build 113 | 114 | - name: Set up Go 115 | uses: actions/setup-go@v5 116 | with: 117 | go-version-file: "go.mod" 118 | 119 | - name: Login to Docker Hub 120 | uses: docker/login-action@v3 121 | with: 122 | username: ${{ secrets.DOCKER_USERNAME }} 123 | password: ${{ secrets.DOCKERHUB_TOKEN }} 124 | 125 | - name: Run GoReleaser 126 | uses: goreleaser/goreleaser-action@v6 127 | with: 128 | distribution: goreleaser 129 | version: latest 130 | args: release --clean 131 | env: 132 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 133 | DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }} 134 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | tasks/ 3 | cookies.txt 4 | .vscode/ 5 | config/config.toml 6 | bin/ 7 | models/ 8 | uploads/ 9 | app.log 10 | build/ 11 | dist/ 12 | 13 | # MACOS 14 | .DS_Store 15 | ._* 16 | -------------------------------------------------------------------------------- /.goreleaser.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | builds: 4 | - env: 5 | - CGO_ENABLED=0 6 | goos: 7 | - darwin 8 | - linux 9 | - windows 10 | main: ./cmd/server/main.go 11 | 12 | # Docker 构建配置 13 | dockers: 14 | - image_templates: 15 | - "{{ tolower .Env.DOCKER_USERNAME }}/{{ tolower .ProjectName }}:{{.Version}}-amd64" 16 | - "{{ tolower .Env.DOCKER_USERNAME }}/{{ tolower .ProjectName }}:latest-amd64" 17 | dockerfile: Dockerfile 18 | use: buildx 19 | build_flag_templates: 20 | - "--platform=linux/amd64" 21 | - "--label=org.opencontainers.image.created={{.Date}}" 22 | - "--label=org.opencontainers.image.title={{.ProjectName}}" 23 | - "--label=org.opencontainers.image.revision={{.FullCommit}}" 24 | - "--label=org.opencontainers.image.version={{.Version}}" 25 | - "--label=org.opencontainers.image.source=https://github.com/{{ .Env.GITHUB_REPOSITORY_OWNER }}/{{.ProjectName}}" 26 | - image_templates: 27 | - "{{ tolower .Env.DOCKER_USERNAME }}/{{ tolower .ProjectName }}:{{.Version}}-arm64" 28 | - "{{ tolower .Env.DOCKER_USERNAME }}/{{ tolower .ProjectName }}:latest-arm64" 29 | dockerfile: 
Dockerfile 30 | use: buildx 31 | build_flag_templates: 32 | - "--platform=linux/arm64" 33 | - "--label=org.opencontainers.image.created={{.Date}}" 34 | - "--label=org.opencontainers.image.title={{.ProjectName}}" 35 | - "--label=org.opencontainers.image.revision={{.FullCommit}}" 36 | - "--label=org.opencontainers.image.version={{.Version}}" 37 | - "--label=org.opencontainers.image.source=https://github.com/{{ .Env.GITHUB_REPOSITORY_OWNER }}/{{.ProjectName}}" 38 | 39 | docker_manifests: 40 | - name_template: "{{ tolower .Env.DOCKER_USERNAME }}/{{ tolower .ProjectName }}:{{.Version}}" 41 | image_templates: 42 | - "{{ tolower .Env.DOCKER_USERNAME }}/{{ tolower .ProjectName }}:{{.Version}}-amd64" 43 | - "{{ tolower .Env.DOCKER_USERNAME }}/{{ tolower .ProjectName }}:{{.Version}}-arm64" 44 | - name_template: "{{ tolower .Env.DOCKER_USERNAME }}/{{ tolower .ProjectName }}:latest" 45 | image_templates: 46 | - "{{ tolower .Env.DOCKER_USERNAME }}/{{ tolower .ProjectName }}:latest-amd64" 47 | - "{{ tolower .Env.DOCKER_USERNAME }}/{{ tolower .ProjectName }}:latest-arm64" 48 | 49 | archives: 50 | - formats: ["binary"] 51 | name_template: >- 52 | {{ .ProjectName }}_ 53 | {{- .Version }}_ 54 | {{- if eq .Os "darwin" }}macOS_{{ .Arch }} 55 | {{- else if and (eq .Os "windows") (eq .Arch "amd64") }}{{ title .Os }} 56 | {{- else }}{{ title .Os }}_ 57 | {{- if eq .Arch "amd64" }}x86_64 58 | {{- else if eq .Arch "386" }}i386 59 | {{- else }}{{ .Arch }}{{ end }} 60 | {{- if .Arm }}v{{ .Arm }}{{ end }} 61 | {{- end }} 62 | 63 | release: 64 | extra_files: 65 | - glob: "build/*" 66 | 67 | changelog: 68 | sort: asc 69 | filters: 70 | exclude: 71 | - "^docs:" 72 | - "^test:" 73 | - "^chore:" 74 | - "^ci:" 75 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM alpine:latest as builder 2 | 3 | # 安装基础工具并创建目录 4 | WORKDIR /build 5 | RUN apk add --no-cache wget && \ 6 | mkdir -p bin && \ 7 | ARCH=$(uname -m) && \ 8 | case "$ARCH" in \ 9 | x86_64) \ 10 | URL="https://github.com/yt-dlp/yt-dlp/releases/download/2025.01.15/yt-dlp_linux"; \ 11 | ;; \ 12 | armv7l) \ 13 | URL="https://github.com/yt-dlp/yt-dlp/releases/download/2025.01.15/yt-dlp_linux_armv7l"; \ 14 | ;; \ 15 | aarch64) \ 16 | URL="https://github.com/yt-dlp/yt-dlp/releases/download/2025.01.15/yt-dlp_linux_aarch64"; \ 17 | ;; \ 18 | *) \ 19 | echo "Unsupported architecture: $ARCH" && exit 1; \ 20 | ;; \ 21 | esac && \ 22 | wget -O bin/yt-dlp "$URL" && \ 23 | chmod +x bin/yt-dlp 24 | 25 | # 最终镜像 26 | FROM jrottenberg/ffmpeg:6.1-alpine 27 | 28 | # 设置工作目录并复制文件 29 | WORKDIR /app 30 | COPY --from=builder /build/bin /app/bin 31 | COPY KrillinAI ./ 32 | 33 | # 创建必要目录并设置权限 34 | RUN mkdir -p /app/models && \ 35 | chmod +x ./KrillinAI 36 | 37 | # 声明卷 38 | VOLUME ["/app/bin", "/app/models"] 39 | 40 | # 设置环境变量 41 | ENV PATH="/app/bin:${PATH}" 42 | 43 | # 设置端口 44 | EXPOSE 8888/tcp 45 | 46 | # 设置入口点 47 | ENTRYPOINT ["./KrillinAI"] -------------------------------------------------------------------------------- /cmd/desktop/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "go.uber.org/zap" 5 | "krillin-ai/config" 6 | "krillin-ai/internal/desktop" 7 | "krillin-ai/internal/server" 8 | "krillin-ai/log" 9 | "os" 10 | ) 11 | 12 | func main() { 13 | log.InitLogger() 14 | defer log.GetLogger().Sync() 15 | 16 | config.LoadConfig() 17 | if config.Conf.App.TranscribeProvider == "" 
|| config.Conf.App.LlmProvider == "" { 18 | // 确保有最基础的配置 19 | config.Conf.App.TranscribeProvider = "openai" 20 | config.Conf.App.LlmProvider = "openai" 21 | err := config.SaveConfig() 22 | if err != nil { 23 | log.GetLogger().Error("保存配置失败", zap.Error(err)) 24 | os.Exit(1) 25 | } 26 | } 27 | config.LoadConfig() 28 | go func() { 29 | if err := server.StartBackend(); err != nil { 30 | log.GetLogger().Error("后端服务启动失败", zap.Error(err)) 31 | os.Exit(1) 32 | } 33 | }() 34 | desktop.Show() 35 | } 36 | -------------------------------------------------------------------------------- /cmd/server/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "go.uber.org/zap" 5 | "krillin-ai/config" 6 | "krillin-ai/internal/deps" 7 | "krillin-ai/internal/server" 8 | "krillin-ai/log" 9 | "os" 10 | ) 11 | 12 | func main() { 13 | log.InitLogger() 14 | defer log.GetLogger().Sync() 15 | 16 | var err error 17 | config.LoadConfig() 18 | 19 | if err = config.CheckConfig(); err != nil { 20 | log.GetLogger().Error("加载配置失败", zap.Error(err)) 21 | return 22 | } 23 | 24 | if err = deps.CheckDependency(); err != nil { 25 | log.GetLogger().Error("依赖环境准备失败", zap.Error(err)) 26 | return 27 | } 28 | if err = server.StartBackend(); err != nil { 29 | log.GetLogger().Error("后端服务启动失败", zap.Error(err)) 30 | os.Exit(1) 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /config/config-example.toml: -------------------------------------------------------------------------------- 1 | [app] 2 | segment_duration = 5 # 音频切分处理间隔,单位:分钟,建议值:5-10,如果视频中话语较少可以适当提高 3 | translate_parallel_num = 5 # 并发进行模型转录和翻译的数量上限,建议值:5,如果使用了本地模型,该项自动不生效 4 | proxy = "" # 网络代理地址,格式如http://127.0.0.1:7890,可不填 5 | transcribe_provider = "openai" # 语音识别,当前可选值:openai,fasterwhisper,whisperkit,whisper.cpp,aliyun。(fasterwhisper不支持macOS,whisperkit只支持M芯片) 6 | llm_provider = "openai" # LLM,当前可选值:openai,aliyun 7 | 8 | [server] 9 | host = "127.0.0.1" 10 | port = 8888 11 | 12 | # 下方的配置非必填,请结合上方的选项和文档说明进行配置 13 | [local_model] 14 | fasterwhisper = "large-v2" # fasterwhisper的本地模型可选值:tiny,medium,large-v2,建议medium及以上 15 | whisperkit = "large-v2" # whisperkit的本地模型可选值:large-v2 16 | whispercpp = "large-v2" # whisper.cpp的本地模型 17 | 18 | [openai] 19 | base_url = "" # OpenAI API 自定义base url,可配合转发站密钥使用,留空为默认API地址 20 | model = "" # 指定模型名,可通过此字段结合base_url使用外部任何与OpenAI API兼容的大模型服务,留空默认为gpt-4o-mini 21 | api_key = "sk-XXX" # OpenAI API密钥 22 | [openai.whisper] # 由于使用whisperAPI进行语音识别时,上方可能配置使用了OpenAI格式兼容的其它厂商的模型,所以此处需要独立填入openai的配置信息 23 | base_url = "" 24 | api_key = "" 25 | 26 | [aliyun] # 具体请参考文档中的“阿里云配置说明” 27 | [aliyun.oss] 28 | access_key_id = "" 29 | access_key_secret = "" 30 | bucket = "" 31 | [aliyun.speech] 32 | access_key_id = "" 33 | access_key_secret = "" 34 | app_key= "" 35 | [aliyun.bailian] 36 | api_key = "" -------------------------------------------------------------------------------- /config/config.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "github.com/BurntSushi/toml" 7 | "go.uber.org/zap" 8 | "krillin-ai/log" 9 | "net/url" 10 | "os" 11 | "path/filepath" 12 | "runtime" 13 | ) 14 | 15 | type App struct { 16 | SegmentDuration int `toml:"segment_duration"` 17 | TranslateParallelNum int `toml:"translate_parallel_num"` 18 | Proxy string `toml:"proxy"` 19 | ParsedProxy *url.URL `toml:"-"` 20 | TranscribeProvider string `toml:"transcribe_provider"` 21 | 
LlmProvider string `toml:"llm_provider"` 22 | } 23 | 24 | type Server struct { 25 | Host string `toml:"host"` 26 | Port int `toml:"port"` 27 | } 28 | 29 | type LocalModel struct { 30 | Fasterwhisper string `toml:"fasterwhisper"` 31 | Whisperkit string `toml:"whisperkit"` 32 | Whispercpp string `toml:"whispercpp"` 33 | } 34 | 35 | type OpenAiWhisper struct { 36 | BaseUrl string `toml:"base_url"` 37 | ApiKey string `toml:"api_key"` 38 | } 39 | 40 | type Openai struct { 41 | BaseUrl string `toml:"base_url"` 42 | Model string `toml:"model"` 43 | ApiKey string `toml:"api_key"` 44 | Whisper OpenAiWhisper `toml:"whisper"` 45 | } 46 | 47 | type AliyunOss struct { 48 | AccessKeyId string `toml:"access_key_id"` 49 | AccessKeySecret string `toml:"access_key_secret"` 50 | Bucket string `toml:"bucket"` 51 | } 52 | 53 | type AliyunSpeech struct { 54 | AccessKeyId string `toml:"access_key_id"` 55 | AccessKeySecret string `toml:"access_key_secret"` 56 | AppKey string `toml:"app_key"` 57 | } 58 | 59 | type AliyunBailian struct { 60 | ApiKey string `toml:"api_key"` 61 | } 62 | 63 | type Aliyun struct { 64 | Oss AliyunOss `toml:"oss"` 65 | Speech AliyunSpeech `toml:"speech"` 66 | Bailian AliyunBailian `toml:"bailian"` 67 | } 68 | 69 | type Config struct { 70 | App App `toml:"app"` 71 | Server Server `toml:"server"` 72 | LocalModel LocalModel `toml:"local_model"` 73 | Openai Openai `toml:"openai"` 74 | Aliyun Aliyun `toml:"aliyun"` 75 | } 76 | 77 | var Conf = Config{ 78 | App: App{ 79 | SegmentDuration: 5, 80 | TranslateParallelNum: 5, 81 | TranscribeProvider: "openai", 82 | LlmProvider: "openai", 83 | }, 84 | Server: Server{ 85 | Host: "127.0.0.1", 86 | Port: 8888, 87 | }, 88 | LocalModel: LocalModel{ 89 | Fasterwhisper: "large-v2", 90 | Whisperkit: "large-v2", 91 | Whispercpp: "large-v2", 92 | }, 93 | } 94 | 95 | // 检查必要的配置是否完整 96 | func validateConfig() error { 97 | // 检查转写服务提供商配置 98 | switch Conf.App.TranscribeProvider { 99 | case "openai": 100 | if Conf.Openai.Whisper.ApiKey == "" { 101 | return errors.New("使用OpenAI转写服务需要配置 OpenAI API Key") 102 | } 103 | case "fasterwhisper": 104 | if Conf.LocalModel.Fasterwhisper != "tiny" && Conf.LocalModel.Fasterwhisper != "medium" && Conf.LocalModel.Fasterwhisper != "large-v2" { 105 | return errors.New("检测到开启了fasterwhisper,但模型选型配置不正确,请检查配置") 106 | } 107 | case "whisperkit": 108 | Conf.App.TranslateParallelNum = 1 109 | if runtime.GOOS != "darwin" { 110 | log.GetLogger().Error("whisperkit只支持macos", zap.String("当前系统", runtime.GOOS)) 111 | return fmt.Errorf("whisperkit只支持macos") 112 | } 113 | if Conf.LocalModel.Whisperkit != "large-v2" { 114 | return errors.New("检测到开启了whisperkit,但模型选型配置不正确,请检查配置") 115 | } 116 | case "whispercpp": 117 | if runtime.GOOS != "windows" { // 当前先仅支持win,模型仅支持large-v2,最小化产品 118 | log.GetLogger().Error("whispercpp only support windows", zap.String("current os", runtime.GOOS)) 119 | return fmt.Errorf("whispercpp only support windows") 120 | } 121 | if Conf.LocalModel.Whispercpp != "large-v2" { 122 | return errors.New("检测到开启了whisper.cpp,但模型选型配置不正确,请检查配置") 123 | } 124 | case "aliyun": 125 | if Conf.Aliyun.Speech.AccessKeyId == "" || Conf.Aliyun.Speech.AccessKeySecret == "" || Conf.Aliyun.Speech.AppKey == "" { 126 | return errors.New("使用阿里云语音服务需要配置相关密钥") 127 | } 128 | default: 129 | return errors.New("不支持的转录提供商") 130 | } 131 | 132 | // 检查LLM提供商配置 133 | switch Conf.App.LlmProvider { 134 | case "openai": 135 | if Conf.Openai.ApiKey == "" { 136 | return errors.New("使用OpenAI LLM服务需要配置 OpenAI API Key") 137 | } 138 | case "aliyun": 139 | if 
Conf.Aliyun.Bailian.ApiKey == "" { 140 | return errors.New("使用阿里云百炼服务需要配置 API Key") 141 | } 142 | default: 143 | return errors.New("不支持的LLM提供商") 144 | } 145 | 146 | return nil 147 | } 148 | 149 | func LoadConfig() { 150 | var err error 151 | configPath := "./config/config.toml" 152 | if _, err = os.Stat(configPath); os.IsNotExist(err) { 153 | return 154 | } else { 155 | log.GetLogger().Info("已找到配置文件,从配置文件中加载配置") 156 | if _, err = toml.DecodeFile(configPath, &Conf); err != nil { 157 | log.GetLogger().Error("加载配置文件失败", zap.Error(err)) 158 | return 159 | } 160 | } 161 | } 162 | 163 | // 验证配置 164 | func CheckConfig() error { 165 | var err error 166 | // 解析代理地址 167 | Conf.App.ParsedProxy, err = url.Parse(Conf.App.Proxy) 168 | if err != nil { 169 | return err 170 | } 171 | return validateConfig() 172 | } 173 | 174 | // SaveConfig 保存配置到文件 175 | func SaveConfig() error { 176 | configPath := filepath.Join("config", "config.toml") 177 | 178 | if _, err := os.Stat(configPath); os.IsNotExist(err) { 179 | err = os.MkdirAll(filepath.Dir(configPath), os.ModePerm) 180 | if err != nil { 181 | return err 182 | } 183 | } 184 | 185 | data, err := toml.Marshal(Conf) 186 | if err != nil { 187 | return err 188 | } 189 | 190 | err = os.WriteFile(configPath, data, 0644) 191 | if err != nil { 192 | return err 193 | } 194 | 195 | return nil 196 | } 197 | -------------------------------------------------------------------------------- /docs/README_ar.md: -------------------------------------------------------------------------------- 1 |
2 | KrillinAI 3 | 4 | 5 | # أداة ترجمة ودبلجة الصوت والفيديو بالذكاء الاصطناعي 6 | 7 | krillinai%2FKrillinAI | Trendshift 8 | 9 | **[English](../README.md)|[简体中文](../docs/README_zh.md)|[日本語](../docs/README_jp.md)|[한국어](../docs/README_kr.md)|[Tiếng Việt](../docs/README_vi.md)|[Français](../docs/README_fr.md)|[Deutsch](../docs/README_de.md)|[Español](../docs/README_es.md)|[Português](../docs/README_pt.md)|[Русский](../docs/README_rus.md)|[اللغة العربية](../docs/README_ar.md)** 10 | 11 | [![Twitter](https://img.shields.io/badge/Twitter-KrillinAI-orange?logo=twitter)](https://x.com/KrillinAI) 12 | [![Discord](https://img.shields.io/discord/1333374141092331605?label=Discord&logo=discord&style=flat-square)](https://discord.gg/sKUAsHfy) 13 | [![Bilibili](https://img.shields.io/badge/dynamic/json?label=Bilibili&query=%24.data.follower&suffix=%20followers&url=https%3A%2F%2Fapi.bilibili.com%2Fx%2Frelation%2Fstat%3Fvmid%3D242124650&logo=bilibili&color=00A1D6&labelColor=FE7398&logoColor=FFFFFF)](https://space.bilibili.com/242124650) 14 | 15 |
16 | 17 | ### إصدار جديد لنظامي ويندوز وماك - مرحبًا باختباره وتقديم الملاحظات 18 | 19 | ## نظرة عامة 20 | 21 | كريلين AI هو حل متكامل لتحسين وتوطين الفيديوهات بسهولة. هذه الأداة البسيطة لكن القوية تتعامل مع كل شيء من الترجمة والدبلجة إلى استنساخ الأصوات، وإعادة التنسيق - حيث تقوم بتحويل الفيديوهات بسلاسة بين الوضع الأفقي والعمودي لعرض مثالي على جميع منصات المحتوى (يوتيوب، تيك توك، بيلبلي، دويين، قناة وي تشات، ريد نوت، كوايشو). من خلال سير العمل الشامل، يحوّل كريلين AI اللقطات الخام إلى محتوى نهائي وجاهز للنشر ببضع نقرات فقط. 22 | 23 | الميزات الرئيسية: 24 | 🎯 بدء بنقرة واحدة - ابدأ سير العمل فورًا، النسخة الجديدة لسطح المكتب متاحة الآن - أسهل في الاستخدام! 25 | 26 | 📥 تنزيل الفيديو - يدعم yt-dlp ورفع الملفات المحلية 27 | 28 | 📜 ترجمات دقيقة - تعتمد على Whisper للتعرف عالي الدقة 29 | 30 | 🧠 تقسيم ذكي - تجزئة المحاذاة التلقائية للترجمات بناءً على نماذج اللغات الكبيرة (LLM) 31 | 32 | 🌍 ترجمة احترافية - ترجمة على مستوى الفقرات للحفاظ على الاتساق 33 | 34 | 🔄 استبدال المصطلحات - تبديل المفردات المتخصصة بنقرة واحدة 35 | 36 | 🎙️ الدبلجة واستنساخ الأصوات - اختيار أصوات CosyVoice أو استنساخ الأصوات 37 | 38 | 🎬 تكوين الفيديو - إعادة التنسيق التلقائي للوضع الأفقي/العمودي 39 | 40 | ## عرض توضيحي 41 | الصورة التالية توضح النتيجة بعد إدراج ملف الترجمة - الذي تم إنشاؤه بنقرة واحدة بعد استيراد فيديو محلي مدته 46 دقيقة - في المسار. لم يتم إجراء أي تعديل يدوي على الإطلاق. لا توجد ترجمات ناقصة أو متداخلة، وتقسيم الجمل طبيعي، وجودة الترجمة عالية جدًا. 42 | ![Alignment](../docs/images/alignment.png) 43 | 44 | 45 | 46 | 53 | 60 | 61 | 68 | 69 | 70 |
47 | 48 | ### ترجمة الترجمة النصية 49 | --- 50 | https://github.com/user-attachments/assets/bba1ac0a-fe6b-4947-b58d-ba99306d0339 51 | 52 | 54 | 55 | ### الدبلجة 56 | --- 57 | https://github.com/user-attachments/assets/0b32fad3-c3ad-4b6a-abf0-0865f0dd2385 58 | 59 | 62 | 63 | ### الوضع العمودي 64 | --- 65 | https://github.com/user-attachments/assets/c2c7b528-0ef8-4ba9-b8ac-f9f92f6d4e71 66 | 67 |
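
للتوضيح فقط — وليس تنفيذ KrillinAI الداخلي — يوضّح المقتطف التالي بلغة Go الشكل العام لطلب «الترجمة على مستوى الفقرات» عبر أي خدمة متوافقة مع OpenAI API (انظر قسم دعم نماذج اللغة الكبيرة أدناه). عنوان الخدمة واسم النموذج والمفتاح هنا قيم افتراضية للتوضيح فقط، والقيم الفعلية تُضبط عبر config.toml:

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

// Minimal request/response shapes for an OpenAI-compatible chat/completions endpoint.
type chatMessage struct {
	Role    string `json:"role"`
	Content string `json:"content"`
}

type chatRequest struct {
	Model    string        `json:"model"`
	Messages []chatMessage `json:"messages"`
}

type chatResponse struct {
	Choices []struct {
		Message chatMessage `json:"message"`
	} `json:"choices"`
}

// translateParagraph sends one subtitle paragraph to an OpenAI-compatible LLM service.
// baseURL, apiKey and the model name are placeholders; in KrillinAI they come from config.toml.
func translateParagraph(baseURL, apiKey, text string) (string, error) {
	body, err := json.Marshal(chatRequest{
		Model: "gpt-4o-mini", // placeholder model name
		Messages: []chatMessage{
			{Role: "system", Content: "Translate the following subtitle paragraph into Arabic."},
			{Role: "user", Content: text},
		},
	})
	if err != nil {
		return "", err
	}
	req, err := http.NewRequest("POST", baseURL+"/chat/completions", bytes.NewReader(body))
	if err != nil {
		return "", err
	}
	req.Header.Set("Authorization", "Bearer "+apiKey)
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	var out chatResponse
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		return "", err
	}
	if len(out.Choices) == 0 {
		return "", fmt.Errorf("empty response from LLM service")
	}
	return out.Choices[0].Message.Content, nil
}

func main() {
	// Placeholder values for illustration only.
	translated, err := translateParagraph("https://api.openai.com/v1", "sk-XXX", "Hello, world.")
	if err != nil {
		fmt.Println("error:", err)
		return
	}
	fmt.Println(translated)
}
```
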
71 | 72 | ## 🔍 دعم التعرف على الصوت 73 | _**جميع النماذج المحلية في الجدول أدناه تدعم التثبيت التلقائي للملفات التنفيذية + ملفات النماذج. فقط قم باختيارك، وسيتولى KrillinAI كل شيء آخر لك.**_ 74 | 75 | | Service | Supported Platforms | Model Options | Local/Cloud | Notes | 76 | |-----------------|------------------------------|-----------------------------------|-------------|----------------| 77 | | **OpenAI Whisper** | Cross-platform | - | Cloud | Fast with excellent results | 78 | | **FasterWhisper** | Windows/Linux | `tiny`/`medium`/`large-v2` (recommend medium+) | Local | Faster speed, no cloud service overhead | 79 | | **WhisperKit** | macOS (Apple Silicon only) | `large-v2` | Local | Native optimization for Apple chips | 80 | | **Alibaba Cloud ASR** | Cross-platform | - | Cloud | Bypasses China mainland network issues | 81 | 82 | ## 🚀 دعم نماذج اللغة الكبيرة 83 | 84 | ✅ متوافق مع جميع خدمات المتوافقة مع OpenAI API السحابية/المحلية بما في ذلك على سبيل المثال لا الحصر: 85 | - OpenAI 86 | - DeepSeek 87 | - Qwen (Tongyi Qianwen) 88 | - Self-hosted open-source models 89 | - Other OpenAI-format compatible API services 90 | 91 | 92 | ## 🌍 اللغات المدعومة 93 | لغات الإدخال: الصينية، الإنجليزية، اليابانية، الألمانية، التركية (مع إضافة المزيد من اللغات قريبًا) 94 | لغات الترجمة: 101 لغة مدعومة، بما في ذلك الإنجليزية، الصينية، الروسية، الإسبانية، الفرنسية، وغيرها. 95 | 96 | ## معاينة الواجهة 97 | ![ui preview](../docs/images/ui_desktop.png) 98 | 99 | ## 🚀 بدء سريع 100 | ### الخطوات الأساسية 101 | 102 | أولاً، قم بتنزيل ملف الإصدار التنفيذي الذي يتوافق مع نظام جهازك. اتبع التعليمات أدناه للاختيار بين نسخة سطح المكتب أو النسخة العادية، ثم ضع البرنامج في مجلد فارغ. عند تشغيل البرنامج، سيتم إنشاء بعض المجلدات تلقائياً، لذا فإن وضعه في مجلد فارغ يجعل إدارته أسهل. 103 | 104 | [For the desktop version (release files with "desktop" in the name), refer here] 105 | _The desktop version is newly released to address the difficulty beginners face in editing configuration files correctly. It still has some bugs and is being continuously updated._ 106 | 107 | انقر نقرًا مزدوجًا على الملف لبدء استخدامه. 108 | 109 | [لنسخة غير سطح المكتب (ملفات الإصدار التي لا تحتوي على "desktop" في الاسم)، ارجع إلى هنا] 110 | _تعتبر نسخة غير سطح المكتب هي الإصدار الأصلي، تتميز بإعدادات أكثر تعقيدًا ولكن بوظائف مستقرة. وهي مناسبة أيضًا للنشر على الخوادم، حيث توفر واجهة مستخدم تعمل عبر الويب._ 111 | 112 | قم بإنشاء مجلد `config` في الدليل، ثم أنشئ ملف `config.toml` بداخله. انسخ محتويات ملف `config-example.toml` من مجلد `config` في الكود المصدقي إلى ملف `config.toml` الخاص بك وقم بملء تفاصيل الإعدادات. (إذا كنت ترغب في استخدام نماذج OpenAI ولكنك لا تعرف كيفية الحصول على مفتاح، يمكنك الانضمام إلى المجموعة للحصول على وصول تجريبي مجاني.) 113 | 114 | انقر نقرًا مزدوجًا على الملف التنفيذي أو قم بتشغيله في الطرفية لبدء الخدمة. 115 | 116 | افتح متصفحك وأدخل http://127.0.0.1:8888 لبدء استخدامه. (استبدل 8888 برقم المنفذ الذي حددته في ملف الإعدادات.) 117 | 118 | ### إلى: مستخدمي نظام macOS 119 | [لنسخة سطح المكتب (أي ملفات الإصدار التي تحتوي على "desktop" في الاسم)، ارجع هنا] 120 | طريقة التغليف الحالية لنسخة سطح المكتب لا تدعم التشغيل المباشر بالنقر المزدوج أو التثبيت عبر DMG بسبب مشاكل التوقيع. يتطلب ذلك إعداد الثقة يدوياً كما يلي: 121 | 122 | 1. افتح المجلد الذي يحتوي على الملف التنفيذي (لنفترض أن اسم الملف هو KrillinAI_1.0.0_desktop_macOS_arm64) في Terminal 123 | 124 | 2. 
نفّذ الأوامر التالية بالتسلسل: 125 | 126 | 127 | ``` 128 | sudo xattr -cr ./KrillinAI_1.0.0_desktop_macOS_arm64 129 | sudo chmod +x ./KrillinAI_1.0.0_desktop_macOS_arm64 130 | ./KrillinAI_1.0.0_desktop_macOS_arm64 131 | ``` 132 | 133 | [للنسخة العادية (ملفات الإصدار التي لا تحتوي على "desktop" في الاسم)، راجع هنا] 134 | هذا البرنامج غير موقّع، لذا بعد إكمال إعداد الملفات وفق "الخطوات الأساسية"، ستحتاج إلى منح الثقة يدوياً للتطبيق على نظام macOS. اتبع هذه الخطوات: 135 | 1. افتح Terminal وانتقل إلى المجلد الذي يحتوي على الملف التنفيذي (لنفترض أن اسم الملف هو KrillinAI_1.0.0_macOS_arm64). 136 | 137 | 2. نفّذ الأوامر التالية بالتسلسل: 138 | 139 | ``` 140 | sudo xattr -rd com.apple.quarantine ./KrillinAI_1.0.0_macOS_arm64 141 | sudo chmod +x ./KrillinAI_1.0.0_macOS_arm64 142 | ./KrillinAI_1.0.0_macOS_arm64 143 | ``` 144 | سيؤدي هذا إلى بدء تشغيل الخدمة. 145 | 146 | ### النشر باستخدام Docker 147 | 148 | هذا المشروع يدعم النشر عبر يُرجى الرجوع إلى [Docker Deployment Instructions](../docs/docker.md). 149 | 150 | ### تعليمات إعداد Cookie 151 | 152 | إذا واجهت فشلًا في تنزيل الفيديو، يُرجى الرجوع إلى تعليمات إعداد Cookie لتهيئة معلومات الـ Cookie الخاصة بك. 153 | 154 | (ملاحظة: تم الحفاظ على نفس تنسيق الروابط والعناوين كما في النص الأصلي) 155 | 156 | 157 | ### مساعدة في الإعدادات 158 | أسرع وأكثر طريقة ملائمة للإعداد: 159 | * د openai لكل من transcription_provider و llm_provider. بهذه الطريقة، تحتاج فقط إلى ملء openai.apikey في الفئات الثلاث الرئيسية لبنود الإعداد التالية، وهي openai، local_model، و aliyun، ثم يمكنك إجراء ترجمة الترجمة النصية. (املأ app.proxy، model و openai.base_url حسب حالتك الخاصة.) 160 | 161 | طريقة الإعداد لاستخدام نموذج التعرف على الكلام المحلي (غير مدعوم على macOS في الوقت الحالي) (خيار يأخذ في الاعتبار التكلفة والسرعة والجودة): 162 | 163 | * املأ fasterwhisper لـ transcription_provider و openai لـ llm_provider. بهذه الطريقة، تحتاج فقط إلى ملء openai.apikey و local_model.faster_whisper في الفئتين الرئيسيتين لبنود الإعداد التالية، وهما openai و local_model، ثم يمكنك إجراء ترجمة الترجمة النصية. سيتم تنزيل النموذج المحلي تلقائيًا. (ينطبق نفس الأمر على app.proxy و openai.base_url كما ذكر أعلاه.) 164 | 165 | حالات الاستخدام التي تتطلب إعدادات علي بابا السحابية 166 | * إذا تم تعيين llm_provider إلى aliyun، فهذا يعني أنه سيتم استخدام خدمة النماذج الكبيرة من علي بابا السحابية. وبالتالي، يجب إعداد عنصر aliyun.bailian في الإعدادات. 167 | * إذا تم تعيين transcription_provider إلى aliyun، أو إذا تم تمكين وظيفة "الدبلجة الصوتية" عند بدء المهمة، فسيتم استخدام خدمة الصوت من علي بابا السحابية. لذلك، يجب ملء عنصر aliyun.speech في الإعدادات. 168 | * إذا تم تمكين وظيفة "الدبلجة الصوتية" وتم تحميل ملفات صوتية محلية لاستنساخ نبرة الصوت في نفس الوقت، فسيتم أيضًا استخدام خدمة التخزين السحابي OSS من علي بابا السحابية. وبالتالي، يجب ملء عنصر aliyun.oss في الإعدادات. 169 | دليل الإعدادات: [Alibaba Cloud Configuration Instructions](../docs/aliyun.md) 170 | 171 | ## الأسئلة الشائعة 172 | يُرجى الرجوع إلى [Frequently Asked Questions](../docs/faq.md) 173 | 174 | ## إرشادات المساهمة 175 | 176 | - لا تقم بإرسال ملفات غير ضرورية مثل .vscode، .idea، وغيرها. يُرجى استخدام .gitignore بشكل صحيح لتصفيتها. 177 | - لا تقم بإرسال ملف config.toml؛ بدلاً من ذلك، قم بإرسال ملف config-example.toml. 
178 | ## تاريخ النجوم 179 | 180 | [![Star History Chart](https://api.star-history.com/svg?repos=krillinai/KrillinAI&type=Date)](https://star-history.com/#krillinai/KrillinAI&Date) 181 | 182 | -------------------------------------------------------------------------------- /docs/README_fr.md: -------------------------------------------------------------------------------- 1 |
2 | KrillinAI 3 | 4 | 5 | # Outil de Traduction et Doublage Audio/Video par IA 6 | 7 | krillinai%2FKrillinAI | Trendshift 8 | 9 | **[English](../README.md)|[简体中文](../docs/README_zh.md)|[日本語](../docs/README_jp.md)|[한국어](../docs/README_kr.md)|[Tiếng Việt](../docs/README_vi.md)|[Français](../docs/README_fr.md)|[Deutsch](../docs/README_de.md)|[Español](../docs/README_es.md)|[Português](../docs/README_pt.md)|[Русский](../docs/README_rus.md)|[اللغة العربية](../docs/README_ar.md)** 10 | 11 | [![Twitter](https://img.shields.io/badge/Twitter-KrillinAI-orange?logo=twitter)](https://x.com/KrillinAI) 12 | [![Discord](https://img.shields.io/discord/1333374141092331605?label=Discord&logo=discord&style=flat-square)](https://discord.gg/sKUAsHfy) 13 | [![Bilibili](https://img.shields.io/badge/dynamic/json?label=Bilibili&query=%24.data.follower&suffix=%20followers&url=https%3A%2F%2Fapi.bilibili.com%2Fx%2Frelation%2Fstat%3Fvmid%3D242124650&logo=bilibili&color=00A1D6&labelColor=FE7398&logoColor=FFFFFF)](https://space.bilibili.com/242124650) 14 | 15 |
16 | 17 | ### 📢 Nouvelle Version Bureau pour Windows & Mac – Testez et Donnez Votre Avis 18 | 19 | ## Présentation 20 | 21 | Krillin AI est une solution tout-en-un pour la localisation et l'amélioration simplifiée de vidéos. Cet outil minimaliste mais puissant gère tout : traduction, doublage, clonage vocal, et reformatage – convertissant facilement les vidéos entre formats paysage et portrait pour un affichage optimal sur toutes les plateformes (YouTube, TikTok, Bilibili, Douyin, WeChat Channel, RedNote, Kuaishou). Avec son workflow intégré, Krillin AI transforme des vidéos brutes en contenu professionnel en quelques clics. 22 | 23 | ## Fonctionnalités Clés : 24 | 25 | 🎯 **Lancement Instantané** - Démarrez votre workflow en un clic. Nouvelle version bureau plus intuitive ! 26 | 27 | 📥 **Téléchargement Vidéo** - Prise en charge d'yt-dlp et des fichiers locaux 28 | 29 | 📜 **Sous-titres Précis** - Reconnaissance haute précision via Whisper 30 | 31 | 🧠 **Segmentation Intelligente** - Découpage des sous-titres par IA (LLM) 32 | 33 | 🌍 **Traduction Professionnelle** - Traduction cohérente par paragraphes 34 | 35 | 🔄 **Remplacement de Termes** - Échange de vocabulaire spécialisé en un clic 36 | 37 | 🎙️ **Doublage et Clonage Vocal** - Sélection de voix CosyVoice ou clonage 38 | 39 | 🎬 **Composition Vidéo** - Formatage automatique paysage/portrait 40 | 41 | ## Démonstration 42 | L'image ci-dessous montre le résultat après insertion automatique des sous-titres générés pour une vidéo locale de 46 minutes (sans ajustement manuel). Aucun sous-titre manquant ou chevauchant, une segmentation naturelle et une traduction de qualité. 43 | ![Alignment](../docs/images/alignment.png) 44 | 45 | 46 | 47 | 54 | 61 | 62 | 69 | 70 | 71 |
48 | 49 | ### Traduction de Sous-titres 50 | --- 51 | https://github.com/user-attachments/assets/bba1ac0a-fe6b-4947-b58d-ba99306d0339 52 | 53 | 55 | 56 | ### Doublage 57 | --- 58 | https://github.com/user-attachments/assets/0b32fad3-c3ad-4b6a-abf0-0865f0dd2385 59 | 60 | 63 | 64 | ### Format Portrait 65 | --- 66 | https://github.com/user-attachments/assets/c2c7b528-0ef8-4ba9-b8ac-f9f92f6d4e71 67 | 68 |
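
À titre purement indicatif (il ne s'agit pas du code interne de KrillinAI), la fonctionnalité « Téléchargement Vidéo » citée plus haut s'appuie sur l'outil yt-dlp ; le croquis Go suivant montre à quoi peut ressembler un appel minimal, l'URL et le modèle de nom de fichier étant des valeurs d'exemple :

```go
package main

import (
	"fmt"
	"os/exec"
)

func main() {
	// URL d'exemple ; yt-dlp doit être accessible dans le PATH
	// (dans l'image Docker, le binaire est placé dans /app/bin, cf. Dockerfile).
	url := "https://www.youtube.com/watch?v=XXXXXXXXXXX"

	// -o définit le modèle de nom de sortie ; %(ext)s est remplacé par l'extension réelle.
	cmd := exec.Command("yt-dlp", "-o", "downloads/%(title)s.%(ext)s", url)
	out, err := cmd.CombinedOutput()
	if err != nil {
		fmt.Printf("échec du téléchargement : %v\n%s\n", err, out)
		return
	}
	fmt.Println("téléchargement terminé")
}
```

KrillinAI gère cette étape automatiquement ; ce croquis sert uniquement à visualiser le rôle de yt-dlp dans le flux de travail.
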
72 | 73 | ## 🔍 Reconnaissance Vocale 74 | _**Tous les modèles locaux dans le tableau ci-dessous prennent en charge l'installation automatique des fichiers exécutables + fichiers de modèle. Il vous suffit de faire votre sélection, et KrillinAI s'occupera du reste.**_ 75 | 76 | | Service | Plateformes supportées | Options de modèle | Local/Cloud | Remarques | 77 | |-----------------|------------------------------|-----------------------------------|-------------|----------------| 78 | | **OpenAI Whisper** | Multi-plateforme | - | Cloud | Rapide avec d'excellents résultats | 79 | | **FasterWhisper** | Windows/Linux | `tiny`/`medium`/`large-v2` (recommandé medium+) | Local | Vitesse accrue, pas de surcharge de service cloud | 80 | | **WhisperKit** | macOS (Apple Silicon uniquement) | `large-v2` | Local | Optimisation native pour puces Apple | 81 | | **Alibaba Cloud ASR** | Multi-plateforme | - | Cloud | Contourne les problèmes réseau en Chine continentale | 82 | 83 | ## 🚀 Prise en charge des Grands Modèles de Langage 84 | 85 | ✅ Compatible avec tous les services cloud/locaux **compatibles avec l'API OpenAI**, y compris mais sans s'y limiter : 86 | - OpenAI 87 | - DeepSeek 88 | - Qwen (Tongyi Qianwen) 89 | - Modèles open source auto-hébergés 90 | - Autres services API compatibles avec le format OpenAI 91 | 92 | ## 🌍 Langues Prises en Charge 93 | Langues d'entrée : Chinois, Anglais, Japonais, Allemand, Turc (autres en cours d'ajout) 94 | Langues de traduction : 101 langues dont Anglais, Chinois, Russe, Espagnol, Français, etc. 95 | 96 | ## Aperçu de l'Interface 97 | ![ui preview](../docs/images/ui_desktop.png) 98 | 99 | ## 🚀 Guide de Démarrage Rapide 100 | ### Étapes de Base 101 | Téléchargez d'abord le fichier exécutable de la version Release correspondant à votre système. Suivez les instructions ci-dessous pour choisir entre la version bureau ou standard, puis placez le logiciel dans un dossier vide. L'exécution du programme générera des répertoires supplémentaires - un dossier vide facilite la gestion. 102 | 103 | [Pour la version bureau (fichiers avec "desktop" dans le nom)] 104 | _La version bureau est une nouveauté conçue pour simplifier la configuration (sans éditer de fichiers). Elle contient encore quelques bugs et est mise à jour régulièrement._ 105 | 106 | Double-cliquez sur le fichier pour l'utiliser. 107 | 108 | [Pour la version standard (fichiers sans "desktop" dans le nom), voir ici] 109 | _La version standard est la publication originale, offrant une configuration plus complexe mais une fonctionnalité stable. Elle convient également au déploiement sur serveur grâce à son interface web._ 110 | 111 | Créez un dossier `config` dans le répertoire, puis créez un fichier `config.toml` à l'intérieur. Copiez le contenu du fichier `config-example.toml` du dossier `config` du code source dans votre `config.toml` et remplissez les détails de configuration. (Si vous souhaitez utiliser les modèles OpenAI mais ne savez pas comment obtenir une clé, vous pouvez rejoindre le groupe pour un accès d'essai gratuit.) 112 | 113 | Double-cliquez sur l'exécutable ou exécutez-le dans le terminal pour démarrer le service. 114 | 115 | Ouvrez votre navigateur et entrez http://127.0.0.1:8888 pour commencer à l'utiliser. (Remplacez 8888 par le numéro de port que vous avez spécifié dans le fichier config.) 
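
Pour visualiser ce que contient ce fichier, voici un croquis Go simplifié (inspiré de `config/config.go`, mais ce n'est pas le code exact du projet) montrant comment `config.toml` est décodé avec `github.com/BurntSushi/toml` ; seuls quelques champs sont repris ici :

```go
package main

import (
	"fmt"
	"log"

	"github.com/BurntSushi/toml"
)

// Sous-ensemble de la configuration réelle (voir config/config.go pour la version complète).
type appConfig struct {
	App struct {
		TranscribeProvider string `toml:"transcribe_provider"`
		LlmProvider        string `toml:"llm_provider"`
	} `toml:"app"`
	Server struct {
		Host string `toml:"host"`
		Port int    `toml:"port"`
	} `toml:"server"`
	Openai struct {
		BaseUrl string `toml:"base_url"`
		Model   string `toml:"model"`
		ApiKey  string `toml:"api_key"`
	} `toml:"openai"`
}

func main() {
	var conf appConfig
	// Le service lit ./config/config.toml au démarrage.
	if _, err := toml.DecodeFile("./config/config.toml", &conf); err != nil {
		log.Fatalf("impossible de lire config.toml : %v", err)
	}
	fmt.Printf("transcription : %s, LLM : %s, écoute sur %s:%d\n",
		conf.App.TranscribeProvider, conf.App.LlmProvider,
		conf.Server.Host, conf.Server.Port)
}
```

Si ce petit programme affiche bien vos valeurs, le service lira la même configuration au démarrage.
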
116 | 117 | ### Pour les utilisateurs macOS 118 | [Pour la version bureau (fichiers avec "desktop" dans le nom), voir ici] 119 | La méthode actuelle d'empaquetage ne permet pas d'exécution par double-clic ni d'installation via DMG en raison de problèmes de signature. Une configuration manuelle de confiance est nécessaire : 120 | 121 | 1. Ouvrez dans le Terminal le répertoire contenant le fichier exécutable (nommé par exemple KrillinAI_1.0.0_desktop_macOS_arm64) 122 | 123 | 2. Exécutez les commandes suivantes dans l'ordre : 124 | 125 | ``` 126 | sudo xattr -cr ./KrillinAI_1.0.0_desktop_macOS_arm64 127 | sudo chmod +x ./KrillinAI_1.0.0_desktop_macOS_arm64 128 | ./KrillinAI_1.0.0_desktop_macOS_arm64 129 | ``` 130 | 131 | [Pour la version standard (fichiers sans "desktop" dans le nom), voir ici] 132 | Ce logiciel n'est pas signé. Après avoir complété la configuration des fichiers comme décrit dans les "Étapes de base", vous devrez approuver manuellement l'application sur macOS. Procédez comme suit : 133 | 134 | 1. Ouvrez le terminal et accédez au répertoire contenant le fichier exécutable (par exemple `KrillinAI_1.0.0_macOS_arm64`) 135 | 2. Exécutez les commandes suivantes dans l'ordre : 136 | ``` 137 | sudo xattr -rd com.apple.quarantine ./KrillinAI_1.0.0_macOS_arm64 138 | sudo chmod +x ./KrillinAI_1.0.0_macOS_arm64 139 | ./KrillinAI_1.0.0_macOS_arm64 140 | ``` 141 | Cela démarrera le service. 142 | 143 | ### Déploiement Docker 144 | Consultez le [Docker Deployment Instructions](../docs/docker.md). 145 | 146 | ### Configuration des Cookies 147 | 148 | En cas d'échec de téléchargement, suivez le [Cookie Configuration Instructions](../docs/get_cookies.md) . 149 | 150 | ### Aide à la Configuration 151 | La méthode de configuration la plus rapide et pratique : 152 | * Sélectionnez `openai` pour `transcription_provider` et `llm_provider`. Ainsi, vous n'aurez qu'à renseigner `openai.apikey` dans les trois catégories de configuration principales (`openai`, `local_model`, et `aliyun`) pour effectuer la traduction de sous-titres. (Complétez `app.proxy`, `model` et `openai.base_url` selon votre situation.) 153 | 154 | Méthode utilisant le modèle local de reconnaissance vocale (non supporté sur macOS pour le moment) (optimisant coût, vitesse et qualité) : 155 | * Utilisez `fasterwhisper` pour `transcription_provider` et `openai` pour `llm_provider`. Vous devrez alors renseigner `openai.apikey` et `local_model.faster_whisper` dans les catégories `openai` et `local_model`. Le modèle local sera téléchargé automatiquement. (`app.proxy` et `openai.base_url` restent configurables comme mentionné ci-dessus.) 156 | 157 | Cas nécessitant la configuration d'Alibaba Cloud : 158 | * Si `llm_provider` est défini sur `aliyun`, le service de grands modèles d'Alibaba Cloud sera utilisé. Configurez alors `aliyun.bailian`. 159 | * Si `transcription_provider` est sur `aliyun` ou si la fonction "doublage vocal" est activée, le service vocal d'Alibaba Cloud sera utilisé. Configurez `aliyun.speech`. 160 | * Si le "doublage vocal" est activé avec clonage de timbre vocal via fichiers audio locaux, le service OSS d'Alibaba Cloud sera aussi utilisé. Configurez alors `aliyun.oss`. 161 | Guide : [Instructions de configuration Alibaba Cloud](./docs/aliyun.md) 162 | 163 | ## Foire Aux Questions 164 | Consultez la [FAQ](../docs/faq.md) (Foire Aux Questions) 165 | 166 | ## Directives de Contribution 167 | 168 | - Ne soumettez pas de fichiers inutiles comme `.vscode`, `.idea`, etc. 
Utilisez correctement le fichier `.gitignore` pour les exclure. 169 | - Ne soumettez pas `config.toml` ; soumettez plutôt `config-example.toml`. 170 | 171 | ## Historique des Stars 172 | 173 | [![Star History Chart](https://api.star-history.com/svg?repos=krillinai/KrillinAI&type=Date)](https://star-history.com/#krillinai/KrillinAI&Date) 174 | -------------------------------------------------------------------------------- /docs/README_jp.md: -------------------------------------------------------------------------------- 1 |
2 | KrillinAI 3 | 4 | # AI動画翻訳・吹き替えツール(簡単デプロイ) 5 | 6 | krillinai%2FKrillinAI | Trendshift 7 | 8 | **[English](../README.md)|[简体中文](../docs/README_zh.md)|[日本語](../docs/README_jp.md)|[한국어](../docs/README_kr.md)|[Tiếng Việt](../docs/README_vi.md)|[Français](../docs/README_fr.md)|[Deutsch](../docs/README_de.md)|[Español](../docs/README_es.md)|[Português](../docs/README_pt.md)|[Русский](../docs/README_rus.md)|[اللغة العربية](../docs/README_ar.md)** 9 | 10 | [![Twitter](https://img.shields.io/badge/Twitter-KrillinAI-orange?logo=twitter)](https://x.com/KrillinAI) 11 | [![Discord](https://img.shields.io/discord/1333374141092331605?label=Discord&logo=discord&style=flat-square)](https://discord.gg/sKUAsHfy) 12 | [![Bilibili](https://img.shields.io/badge/dynamic/json?label=Bilibili&query=%24.data.follower&suffix=%20フォロワー&url=https%3A%2F%2Fapi.bilibili.com%2Fx%2Frelation%2Fstat%3Fvmid%3D242124650&logo=bilibili&color=00A1D6&labelColor=FE7398&logoColor=FFFFFF)](https://space.bilibili.com/242124650) 13 | 14 |
15 | 16 | ## 🚀 プロジェクト概要 17 | 18 | クリリンAIは、動画のローカライズと品質向上を簡単に実現するオールインワンソリューションです。このシンプルでありながら強力なツールは、翻訳、吹き替え、ボイスクローニングからフォーマット調整までをカバー。縦横画面のシームレスな変換により、YouTube、TikTok、Bilibili、抖音(Douyin)、微信チャンネル、RedNote、快手(Kuaishou)など、あらゆるコンテンツプラットフォームに最適化された表示を実現します。エンドツーエンドのワークフローで、わずかなクリックだけで未編集の素材から完成度の高いプラットフォーム対応コンテンツへと仕上げます。 19 | 20 | ## 主な特徴と機能: 21 | 🎯 **ワンクリック起動**:複雑な環境設定不要、依存関係を自動インストール 22 | 📥 **動画取得**:yt-dlpダウンロードまたはローカルファイルアップロード対応 23 | 📜 **高精度認識**:Whisperベースの音声認識 24 | 🧠 **インテリジェント分割**:LLMを使用した字幕分割と調整 25 | 🔄 **用語置換**:専門分野の語彙をワンクリックで置換 26 | 🌍 **プロ翻訳**:LLMベースの段落単位翻訳で文脈一貫性を保持 27 | 🎙️ **音声クローン**:デフォルト音声またはカスタム音声クローニング 28 | 🎬 **動画合成**:縦横画面と字幕レイアウトを自動処理 29 | 30 | ## 効果デモ 31 | 下図は46分のローカル動画をインポートし、ワンクリック実行後に生成された字幕ファイルをトラックに追加した効果です。手動調整なしで、欠落・重複なく、自然な文節区切りと高品質な翻訳を実現。 32 | ![調整効果](./images/alignment.png) 33 | 34 | 35 | 36 | 43 | 52 | 53 | 60 | 61 |
37 | 38 | ### 字幕翻訳 39 | --- 40 | https://github.com/user-attachments/assets/bba1ac0a-fe6b-4947-b58d-ba99306d0339 41 | 42 | 44 | 45 | 46 | 47 | ### 配音 48 | --- 49 | https://github.com/user-attachments/assets/0b32fad3-c3ad-4b6a-abf0-0865f0dd2385 50 | 51 | 54 | 55 | ### 縦画面 56 | --- 57 | https://github.com/user-attachments/assets/c2c7b528-0ef8-4ba9-b8ac-f9f92f6d4e71 58 | 59 |
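
参考までに、下表のプロバイダー選択が設定側でどのように検証されるかを示す簡単な Go のスケッチです(config/config.go の validateConfig を簡略化したもので、実際のコードそのままではありません):

```go
package main

import (
	"errors"
	"fmt"
	"runtime"
)

// validateTranscriber は config/config.go の validateConfig を簡略化したスケッチ。
// transcribe_provider ごとに必要な設定だけを確認する。
func validateTranscriber(provider, whisperAPIKey, localModel string) error {
	switch provider {
	case "openai":
		if whisperAPIKey == "" {
			return errors.New("openai.whisper.api_key が必要です")
		}
	case "fasterwhisper":
		if localModel != "tiny" && localModel != "medium" && localModel != "large-v2" {
			return errors.New("fasterwhisper のモデルは tiny / medium / large-v2 から選択してください")
		}
	case "whisperkit":
		if runtime.GOOS != "darwin" {
			return errors.New("whisperkit は macOS (Apple Silicon) のみ対応です")
		}
	case "whispercpp":
		if runtime.GOOS != "windows" {
			return errors.New("whisper.cpp は現状 Windows のみ対応です")
		}
	case "aliyun":
		// aliyun.speech の access_key_id / access_key_secret / app_key が必要
	default:
		return fmt.Errorf("未対応の transcribe_provider: %s", provider)
	}
	return nil
}

func main() {
	if err := validateTranscriber("fasterwhisper", "", "medium"); err != nil {
		fmt.Println("設定エラー:", err)
		return
	}
	fmt.Println("設定 OK")
}
```
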
62 | 63 | ## 🔍 音声認識サポート 64 | _**以下の表に記載されているすべてのローカルモデルは、実行ファイル+モデルファイルの自動インストールに対応しています。選択するだけで、KrillinAIが残りの作業をすべて処理します。**_ 65 | 66 | | サービス | 対応プラットフォーム | モデルオプション | ローカル/クラウド | 備考 | 67 | |-----------------|------------------------------|-----------------------------------|-------------|----------------| 68 | | **OpenAI Whisper** | クロスプラットフォーム | - | クラウド | 高速で優れた精度 | 69 | | **FasterWhisper** | Windows/Linux | `tiny`/`medium`/`large-v2` (medium以上推奨) | ローカル | 高速処理、クラウド依存なし | 70 | | **WhisperKit** | macOS (Apple Siliconのみ) | `large-v2` | ローカル | Appleチップ向け最適化 | 71 | | **Alibaba Cloud ASR** | クロスプラットフォーム | - | クラウド | 中国本土のネットワーク制限回避 | 72 | 73 | ## 🚀 大規模言語モデル(LLM)サポート 74 | 75 | ✅ **OpenAI API互換**のクラウド/ローカルLLMサービスすべてに対応(以下に限定されません): 76 | - OpenAI 77 | - DeepSeek 78 | - Qwen (Tongyi Qianwen) 79 | - セルフホスト型オープンソースモデル 80 | - その他OpenAI形式互換APIサービス 81 | 82 | ## 対応言語 83 | 入力言語対応:中国語、英語、日本語、ドイツ語、トルコ語、マレー語(随時追加中) 84 | 85 | 翻訳言語対応:英語、中国語、ロシア語、スペイン語、フランス語など101言語 86 | 87 | ## インターフェースプレビュー 88 | ![インターフェースプレビュー](./images/ui_desktop.png) 89 | 90 | 91 | ## クイックスタート 92 | ### 基本手順 93 | 1. [Release](https://github.com/krillinai/KrillinAI/releases)からお使いのデバイスに合った実行ファイルをダウンロードし、空のフォルダに配置 94 | 2. フォルダ内に`config`フォルダを作成し、`config`フォルダ内に`config.toml`ファイルを作成、ソースコードの`config`ディレクトリにある`config-example.toml`ファイルの内容をコピーして貼り付け、設定情報を記入(OpenAIモデルを使いたいがキーの取得方法がわからない場合はグループに参加して無料で試用可能) 95 | 3. 実行ファイルをダブルクリック、またはターミナルで実行してサービスを起動 96 | 4. ブラウザを開き `http://127.0.0.1:8888`と入力して使用開始 97 | 98 | ### macOSユーザー向け 99 | 本ソフトウェアは署名されていないため、macOSで実行する場合、「基本手順」のファイル設定完了後、手動でアプリを信頼する必要があります。方法は以下の通り: 100 | 1. ターミナルで実行ファイル(ファイル名がKrillinAI_1.0.0_macOS_arm64と仮定)があるディレクトリを開く 101 | 2. 以下のコマンドを順に実行: 102 | ``` 103 | sudo xattr -rd com.apple.quarantine ./KrillinAI_1.0.0_macOS_arm64 104 | sudo chmod +x ./KrillinAI_1.0.0_macOS_arm64 105 | ./KrillinAI_1.0.0_macOS_arm64 106 | ``` 107 | これでサービスが起動します 108 | 109 | ### Dockerデプロイ 110 | 本プロジェクトはDockerデプロイをサポートしています。[Docker部署说明](./docker.md)を参照してください 111 | 112 | ### Cookie設定説明(オプション) 113 | 114 | 動画ダウンロードに失敗する場合 115 | 116 | [Cookie 配置说明](./get_cookies.md) を参照してCookie情報を設定してください。 117 | 118 | ### 設定ヘルプ(必読) 119 | 最速で簡単な設定方法: 120 | * transcription_providerとllm_providerの両方にopenaiを選択すると、openai、local_model、aliyunの3つの設定項目でopenai.apikeyのみ記入すれば字幕翻訳が可能です。(app.proxy、model、openai.base_urlは状況に応じて記入) 121 | 122 | ローカル音声認識モデルを使用する設定方法(macOS未対応)(コスト、速度、品質を考慮した選択) 123 | * transcription_providerにfasterwhisper、llm_providerにopenaiを記入すると、openai、local_modelの2つの設定項目でopenai.apikeyとlocal_model.faster_whisperを記入するだけで字幕翻訳が可能で、ローカルモデルは自動ダウンロードされます。(app.proxyとopenai.base_urlは同上) 124 | 125 | 以下の使用状況では、Alibaba Cloudの設定が必要です: 126 | * llm_providerにaliyunを記入した場合、Alibaba Cloudの大規模モデルサービスを使用するため、aliyun.bailian項目の設定が必要 127 | * transcription_providerにaliyunを記入した場合、またはタスク起動時に「吹き替え」機能を有効にした場合、Alibaba Cloudの音声サービスを使用するため、aliyun.speech項目の記入が必要 128 | * 「吹き替え」機能を有効にし、ローカルオーディオを音声クローニング用にアップロードした場合、Alibaba CloudのOSSクラウドストレージサービスを使用するため、aliyun.oss項目の記入が必要 129 | Alibaba Cloud設定ヘルプ:[阿里云配置说明](./aliyun.md) 130 | 131 | ## よくある質問 132 | 133 | [よくある質問](./faq.md)をご覧ください 134 | 135 | ## コントリビューション規範 136 | 1. .vscode、.ideaなどの不要なファイルをコミットしないでください。.gitignoreを活用してフィルタリングしてください 137 | 2. config.tomlをコミットせず、代わりにconfig-example.tomlを使用してコミットしてください 138 | 139 | ## お問い合わせ 140 | 1. QQグループに参加して質問にお答えします:754069680 141 | 2. 
ソーシャルメディアアカウントBilibiliをフォローし、AI技術分野の高品質なコンテンツを毎日シェアしています 142 | 143 | ## Star History 144 | 145 | [![Star History Chart](https://api.star-history.com/svg?repos=krillinai/KrillinAI&type=Date)](https://star-history.com/#krillinai/KrillinAI&Date) 146 | -------------------------------------------------------------------------------- /docs/README_kr.md: -------------------------------------------------------------------------------- 1 |
2 | KrillinAI 3 | 4 | 5 | # AI 오디오&비디오 번역 및 더빙 도구 6 | 7 | krillinai%2FKrillinAI | Trendshift 8 | 9 | **[English](../README.md)|[简体中文](../docs/README_zh.md)|[日本語](../docs/README_jp.md)|[한국어](../docs/README_kr.md)|[Tiếng Việt](../docs/README_vi.md)|[Français](../docs/README_fr.md)|[Deutsch](../docs/README_de.md)|[Español](../docs/README_es.md)|[Português](../docs/README_pt.md)|[Русский](../docs/README_rus.md)|[اللغة العربية](../docs/README_ar.md)** 10 | 11 | [![Twitter](https://img.shields.io/badge/Twitter-KrillinAI-orange?logo=twitter)](https://x.com/KrillinAI) 12 | [![Discord](https://img.shields.io/discord/1333374141092331605?label=Discord&logo=discord&style=flat-square)](https://discord.gg/sKUAsHfy) 13 | [![Bilibili](https://img.shields.io/badge/dynamic/json?label=Bilibili&query=%24.data.follower&suffix=%20followers&url=https%3A%2F%2Fapi.bilibili.com%2Fx%2Frelation%2Fstat%3Fvmid%3D242124650&logo=bilibili&color=00A1D6&labelColor=FE7398&logoColor=FFFFFF)](https://space.bilibili.com/242124650) 14 | 15 |
16 | 17 | ### 📢 Win & Mac 데스크톱 버전 신규 출시 – 테스트 후 피드백 제공 부탁드립니다 18 | 19 | ## 개요 20 | 21 | 크릴린 AI(Krillin AI)는 번역, 더빙, 음성 복제에서부터 화면 비율 변환까지 모든 과정을 처리하는 올인원 비디오 현지화 및 향상 솔루션입니다. 이 미니멀하면서도 강력한 도구는 유튜브, 틱톡, 빌리빌리, 더우인, 위챗 채널, 레드노트, 쿠아이쇼우 등 모든 콘텐츠 플랫폼에 최적화된 가로/세로 영상 변환을 자동으로 수행합니다. 엔드투엔드 워크플로우로 원본 영상을 클릭 몇 번만에 각 플랫폼에 맞는 완성된 콘텐츠로 변환해 줍니다. 22 | 23 | ## 주요 기능: 24 | 🎯 **원클릭 시작** - 즉시 작업 프로세스 실행 25 | 26 | 📥 **비디오 다운로드** - yt-dlp 지원 및 로컬 파일 업로드 가능 27 | 28 | 📜 **정밀 자막** - Whisper 기반 고정확도 음성 인식 29 | 30 | 🧠 **스마트 분할** - LLM 기반 자막 청크 분할 및 정렬 31 | 32 | 🌍 **전문가 수준 번역** - 문단 단위 자연스러운 번역 33 | 34 | 🔄 **용어 대체** - 분야별 전문 어휘 한 번에 변경 35 | 36 | 🎙️ ** 더빙 및 음성 복제** - CosyVoice 선택 또는 개인 음성 클로닝 37 | 38 | 🎬 **비디오 합성** - 가로/세로 레이아웃 자동 포맷팅 39 | 40 | ## 데모 영상 41 | 46분 분량의 로컬 비디오 파일을 불러온 후 원클릭 작업으로 생성된 자막 파일을 트랙에 삽입한 결과입니다. 전혀 수동 조정 없이도 자막 누락이나 겹침 현상 없이 문장 분할이 자연스럽게 이루어졌으며, 번역 품질 또한 매우 우수합니다. 42 | ![Alignment](./docs/images/alignment.png) 43 | 44 | 45 | 46 | 53 | 60 | 61 |
47 | 48 | ### 자막 번역 49 | --- 50 | https://github.com/user-attachments/assets/bba1ac0a-fe6b-4947-b58d-ba99306d0339 51 | 52 | 54 | 55 | ### 더빙 56 | --- 57 | https://github.com/user-attachments/assets/0b32fad3-c3ad-4b6a-abf0-0865f0dd2385 58 | 59 |
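
참고용으로, 아래 '빠른 시작' 단계를 마친 뒤 서비스가 정상적으로 기동했는지 확인하는 간단한 Go 스케치입니다. 주소와 포트 8888은 config.toml의 [server] 설정을 따르는 예시 값이며, KrillinAI 자체 코드가 아닙니다:

```go
package main

import (
	"fmt"
	"net/http"
	"time"
)

func main() {
	// config.toml의 [server] host/port에 맞춰 변경하세요 (예시 값).
	const baseURL = "http://127.0.0.1:8888"

	client := &http.Client{Timeout: 3 * time.Second}
	resp, err := client.Get(baseURL)
	if err != nil {
		fmt.Println("서비스에 연결할 수 없습니다:", err)
		return
	}
	defer resp.Body.Close()

	// 서비스가 정상이라면 웹 UI 페이지가 200으로 응답합니다.
	fmt.Println("응답 코드:", resp.StatusCode)
}
```
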
62 | 63 | ## 🔍 음성 인식 지원 64 | _**아래 표의 모든 로컬 모델은 실행 파일 + 모델 파일의 자동 설치를 지원합니다. 원하는 모델을 선택하기만 하면 KrillinAI이 나머지 모든 작업을 처리합니다.**_ 65 | 66 | | 서비스 | 지원 플랫폼 | 모델 옵션 | 로컬/클라우드 | 참고사항 | 67 | |-----------------|------------------------------|-----------------------------------|-------------|----------------| 68 | | **OpenAI Whisper** | 크로스 플랫폼 | - | 클라우드 | 빠른 속도와 우수한 결과 | 69 | | **FasterWhisper** | Windows/Linux | `tiny`/`medium`/`large-v2` (medium+ 권장) | 로컬 | 더 빠른 속도, 클라우드 서비스 오버헤드 없음 | 70 | | **WhisperKit** | macOS (Apple Silicon 전용) | `large-v2` | 로컬 | Apple 칩에 최적화 | 71 | | **Alibaba Cloud ASR** | 크로스 플랫폼 | - | 클라우드 | 중국 본토 네트워크 문제 회피 | 72 | 73 | ## 🚀 대규모 언어 모델 지원 74 | 75 | ✅ **OpenAI API 호환** 클라우드/로컬 LLM 서비스와 완벽 호환 (다음 포함): 76 | - OpenAI 77 | - DeepSeek 78 | - Qwen (Tongyi Qianwen) 79 | - 자체 호스팅 오픈소스 모델 80 | - 기타 OpenAI 형식 호환 API 서비스 81 | 82 | ## 🌍 언어 지원 83 | 입력 언어: 중국어, 영어, 일본어, 독일어, 터키어, 한국어 지원 (추가 언어 계속 확장 중) 84 | 번역 언어: 영어, 중국어, 러시아어, 스페인어, 프랑스어 등 101개 언어 지원 85 | 86 | ## 인터페이스 미리보기 87 | ![ui preview](./docs/images/ui_desktop.png) 88 | 89 | ## 🚀 빠른 시작 90 | ### 기본 단계 91 | 1. 릴리스에서 사용자 기기 시스템에 맞는 실행 파일을 다운로드 후 빈 폴더에 배치하세요. 92 | 2. 해당 폴더 내부에 config 폴더를 생성하고, config 폴더 안에 config.toml 파일을 만드세요. 소스 코드의 config 디렉토리에 있는 config-example.toml 파일 내용을 복사해 config.toml에 붙여넣은 후 설정 정보를 입력하세요. 93 | 3. 실행 파일을 더블클릭해 서비스를 시작하세요. 94 | 4. 브라우저에서 http://127.0.0.1:8888 주소로 접속하면 사용이 가능합니다(8888은 config.toml에서 설정한 포트 번호로 변경해주세요). 95 | 96 | ### macOS 사용자분들께 97 | 본 소프트웨어는 서명되지 않았으므로, "기본 단계"의 파일 구성 완료 후 macOS에서 수동으로 애플리케이션 신뢰 설정이 필요합니다. 다음 절차를 따라주세요: 98 | 1. 터미널을 열고 실행 파일(예: 파일명이 KrillinAI_1.0.0_macOS_arm64인 경우)이 위치한 디렉토리로 이동합니다. 99 | 2. 다음 명령어들을 순차적으로 실행해주세요: 100 | ``` 101 | sudo xattr -rd com.apple.quarantine ./KrillinAI_1.0.0_macOS_arm64 102 | sudo chmod +x ./KrillinAI_1.0.0_macOS_arm64 103 | ./KrillinAI_1.0.0_macOS_arm64 104 | ``` 105 | 이렇게 하면 서비스가 시작됩니다. 106 | 107 | ### 도커 배포 108 | 이 프로젝트는 도커 배포를 지원합니다. 자세한 내용은 [Docker Deployment Instructions](./docs/docker.md)를 참고해주세요. 109 | 110 | ### 쿠키 설정 안내 111 | 112 | 비디오 다운로드 실패 시 [Cookie Configuration Instructions](./docs/get_cookies.md) 를 참조하여 쿠키 정보를 설정해주세요. 113 | 114 | ### 설정 가이드 115 | 가장 빠르고 편리한 설정 방법: 116 | * transcription_provider와 llm_provider 모두 openai를 선택하세요. 이 경우 다음 3가지 주요 설정 항목 카테고리(openai, local_model, aliyun) 중 openai.apikey만 입력하면 자막 번역을 수행할 수 있습니다. (app.proxy, model, openai.base_url은 각자의 상황에 맞게 입력하세요.) 117 | 118 | 로컬 음성 인식 모델 사용 설정 방법 (현재 macOS 미지원) (비용, 속도, 품질을 고려한 선택): 119 | * transcription_provider에는 fasterwhisper를, llm_provider에는 openai를 입력하세요. 이 경우 openai와 local_model 카테고리에서 openai.apikey와 local_model.faster_whisper만 입력하면 자막 번역이 가능합니다. 로컬 모델은 자동으로 다운로드됩니다. (위에서 언급한 app.proxy와 openai.base_url도 동일하게 적용됩니다.) 120 | 121 | 다음 사용 상황에서는 알리바바 클라우드 설정이 필요합니다: 122 | * llm_provider에 aliyun을 입력한 경우: 알리바바 클라우드의 대형 모델 서비스를 사용하게 되므로, aliyun.bailian 항목 설정이 필요합니다. 123 | * transcription_provider에 aliyun을 입력하거나 작업 시작 시 "보이스 더빙" 기능을 활성화한 경우: 알리바바 클라우드의 음성 서비스를 사용하게 되므로, aliyun.speech 항목 설정이 필요합니다. 124 | * "보이스 더빙" 기능을 활성화하면서 동시에 로컬 오디오 파일을 업로드해 음색 복제를 하는 경우: 알리바바 클라우드의 OSS 클라우드 스토리지 서비스도 사용하게 되므로, aliyun.oss 항목 설정이 필요합니다. 125 | 설정 가이드: [Alibaba Cloud Configuration Instructions](./docs/aliyun.md) 126 | 127 | ## 자주 묻는 질문 128 | 자세한 내용은 [Frequently Asked Questions](./docs/faq.md)를 참조해주세요. 129 | 130 | ## 기여 가이드라인 131 | 132 | - .vscode, .idea 등 불필요한 파일은 제출하지 마세요. .gitignore 파일을 활용해 필터링해주세요. 133 | - config.toml 대신 config-example.toml 파일을 제출해주세요. 
134 | 135 | ## 스타 히스토리 136 | 137 | [![Star History Chart](https://api.star-history.com/svg?repos=krillinai/KrillinAI&type=Date)](https://star-history.com/#krillinai/KrillinAI&Date) 138 | -------------------------------------------------------------------------------- /docs/README_rus.md: -------------------------------------------------------------------------------- 1 |
2 | KrillinAI 3 | 4 | # AI инструмент для перевода и озвучки аудио и видео 5 | 6 | krillinai%2FKrillinAI | Trendshift 7 | 8 | **[English](../README.md)|[简体中文](../docs/README_zh.md)|[日本語](../docs/README_jp.md)|[한국어](../docs/README_kr.md)|[Tiếng Việt](../docs/README_vi.md)|[Français](../docs/README_fr.md)|[Deutsch](../docs/README_de.md)|[Español](../docs/README_es.md)|[Português](../docs/README_pt.md)|[Русский](../docs/README_rus.md)|[اللغة العربية](../docs/README_ar.md)** 9 | 10 | [![Twitter](https://img.shields.io/badge/Twitter-KrillinAI-orange?logo=twitter)](https://x.com/KrillinAI) 11 | [![Discord](https://img.shields.io/discord/1333374141092331605?label=Discord&logo=discord&style=flat-square)](https://discord.gg/sKUAsHfy) 12 | [![Bilibili](https://img.shields.io/badge/dynamic/json?label=Bilibili&query=%24.data.follower&suffix=%20подписчиков&url=https%3A%2F%2Fapi.bilibili.com%2Fx%2Frelation%2Fstat%3Fvmid%3D242124650&logo=bilibili&color=00A1D6&labelColor=FE7398&logoColor=FFFFFF)](https://space.bilibili.com/242124650) 13 | 14 |
15 | 16 | ## Обзор 17 | 18 | Krillin AI — это универсальное решение для простой локализации и улучшения видео. Этот минималистичный, но мощный инструмент выполняет всё: от перевода и дубляжа до клонирования голоса и адаптации формата — легко преобразует видео между горизонтальным и вертикальным режимами для идеального отображения на любых платформах (YouTube, TikTok, Bilibili, Douyin, WeChat Channel, RedNote, Kuaishou). Благодаря сквозному рабочему процессу Krillin AI превращает исходные материалы в готовый к публикации контент всего за несколько кликов. 19 | 20 | ## Ключевые возможности: 21 | 🎯 **Запуск в один клик** - мгновенное начало работы 22 | 23 | 📥 **Загрузка видео** - поддержка yt-dlp и локальных файлов 24 | 25 | 📜 **Точные субтитры** - распознавание с высокой точностью на основе Whisper 26 | 27 | 🧠 **Умное разделение** - логическая разбивка и выравнивание субтитров с помощью LLM 28 | 29 | 🌍 **Профессиональный перевод** - согласованный перевод на уровне абзацев 30 | 31 | 🔄 **Замена терминов** - смена специализированной лексики в один клик 32 | 33 | 🎙️ **Озвучка и клонирование голоса** - выбор голосов CosyVoice или создание копий 34 | 35 | 🎬 **Видеомонтаж** - автоматическое форматирование для горизонтальных и вертикальных форматов 36 | 37 | ## Пример работы 38 | На изображении ниже показан результат автоматической вставки субтитров в видео после однокликового запуска обработки 46-минутного локального видео. Никаких ручных корректировок не производилось. 39 | ![Alignment](../docs/images/alignment.png) 40 | 41 | 42 | 43 | 50 | 57 | 58 | 65 | 66 |
44 | 45 | ### Перевод субтитров 46 | --- 47 | https://github.com/user-attachments/assets/bba1ac0a-fe6b-4947-b58d-ba99306d0339 48 | 49 | 51 | 52 | ### Озвучка 53 | --- 54 | https://github.com/user-attachments/assets/0b32fad3-c3ad-4b6a-abf0-0865f0dd2385 55 | 56 | 59 | 60 | ### портретный режим 61 | --- 62 | https://github.com/user-attachments/assets/c2c7b528-0ef8-4ba9-b8ac-f9f92f6d4e71 63 | 64 |
67 | 68 | ## 🔍 Поддержка распознавания речи 69 | _**Все локальные модели в таблице ниже поддерживают автоматическую установку исполняемых файлов + файлов моделей. Просто сделайте свой выбор, а KrillinAI сделает всё остальное за вас.**_ 70 | 71 | | Сервис | Поддерживаемые платформы | Варианты моделей | Локально/Облако | Примечания | 72 | |-----------------|------------------------------|-----------------------------------|-------------|----------------| 73 | | **OpenAI Whisper** | Кроссплатформенный | - | Облако | Быстрое с отличными результатами | 74 | | **FasterWhisper** | Windows/Linux | `tiny`/`medium`/`large-v2` (recommend medium+) | Локально | Более высокая скорость, без нагрузки на облачный сервис | 75 | | **WhisperKit** | macOS (Apple Silicon only) | `large-v2` | Локально | Нативная оптимизация для чипов Apple | 76 | | **Alibaba Cloud ASR** | Кроссплатформенный | - | Облако | Обходит проблемы сети в материковом Китае | 77 | 78 | ## 🚀 Поддержка больших языковых моделей 79 | 80 | ✅ Совместим со всеми **совместимыми с OpenAI API** облачными/локальными LLM-сервисами, включая, но не ограничиваясь: 81 | - OpenAI 82 | - DeepSeek 83 | - Qwen (Tongyi Qianwen) 84 | - Самостоятельно размещённые open-source модели 85 | - Другие API-сервисы, совместимые с форматом OpenAI 86 | 87 | 88 | ## 🌍 Поддерживаемые языки 89 | Входные языки: китайский, английский, японский, немецкий, турецкий (добавляются новые языки) 90 | Языки перевода: 101 языков, включая английский, китайский, русский, испанский, французский и др. 91 | 92 | ## Предпросмотр интерфейса 93 | ![Предпросмотр интерфейса](../docs/images/ui_desktop.png) 94 | 95 | 96 | ## 🚀 Быстрый старт 97 | ### Основные шаги 98 | 1. Скачайте исполняемый файл, соответствующий вашей операционной системе, из раздела релизов и поместите его в пустую папку. 99 | 2. Создайте папку config внутри этой папки, затем создайте файл config.toml в папке config. Скопируйте содержимое файла config-example.toml из директории config исходного кода в config.toml и заполните вашу конфигурационную информацию соответствующим образом. 100 | 3. Дважды щелкните на исполняемом файле, чтобы запустить сервис. 101 | 4. Откройте браузер и введите http://127.0.0.1:8888, чтобы начать использование (замените 8888 на порт, который вы указали в файле config.toml). 102 | 103 | ### Для пользователей macOS 104 | Это программное обеспечение не подписано, поэтому после завершения настройки файлов в "Основных шагах" вам потребуется вручную подтвердить доверие к приложению в macOS. Выполните следующие действия: 105 | 1. Откройте терминал и перейдите в директорию, где находится исполняемый файл (предположим, имя файла `KrillinAI_1.0.0_macOS_arm64`). 106 | 2. Выполните следующие команды по порядку: 107 | ``` 108 | sudo xattr -rd com.apple.quarantine ./KrillinAI_1.0.0_macOS_arm64 109 | sudo chmod +x ./KrillinAI_1.0.0_macOS_arm64 110 | ./KrillinAI_1.0.0_macOS_arm64 111 | ``` 112 | Это запустит сервис. 113 | 114 | ### Инструкции по настройке Cookie 115 | Этот проект поддерживает развертывание через Docker. Пожалуйста, обратитесь к [Docker Deployment Instructions](./docs/docker.md). 116 | 117 | ### Cookie Configuration Instructions 118 | 119 | Если вы столкнулись с ошибками при загрузке видео, пожалуйста, обратитесь к [Cookie Configuration Instructions](./docs/get_cookies.md) для настройки информации о ваших cookie. 120 | 121 | ### Помощь по настройке 122 | Самый быстрый и удобный способ настройки: 123 | * Выберите openai для transcription_provider и llm_provider. 
124 | 125 | Способ настройки для использования локальной модели распознавания речи (временно не поддерживается на macOS) (выбор, учитывающий стоимость, скорость и качество): 126 | * Заполните fasterwhisper для transcription_provider и openai для llm_provider. Таким образом, вам нужно будет заполнить только openai.apikey и local_model.faster_whisper в следующих двух основных категориях конфигурации, а именно openai и local_model, и затем вы сможете выполнять перевод субтитров. Локальная модель будет загружена автоматически. (То же самое относится к app.proxy и openai.base_url, как упоминалось выше.) 127 | 128 | Следующие ситуации использования требуют настройки Alibaba Cloud: 129 | * Если llm_provider заполнен как aliyun, это означает, что будет использоваться сервис больших моделей Alibaba Cloud. Следовательно, необходимо настроить параметр aliyun.bailian. 130 | * Если transcription_provider заполнен как aliyun, или если функция "озвучки" включена при запуске задачи, будет использоваться голосовой сервис Alibaba Cloud. Поэтому необходимо заполнить параметр aliyun.speech. 131 | * Если функция "озвучки" включена и одновременно загружаются локальные аудиофайлы для клонирования тембра голоса, также будет использоваться сервис облачного хранилища OSS от Alibaba Cloud. Следовательно, необходимо заполнить параметр aliyun.oss. 132 | Руководство по настройке: [Alibaba Cloud Configuration Instructions](../docs/aliyun.md) 133 | 134 | ## Часто задаваемые вопросы 135 | Пожалуйста, обратитесь к [Frequently Asked Questions](../docs/faq.md) 136 | 137 | ## Рекомендации по внесению вклада 138 | 139 | - Не отправляйте ненужные файлы, такие как .vscode, .idea и т.д. Пожалуйста, используйте .gitignore для их фильтрации. 140 | - Не отправляйте config.toml; вместо этого отправляйте config-example.toml. 141 | 142 | ## История звезд 143 | 144 | [![Star History Chart](https://api.star-history.com/svg?repos=krillinai/KrillinAI&type=Date)](https://star-history.com/#krillinai/KrillinAI&Date) 145 | -------------------------------------------------------------------------------- /docs/README_vi.md: -------------------------------------------------------------------------------- 1 | <div align="center">
2 | KrillinAI 3 | 4 | 5 | # # Công Cụ Dịch Thuật và Lồng Tiếng AI cho Âm Thanh & Video 6 | 7 | krillinai%2FKrillinAI | Trendshift 8 | 9 | **[English](../README.md)|[简体中文](../docs/README_zh.md)|[日本語](../docs/README_jp.md)|[한국어](../docs/README_kr.md)|[Tiếng Việt](../docs/README_vi.md)|[Français](../docs/README_fr.md)|[Deutsch](../docs/README_de.md)|[Español](../docs/README_es.md)|[Português](../docs/README_pt.md)|[Русский](../docs/README_rus.md)|[اللغة العربية](../docs/README_ar.md)** 10 | 11 | [![Twitter](https://img.shields.io/badge/Twitter-KrillinAI-orange?logo=twitter)](https://x.com/KrillinAI) 12 | [![Discord](https://img.shields.io/discord/1333374141092331605?label=Discord&logo=discord&style=flat-square)](https://discord.gg/sKUAsHfy) 13 | [![Bilibili](https://img.shields.io/badge/dynamic/json?label=Bilibili&query=%24.data.follower&suffix=%20followers&url=https%3A%2F%2Fapi.bilibili.com%2Fx%2Frelation%2Fstat%3Fvmid%3D242124650&logo=bilibili&color=00A1D6&labelColor=FE7398&logoColor=FFFFFF)](https://space.bilibili.com/242124650) 14 | 15 |
16 | 17 | ### 📢 Phiên Bản Mới Cho Desktop Win & Mac – Chào Đón Trải Nghiệm Và Đóng Góp Ý Kiến 18 | 19 | ## Tổng Quan 20 | 21 | Krillin AI là giải pháp toàn diện để địa phương hóa và nâng cấp video một cách dễ dàng. Công cụ tối giản nhưng mạnh mẽ này xử lý mọi thứ từ dịch thuật, lồng tiếng đến nhân bản giọng nói, định dạng – chuyển đổi liền mạch video giữa chế độ ngang và dọc để tối ưu hiển thị trên mọi nền tảng nội dung (YouTube, TikTok, Bilibili, Douyin, Kênh WeChat, RedNote, Kuaishou). Với quy trình làm việc end-to-end, Krillin AI biến footage thô thành nội dung hoàn thiện, sẵn sàng đăng tải chỉ với vài cú nhấp chuột. 22 | 23 | ## Tính năng chính: 24 | 🎯 **Khởi động một chạm** - Bắt đầu quy trình làm việc ngay lập tức, Phiên bản desktop mới - sử dụng dễ dàng hơn! 25 | 26 | 📥 **Tải video** - Hỗ trợ yt-dlp và tải file từ máy tính 27 | 28 | 📜 **Phụ đề chính xác** - Nhận diện với độ chính xác cao nhờ Whisper 29 | 30 | 🧠 **Phân đoạn thông minh** - Chia nhỏ và căn chỉnh phụ đề dựa trên LLM 31 | 32 | 🌍 **Dịch thuật chuyên nghiệp** - Dịch theo đoạn văn để đảm bảo tính nhất quán 33 | 34 | 🔄 **Thay thế thuật ngữ** - Đổi từ vựng chuyên ngành chỉ với một cú nhấp chuột 35 | 36 | 🎙️ **Lồng tiếng & Nhân bản giọng nói** - Lựa chọn giọng CosyVoice hoặc giọng nhân bản 37 | 38 | 🎬 **Tổng hợp video** - Tự động định dạng cho bố cục ngang/dọc 39 | 40 | ## Minh họa 41 | Bức ảnh dưới đây thể hiện kết quả sau khi file phụ đề - được tạo tự động chỉ bằng một cú nhấp chuột từ video local 46 phút - được chèn vào timeline. Toàn bộ quá trình không hề có bất kỳ chỉnh sửa thủ công nào. Phụ đề hiển thị đầy đủ không bị thiếu hay chồng chéo, cách phân đoạn câu tự nhiên, chất lượng bản dịch cũng rất cao. 42 | ![Alignment](../docs/images/alignment.png) 43 | 44 | 45 | 46 | 53 | 60 | 61 | 68 | 69 | 70 |
47 | 48 | ### Phụ đề dịch 49 | --- 50 | https://github.com/user-attachments/assets/bba1ac0a-fe6b-4947-b58d-ba99306d0339 51 | 52 | 54 | 55 | ### Lồng tiếng 56 | --- 57 | https://github.com/user-attachments/assets/0b32fad3-c3ad-4b6a-abf0-0865f0dd2385 58 | 59 | 62 | 63 | ### Dọc 64 | --- 65 | https://github.com/user-attachments/assets/c2c7b528-0ef8-4ba9-b8ac-f9f92f6d4e71 66 | 67 |
71 | 72 | ## 🔍 Hỗ trợ Nhận dạng Giọng nói 73 | _**Tất cả mô hình cục bộ trong bảng dưới đây hỗ trợ cài đặt tự động file thực thi + file mô hình. Chỉ cần lựa chọn, KrillinAI sẽ tự động xử lý phần còn lại cho bạn.**_ 74 | 75 | | Dịch vụ | Nền tảng hỗ trợ | Tùy chọn mô hình | Cục bộ/Đám mây | Ghi chú | 76 | |-----------------|------------------------------|-----------------------------------|-------------|----------------| 77 | | **OpenAI Whisper** | Đa nền tảng | - | Đám mây | Tốc độ nhanh với kết quả xuất sắc | 78 | | **FasterWhisper** | Windows/Linux | `tiny`/`medium`/`large-v2` (recommend medium+) | Cục bộ | Tốc độ nhanh hơn, không phụ thuộc dịch vụ đám mây | 79 | | **WhisperKit** | macOS (Apple Silicon only) | `large-v2` | Cục bộ | Tối ưu hóa riêng cho chip Apple | 80 | | **Alibaba Cloud ASR** | Đa nền tảng | - | Đám mây | Không gặp vấn đề mạng tại Trung Quốc đại lục | 81 | 82 | ## 🚀 Hỗ trợ Mô hình Ngôn ngữ Lớn 83 | 84 | ✅ Tương thích với tất cả dịch vụ LLM đám mây/cục bộ tương thích **OpenAI API** bao gồm nhưng không giới hạn: 85 | - OpenAI 86 | - DeepSeek 87 | - Qwen (Tongyi Qianwen) 88 | - Các mô hình mã nguồn mở tự triển khai 89 | - Các dịch vụ API tương thích định dạng OpenAI khác 90 | 91 | ## 🌍 Hỗ trợ Ngôn ngữ 92 | Ngôn ngữ đầu vào: Hỗ trợ tiếng Trung, Anh, Nhật, Đức, Thổ Nhĩ Kỳ (đang tiếp tục bổ sung thêm) 93 | Ngôn ngữ dịch: Hỗ trợ 101 ngôn ngữ bao gồm tiếng Anh, Trung, Nga, Tây Ban Nha, Pháp,... 94 | 95 | ## Xem trước giao diện 96 | ![ui preview](../docs/images/ui_desktop.png) 97 | 98 | ## 🚀 Bắt đầu nhanh 99 | ### Các bước cơ bản 100 | Đầu tiên, tải file thực thi Release phù hợp với hệ thống thiết bị của bạn. Làm theo hướng dẫn dưới đây để chọn giữa phiên bản desktop hoặc non-desktop, sau đó đặt phần mềm vào thư mục trống. Chạy chương trình sẽ tạo ra một số thư mục, vì vậy việc đặt trong thư mục trống giúp quản lý dễ dàng hơn. 101 | 102 | [Đối với phiên bản desktop (file release có chứa "desktop" trong tên), xem hướng dẫn tại đây] 103 | Phiên bản desktop mới được phát hành để giải quyết khó khăn cho người mới trong việc chỉnh sửa file cấu hình. Phiên bản này vẫn còn một số lỗi và đang được cập nhật liên tục. 104 | 105 | Nhấp đúp vào file để bắt đầu sử dụng. 106 | 107 | [Đối với phiên bản non-desktop (file release không có "desktop" trong tên), xem hướng dẫn tại đây] 108 | Phiên bản non-desktop là bản phát hành gốc, có cấu hình phức tạp hơn nhưng chức năng ổn định. Phiên bản này cũng phù hợp để triển khai trên server, vì cung cấp giao diện web. 109 | 110 | Tạo thư mục config trong thư mục chứa phần mềm, sau đó tạo file config.toml trong đó. Sao chép nội dung từ file config-example.toml trong thư mục config của mã nguồn vào file config.toml của bạn và điền các thông tin cấu hình. (Nếu bạn muốn sử dụng các mô hình OpenAI nhưng không biết cách lấy key, có thể tham gia nhóm để được dùng thử miễn phí.) 111 | 112 | Nhấp đúp vào file thực thi hoặc chạy trong terminal để khởi động dịch vụ. 113 | 114 | Mở trình duyệt và truy cập http://127.0.0.1:8888 để bắt đầu sử dụng. (Thay 8888 bằng số cổng bạn đã chỉ định trong file config.) 115 | 116 | ### Dành cho người dùng macOS 117 | [Đối với phiên bản desktop (file bản phát hành có chứa "desktop" trong tên), làm theo hướng dẫn sau] 118 | Do vấn đề chứng thực, phiên bản desktop hiện chưa hỗ trợ chạy trực tiếp bằng double-click hoặc cài đặt qua DMG. Cần cấu hình thủ công như sau: 119 | 120 | 1. Mở Terminal và truy cập thư mục chứa file thực thi (giả sử tên file là KrillinAI_1.0.0_desktop_macOS_arm64) 121 | 122 | 2. 
Thực hiện lần lượt các lệnh sau: 123 | 124 | ``` 125 | sudo xattr -cr ./KrillinAI_1.0.0_desktop_macOS_arm64 126 | sudo chmod +x ./KrillinAI_1.0.0_desktop_macOS_arm64 127 | ./KrillinAI_1.0.0_desktop_macOS_arm64 128 | ``` 129 | 130 | [Đối với phiên bản non-desktop (file bản phát hành không có "desktop" trong tên), làm theo hướng dẫn sau] 131 | Phần mềm này chưa được chứng thực, nên sau khi hoàn thành các bước cấu hình file ở mục "Các bước cơ bản", bạn cần thủ công cấp quyền trust ứng dụng trên macOS. Thực hiện theo các bước sau: 132 | 1. Mở Terminal và điều hướng đến thư mục chứa file thực thi (giả sử tên file là KrillinAI_1.0.0_macOS_arm64) 133 | 2. Thực hiện lần lượt các lệnh sau: 134 | ``` 135 | sudo xattr -rd com.apple.quarantine ./KrillinAI_1.0.0_macOS_arm64 136 | sudo chmod +x ./KrillinAI_1.0.0_macOS_arm64 137 | ./KrillinAI_1.0.0_macOS_arm64 138 | ``` 139 | Thao tác này sẽ khởi động dịch vụ. 140 | 141 | ### Triển khai bằng Docker 142 | Dự án này hỗ trợ triển khai qua Docker. Vui lòng tham khảo [Docker Deployment Instructions](../docs/docker.md). 143 | 144 | ### Hướng dẫn cấu hình Cookie 145 | 146 | Nếu gặp lỗi khi tải video xuống, vui lòng tham khảo [Cookie Configuration Instructions](./docs/get_cookies.md) để thiết lập thông tin cookie của bạn. 147 | 148 | ### Hướng dẫn cấu hình 149 | Cách cấu hình nhanh chóng và tiện lợi nhất: 150 | * Chọn openai cho cả transcription_provider và llm_provider. Với cách này, bạn chỉ cần điền openai.apikey trong ba nhóm cấu hình chính sau: openai, local_model và aliyun là có thể thực hiện dịch phụ đề. (Điền app.proxy, model và openai.base_url theo tình hình thực tế của bạn.) 151 | 152 | Cách cấu hình sử dụng mô hình nhận dạng giọng nói cục bộ (tạm thời chưa hỗ trợ macOS) (lựa chọn cân bằng giữa chi phí, tốc độ và chất lượng): 153 | * Điền fasterwhisper cho transcription_provider và openai cho llm_provider. Với cách này, bạn chỉ cần điền openai.apikey và local_model.faster_whisper trong hai nhóm cấu hình openai và local_model là có thể thực hiện dịch phụ đề. Mô hình cục bộ sẽ được tải xuống tự động. (Tương tự với app.proxy và openai.base_url như đã đề cập ở trên.) 154 | 155 | Các trường hợp sử dụng sau yêu cầu cấu hình Alibaba Cloud: 156 | * Nếu llm_provider điền aliyun nghĩa là sẽ sử dụng dịch vụ mô hình lớn của Alibaba Cloud, do đó cần cấu hình mục aliyun.bailian. 157 | * Nếu transcription_provider điền aliyun, hoặc khi bật chức năng "lồng tiếng" khi bắt đầu tác vụ sẽ sử dụng dịch vụ giọng nói của Alibaba Cloud, do đó cần điền cấu hình mục aliyun.speech. 158 | * Nếu bật chức năng "lồng tiếng" đồng thời tải lên file âm thanh cục bộ để nhân bản giọng nói thì sẽ sử dụng cả dịch vụ lưu trữ đám mây OSS của Alibaba Cloud, do đó cần điền cấu hình mục aliyun.oss. 159 | Hướng dẫn cấu hình: [Alibaba Cloud Configuration Instructions](../docs/aliyun.md) 160 | 161 | ## Câu hỏi thường gặp 162 | Vui lòng tham khảo [Frequently Asked Questions](../docs/faq.md) 163 | 164 | ## Hướng dẫn đóng góp 165 | 166 | - Không gửi các file không cần thiết như .vscode, .idea,... Hãy sử dụng tốt file .gitignore để lọc chúng. 167 | - Không gửi file config.toml mà hãy gửi file config-example.toml. 168 | 169 | ## Lịch sử sao 170 | 171 | [![Star History Chart](https://api.star-history.com/svg?repos=krillinai/KrillinAI&type=Date)](https://star-history.com/#krillinai/KrillinAI&Date) 172 | 173 | -------------------------------------------------------------------------------- /docs/README_zh.md: -------------------------------------------------------------------------------- 1 |
2 | KrillinAI 3 | 4 | # 极简部署AI视频翻译配音工具 5 | 6 | krillinai%2FKrillinAI | Trendshift 7 | 8 | **[English](../README.md)|[简体中文](../docs/README_zh.md)|[日本語](../docs/README_jp.md)|[한국어](../docs/README_kr.md)|[Tiếng Việt](../docs/README_vi.md)|[Français](../docs/README_fr.md)|[Deutsch](../docs/README_de.md)|[Español](../docs/README_es.md)|[Português](../docs/README_pt.md)|[Русский](../docs/README_rus.md)|[اللغة العربية](../docs/README_ar.md)** 9 | 10 | [![QQ 群](https://img.shields.io/badge/QQ%20群-754069680-green?logo=tencent-qq)](https://jq.qq.com/?_wv=1027&k=754069680) 11 | [![Bilibili](https://img.shields.io/badge/dynamic/json?label=Bilibili&query=%24.data.follower&suffix=粉丝&url=https%3A%2F%2Fapi.bilibili.com%2Fx%2Frelation%2Fstat%3Fvmid%3D242124650&logo=bilibili&color=00A1D6&labelColor=FE7398&logoColor=FFFFFF)](https://space.bilibili.com/242124650) 12 | 13 |
14 | 15 | ### 📢win&mac桌面端新发布 欢迎测试反馈[文档有点落后,持续更新中] 16 | 17 | ## 项目简介 18 | 19 | Krillin AI 是一款全能型音视频本地化与增强解决方案。这款简约而强大的工具,集音视频翻译、配音、语音克隆于一身,支持横竖屏格式输出,确保在所有主流平台(哔哩哔哩,小红书,抖音,视频号,快手,YouTube,TikTok等)都能完美呈现。通过端到端的工作流程,Krillin AI 仅需点击几次,就能将原始素材转化为精美即用的跨平台内容。 20 | 21 | ## 主要特点与功能: 22 | 🎯 **一键启动**:无需复杂的环境配置,自动安装依赖,立即投入使用,新增桌面版本,使用更便捷! 23 | 24 | 📥 **视频获取**:支持yt-dlp下载或本地文件上传 25 | 26 | 📜 **精准识别**:基于Whisper的高准确度语音识别 27 | 28 | 🧠 **智能分段**:使用LLM进行字幕分段和对齐 29 | 30 | 🔄 **术语替换**:一键替换专业领域词汇 31 | 32 | 🌍 **专业翻译**:基于LLM,段落级翻译保持语义连贯性 33 | 34 | 🎙️ **配音克隆**:提供CosyVoice精选音色或自定义音色克隆 35 | 36 | 🎬 **视频合成**:自动处理横竖版视频和字幕排版 37 | 38 | 39 | ## 效果展示 40 | 下图为46分钟的本地视频导入,一键执行后生成的字幕文件入轨后的效果,无任何手动调整。无缺失、重叠,断句自然,翻译质量也非常高。 41 | ![对齐效果](./images/alignment.png) 42 | 43 | 44 | 45 | 52 | 61 | 62 | 69 | 70 | 71 |
46 | 47 | ### 字幕翻译 48 | --- 49 | https://github.com/user-attachments/assets/bba1ac0a-fe6b-4947-b58d-ba99306d0339 50 | 51 | 53 | 54 | 55 | 56 | ### 配音 57 | --- 58 | https://github.com/user-attachments/assets/0b32fad3-c3ad-4b6a-abf0-0865f0dd2385 59 | 60 | 63 | 64 | ### 竖屏 65 | --- 66 | https://github.com/user-attachments/assets/c2c7b528-0ef8-4ba9-b8ac-f9f92f6d4e71 67 | 68 |
72 | 73 | ## 🔍 语音识别服务支持 74 | _**下表中的本地模型全部支持自动安装可执行文件+模型文件,你只要选择,其它的KrillinAI帮你全部准备完毕。**_ 75 | 76 | | 服务源 | 支持平台 | 模型可选项 | 本地/云端 | 备注 | 77 | |----------------|------------------------------|-----------------------------------|-------|-------------| 78 | | **OpenAI Whisper** | 全平台 | - | 云端 | 速度快效果好 | 79 | | **FasterWhisper** | Windows/Linux | `tiny`/`medium`/`large-v2` (推荐medium+) | 本地 | 速度更快,无云服务开销 | 80 | | **WhisperKit** | macOS (仅限M系列芯片) | `large-v2` | 本地 | Apple芯片原生优化 | 81 | | **阿里云ASR** | 全平台 | - | 云端 | 避免中国大陆网络问题 | 82 | 83 | ## 🚀 大模型支持 84 | 85 | ✅ 兼容所有符合 **OpenAI API规范** 的云端/本地大模型服务,包括但不限于: 86 | - OpenAI 87 | - DeepSeek 88 | - 通义千问 89 | - 本地部署的开源模型 90 | - 其他兼容OpenAI格式的API服务 91 | 92 | ## 语言支持 93 | 输入语言支持:中文,英文,日语,德语,土耳其语,韩语,俄语,马来语(持续增加中) 94 | 95 | 翻译语言支持:英文,中文,俄语,西班牙语,法语等101种语言 96 | 97 | ## 界面预览 98 | ![界面预览](../docs/images/ui_desktop.png) 99 | 100 | 101 | ## 🚀 快速开始 102 | ### 基本步骤 103 | 首先下载[Release](https://github.com/krillinai/KrillinAI/releases)中与你设备系统匹配的可执行文件,按照下面的教程选择桌面版还是非桌面版,然后把软件放入一个空文件夹,因为运行之后会生成一些目录,放在空文件夹里更好管理。 104 | 105 | 【如果是桌面版,即release文件带desktop的看此处】 106 | _桌面版是新发布的,为了解决新手用户难以正确编辑配置文件的问题,还有不少bug,持续更新中_ 107 | 1. 双击文件即可开始使用(桌面端也是需要配置的,在软件内配置) 108 | 109 | 【如果是非桌面版,即release文件不带desktop的看此处】 110 | _非桌面版是一开始的版本,配置比较复杂,但是功能稳定,同时适合服务器部署,因为会以web的方式提供ui_ 111 | 1. 在文件夹内创建`config`文件夹,然后在`config`文件夹创建`config.toml`文件,复制源代码`config`目录下的`config-example.toml`文件的内容填入`config.toml`,并对照填写你的配置信息。 112 | 2. 双击,或在终端执行可执行文件,启动服务 113 | 3. 打开浏览器,输入`http://127.0.0.1:8888`,开始使用 (8888替换成你在配置文件中填写的端口) 114 | 115 | ### To: macOS用户 116 | 【如果是桌面版,即release文件带desktop的看此处】 117 | 由于签名等问题,桌面端目前的打包方式还不能做到双击直接运行或者dmg安装,需要手动信任应用,方法如下: 118 | 1. 在终端打开可执行文件(假设文件名是KrillinAI_1.0.0_desktop_macOS_arm64)所在目录 119 | 2. 依次执行以下命令: 120 | ``` 121 | sudo xattr -cr ./KrillinAI_1.0.0_desktop_macOS_arm64 122 | sudo chmod +x ./KrillinAI_1.0.0_desktop_macOS_arm64 123 | ./KrillinAI_1.0.0_desktop_macOS_arm64 124 | ``` 125 | 126 | 【如果是非桌面版,即release文件不带desktop的看此处】 127 | 本软件没有做签名,因此在macOS上运行时,在完成“基本步骤”中的文件配置后,还需要手动信任应用,方法如下: 128 | 1. 在终端打开可执行文件(假设文件名是KrillinAI_1.0.0_macOS_arm64)所在目录 129 | 2. 依次执行以下命令: 130 | ``` 131 | sudo xattr -rd com.apple.quarantine ./KrillinAI_1.0.0_macOS_arm64 132 | sudo chmod +x ./KrillinAI_1.0.0_macOS_arm64 133 | ./KrillinAI_1.0.0_macOS_arm64 134 | ``` 135 | 即可启动服务 136 | 137 | ### Docker部署 138 | 本项目支持Docker部署,请参考[Docker部署说明](./docker.md) 139 | 140 | ### Cookie配置说明(非必选) 141 | 142 | 如果你遇到视频下载失败的情况 143 | 144 | 请参考 [Cookie 配置说明](./get_cookies.md) 配置你的Cookie信息。 145 | 146 | ### 配置帮助(必看) 147 | 最快速便捷的配置方式: 148 | * `transcription_provider`和`llm_provider`都选择`openai`,这样在下方`openai`、`local_model`、`aliyun`三个配置项大类里只需要填写`openai.apikey`就可以进行字幕翻译。(`app.proxy`、`model`和`openai.base_url`按自己情况选填) 149 | 150 | 使用本地语言识别模型(暂不支持macOS)的配置方式(兼顾成本、速度与质量的选择) 151 | * `transcription_provider`填写`fasterwhisper`,`llm_provider`填写`openai`,这样在下方`openai`、`local_model`两个配置项大类里只需要填写`openai.apikey`和`local_model.faster_whisper`就可以进行字幕翻译,本地模型会自动下载。(`app.proxy`和`openai.base_url`同上) 152 | 153 | 以下几种使用情况,需要进行阿里云的配置: 154 | * 如果`llm_provider`填写了`aliyun`,需要使用阿里云的大模型服务,因此需要配置`aliyun.bailian`项的配置 155 | * 如果`transcription_provider`填写了`aliyun`,或者在启动任务时开启了“配音”功能,都需要使用阿里云的语音服务,因此需要填写`aliyun.speech`项的配置 156 | * 如果开启了“配音”功能,同时上传了本地的音频做音色克隆,则还需要使用阿里云的OSS云存储服务,因此需要填写`aliyun.oss`项的配置 157 | 阿里云配置帮助:[阿里云配置说明](./aliyun.md) 158 | 159 | ## 常见问题 160 | 161 | 请移步[常见问题](./faq.md) 162 | 163 | ## 贡献规范 164 | 1. 不要提交无用文件,如.vscode、.idea等,请善于使用.gitignore过滤 165 | 2. 不要提交config.toml,而是使用config-example.toml提交 166 | 167 | ## 联系我们 168 | 1. 加入我们的QQ群,解答问题:754069680 169 | 2. 
关注我们的社交媒体账号,[哔哩哔哩](https://space.bilibili.com/242124650),每天分享AI科技领域优质内容 170 | 171 | ## Star History 172 | 173 | [![Star History Chart](https://api.star-history.com/svg?repos=krillinai/KrillinAI&type=Date)](https://star-history.com/#krillinai/KrillinAI&Date) 174 | -------------------------------------------------------------------------------- /docs/aliyun.md: -------------------------------------------------------------------------------- 1 | ## 前提条件 2 | 需要先有[阿里云](https://www.aliyun.com)账号并经过实名认证,多数服务有免费额度 3 | 4 | ## 阿里云百炼平台密钥获取 5 | 1. 登录[阿里云百炼大模型服务平台](https://bailian.console.aliyun.com/),鼠标悬停于页面右上角的个人中心图标上,在下拉菜单中单击API-KEY 6 | ![百炼](./images/bailian_1.png) 7 | 2. 在左侧导航栏,选择全部API-KEY或我的API-KEY,然后创建或查看API Key 8 | 9 | ## 阿里云`access_key_id`和`access_key_secret`获取 10 | 1. 进入[阿里云AccessKey管理页面](https://ram.console.aliyun.com/profile/access-keys) 11 | 2. 点击创建AccessKey,如需要选择使用方式,选择“本地开发环境中使用” 12 | ![阿里云access key](./images/aliyun_accesskey_1.png) 13 | 3. 妥善保管,最好复制到本地文件保存 14 | 15 | ## 阿里云语音服务开通 16 | 1. 进入[阿里云语音服务管理页面](https://nls-portal.console.aliyun.com/applist),首次进入需开通服务 17 | 2. 点击创建项目 18 | ![阿里云speech](images/aliyun_speech_1.png) 19 | 3. 选择功能并开通 20 | ![阿里云speech](images/aliyun_speech_2.png) 21 | 4. “流式文本语音合成(CosyVoice大模型)”需要升级成商业版,其它服务可以用免费体验版 22 | ![阿里云speech](images/aliyun_speech_3.png) 23 | 5. 复制app key即可 24 | ![阿里云speech](images/aliyun_speech_4.png) 25 | 26 | ## 阿里云OSS服务开通 27 | 1. 进入[阿里云对象存储服务控制台](https://oss.console.aliyun.com/overview),首次进入需开通服务 28 | 2. 左侧选择Bucket列表,然后点击创建 29 | ![阿里云OSS](./images/aliyun_oss_1.png) 30 | 3. 选择快捷创建,填写符合要求的Bucket名称并选择**上海**地域,完成创建(此处填写的名字就是配置项`aliyun.oss.bucket`的值) 31 | ![阿里云OSS](./images/aliyun_oss_2.png) 32 | 4. 创建完成后进入Bucket 33 | ![阿里云OSS](./images/aliyun_oss_3.png) 34 | 5. 将“阻止公共访问”开关关闭,并设置读写权限为“公共读” 35 | ![阿里云OSS](./images/aliyun_oss_4.png) 36 | ![阿里云OSS](./images/aliyun_oss_5.png) -------------------------------------------------------------------------------- /docs/docker.md: -------------------------------------------------------------------------------- 1 | # Docker 部署指南 2 | 3 | ## 快速开始 4 | 先准备好配置文件,设置服务器监听端口为`8888`、服务器监听地址为`0.0.0.0` 5 | 6 | ### docker run启动 7 | ```bash 8 | docker run -d \ 9 | -p 8888:8888 \ 10 | -v /path/to/config.toml:/app/config/config.toml \ 11 | asteria798/krillinai 12 | ``` 13 | 14 | ### docker-compose启动 15 | ```yaml 16 | version: '3' 17 | services: 18 | krillin: 19 | image: asteria798/krillinai 20 | ports: 21 | - "8888:8888" 22 | volumes: 23 | - /path/to/config.toml:/app/config/config.toml 24 | ``` 25 | 26 | ## 持久化模型 27 | 如果使用fasterwhisper模型, KrillinAI 会自动下载模型所需文件到`/app/models`目录和`/app/bin`目录。容器删除后,这些文件会丢失。如果需要持久化模型,可以将这两个目录映射到宿主机的目录。 28 | 29 | ### docker run启动 30 | ```bash 31 | docker run -d \ 32 | -p 8888:8888 \ 33 | -v /path/to/config.toml:/app/config/config.toml \ 34 | -v /path/to/models:/app/models \ 35 | -v /path/to/bin:/app/bin \ 36 | krillinai/krillin 37 | ``` 38 | 39 | ### docker-compose启动 40 | ```yaml 41 | version: '3' 42 | services: 43 | krillin: 44 | image: krillinai/krillin 45 | ports: 46 | - "8888:8888" 47 | volumes: 48 | - /path/to/config.toml:/app/config/config.toml 49 | - /path/to/models:/app/models 50 | - /path/to/bin:/app/bin 51 | ``` 52 | 53 | ## 注意事项 54 | 1. 如果docker容器的网络模式不为host,建议将配置文件服务器监听地址设置为`0.0.0.0`,否则可能无法访问服务。 55 | 2. 
如果容器内需要访问宿主机的网络代理,请将代理地址配置项`proxy`的`127.0.0.1`设置为`host.docker.internal`,例如`http://host.docker.internal:7890` -------------------------------------------------------------------------------- /docs/faq.md: -------------------------------------------------------------------------------- 1 | ### 1. 看不到`app.log`日志文件,无法知道报错内容 2 | Windows用户请将本软件的工作目录放在非C盘的文件夹。 3 | 4 | ### 2. 非桌面版明明创建了配置文件,但还是报错“找不到配置文件” 5 | 确保配置文件名是`config.toml`,而不是`config.toml.txt`或其它。 6 | 配置完成后,本软件的工作文件夹的结构应该是这样的: 7 | ``` 8 | /── config/ 9 | │ └── config.toml 10 | ├── cookies.txt (<- 可选的cookies.txt文件) 11 | └── krillinai.exe 12 | ``` 13 | 14 | ### 3. 填写了大模型配置,但是报错“xxxxx需要配置xxxxx API Key” 15 | 模型服务和语音服务虽然可以都使用openai的服务,但也存在大模型单独使用非openai服务的场景,因此这两块配置是分开的:除了大模型配置外,请在配置文件下方找到whisper相关配置,填写对应的密钥等信息。 16 | 
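下面给出一个示意性的配置片段,帮助理解上面这一条:大模型(翻译)和语音识别(whisper)是两块互相独立的配置,密钥需要分别填写。字段名仅为示意,具体以源代码 `config` 目录下的 `config-example.toml` 为准。

```toml
# 示意:大模型与语音识别分属两块配置(字段名以 config-example.toml 为准)
[openai]
apikey = "sk-xxxx"   # 大模型(翻译)使用的密钥

# ……继续往下翻,配置文件里还有 whisper / 语音识别相关的配置块;
# 即使同样使用 OpenAI 的服务,也需要在那里单独填写对应的密钥,
# 否则就会出现“xxxxx需要配置xxxxx API Key”的报错。
```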
17 | ### 4. 报错内含“yt-dlp error” 18 | 这是视频下载器的问题,目前看来无非是网络问题或者下载器版本问题。请检查网络代理有没有打开,并配置到配置文件的代理配置项,同时建议选择香港节点。下载器是本软件自动安装的,安装源我会更新,但毕竟不是官方源,可能会有落后,遇到问题可尝试手动更新一下,更新方法: 19 | 20 | 在软件bin目录位置打开终端,执行 21 | ``` 22 | ./yt-dlp.exe -U 23 | ``` 24 | 此处`yt-dlp.exe`替换为你系统实际的ytdlp软件名称。 -------------------------------------------------------------------------------- /docs/get_cookies.md: -------------------------------------------------------------------------------- 1 | # Cookie 配置说明 2 | 3 | ## 问题说明 4 | 在生成字幕的时候,可能会遇到出错的情况,例如“Sign in to confirm you are not a bot”: 5 | 6 | 这是因为: 7 | 1. 部分视频平台需要用户登录信息才能获取高质量视频 8 | 2. 您当前的代理的ip不够纯净,已被视频网站官方限制 9 | 10 | ## 解决方法 11 | 12 | ### 1. 安装浏览器扩展 13 | 根据你使用的浏览器选择安装: 14 | 15 | - Chrome浏览器: [Get cookies.txt LOCALLY](https://chromewebstore.google.com/detail/get-cookiestxt-locally/cclelndahbckbenkjhflpdbgdldlbecc) 16 | - Edge浏览器: [Export Cookies File](https://microsoftedge.microsoft.com/addons/detail/export-cookies-file/hbglikhfdcfhdfikmocdflffaecbnedo) 17 | 18 | ### 2. 导出Cookie文件 19 | 1. 登录需要下载视频的网站(如B站、YouTube等) 20 | 2. 点击浏览器扩展图标 21 | 3. 选择"Export Cookies"选项 22 | 4. 将导出的cookies.txt文件保存到本软件所在的目录下 23 | 5. 如果导出的文件名不是cookies.txt,请将文件名改为cookies.txt 24 | 25 | 图示: 26 | ![导出cookies](./images/export_cookies.png) 27 | 28 | 导出后,工具的工作文件夹的结构应该是这样的: 29 | ``` 30 | /── config/ 31 | │ └── config.toml 32 | ├── tasks/ 33 | ├── cookies.txt (<- 导出的cookies.txt文件) 34 | └── krillinai.exe 35 | ``` -------------------------------------------------------------------------------- /docs/images/alignment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krillinai/KrillinAI/d32920bc6b13724e0f39287bf6952cfc323b943e/docs/images/alignment.png -------------------------------------------------------------------------------- /docs/images/aliyun_accesskey_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krillinai/KrillinAI/d32920bc6b13724e0f39287bf6952cfc323b943e/docs/images/aliyun_accesskey_1.png -------------------------------------------------------------------------------- /docs/images/aliyun_oss_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krillinai/KrillinAI/d32920bc6b13724e0f39287bf6952cfc323b943e/docs/images/aliyun_oss_1.png -------------------------------------------------------------------------------- /docs/images/aliyun_oss_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krillinai/KrillinAI/d32920bc6b13724e0f39287bf6952cfc323b943e/docs/images/aliyun_oss_2.png -------------------------------------------------------------------------------- /docs/images/aliyun_oss_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krillinai/KrillinAI/d32920bc6b13724e0f39287bf6952cfc323b943e/docs/images/aliyun_oss_3.png -------------------------------------------------------------------------------- /docs/images/aliyun_oss_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krillinai/KrillinAI/d32920bc6b13724e0f39287bf6952cfc323b943e/docs/images/aliyun_oss_4.png -------------------------------------------------------------------------------- /docs/images/aliyun_oss_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krillinai/KrillinAI/d32920bc6b13724e0f39287bf6952cfc323b943e/docs/images/aliyun_oss_5.png -------------------------------------------------------------------------------- /docs/images/aliyun_speech_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krillinai/KrillinAI/d32920bc6b13724e0f39287bf6952cfc323b943e/docs/images/aliyun_speech_1.png -------------------------------------------------------------------------------- /docs/images/aliyun_speech_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krillinai/KrillinAI/d32920bc6b13724e0f39287bf6952cfc323b943e/docs/images/aliyun_speech_2.png -------------------------------------------------------------------------------- /docs/images/aliyun_speech_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krillinai/KrillinAI/d32920bc6b13724e0f39287bf6952cfc323b943e/docs/images/aliyun_speech_3.png --------------------------------------------------------------------------------
/docs/images/aliyun_speech_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krillinai/KrillinAI/d32920bc6b13724e0f39287bf6952cfc323b943e/docs/images/aliyun_speech_4.png -------------------------------------------------------------------------------- /docs/images/bailian_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krillinai/KrillinAI/d32920bc6b13724e0f39287bf6952cfc323b943e/docs/images/bailian_1.png -------------------------------------------------------------------------------- /docs/images/export_cookies.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krillinai/KrillinAI/d32920bc6b13724e0f39287bf6952cfc323b943e/docs/images/export_cookies.png -------------------------------------------------------------------------------- /docs/images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krillinai/KrillinAI/d32920bc6b13724e0f39287bf6952cfc323b943e/docs/images/logo.png -------------------------------------------------------------------------------- /docs/images/ui.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krillinai/KrillinAI/d32920bc6b13724e0f39287bf6952cfc323b943e/docs/images/ui.jpg -------------------------------------------------------------------------------- /docs/images/ui_desktop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krillinai/KrillinAI/d32920bc6b13724e0f39287bf6952cfc323b943e/docs/images/ui_desktop.png -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module krillin-ai 2 | 3 | go 1.22 4 | 5 | require ( 6 | fyne.io/fyne/v2 v2.5.4 7 | github.com/BurntSushi/toml v1.4.0 8 | github.com/aliyun/alibaba-cloud-sdk-go v1.63.72 9 | github.com/aliyun/alibabacloud-oss-go-sdk-v2 v1.1.3 10 | github.com/gin-gonic/gin v1.10.0 11 | github.com/go-resty/resty/v2 v2.7.0 12 | github.com/google/uuid v1.4.0 13 | github.com/gorilla/websocket v1.5.0 14 | github.com/samber/lo v1.38.1 15 | github.com/sashabaranov/go-openai v1.36.0 16 | go.uber.org/zap v1.25.0 17 | golang.org/x/sync v0.9.0 18 | ) 19 | 20 | require ( 21 | fyne.io/systray v1.11.0 // indirect 22 | github.com/bytedance/sonic v1.11.6 // indirect 23 | github.com/bytedance/sonic/loader v0.1.1 // indirect 24 | github.com/cloudwego/base64x v0.1.4 // indirect 25 | github.com/cloudwego/iasm v0.2.0 // indirect 26 | github.com/davecgh/go-spew v1.1.1 // indirect 27 | github.com/fredbi/uri v1.1.0 // indirect 28 | github.com/fsnotify/fsnotify v1.7.0 // indirect 29 | github.com/fyne-io/gl-js v0.0.0-20220119005834-d2da28d9ccfe // indirect 30 | github.com/fyne-io/glfw-js v0.0.0-20241126112943-313d8a0fe1d0 // indirect 31 | github.com/fyne-io/image v0.0.0-20220602074514-4956b0afb3d2 // indirect 32 | github.com/gabriel-vasile/mimetype v1.4.3 // indirect 33 | github.com/gin-contrib/sse v0.1.0 // indirect 34 | github.com/go-gl/gl v0.0.0-20211210172815-726fda9656d6 // indirect 35 | github.com/go-gl/glfw/v3.3/glfw v0.0.0-20240506104042-037f3cc74f2a // indirect 36 | github.com/go-playground/locales v0.14.1 // indirect 37 | github.com/go-playground/universal-translator v0.18.1 // indirect 38 | 
github.com/go-playground/validator/v10 v10.20.0 // indirect 39 | github.com/go-text/render v0.2.0 // indirect 40 | github.com/go-text/typesetting v0.2.0 // indirect 41 | github.com/goccy/go-json v0.10.2 // indirect 42 | github.com/godbus/dbus/v5 v5.1.0 // indirect 43 | github.com/google/go-cmp v0.5.9 // indirect 44 | github.com/gopherjs/gopherjs v1.17.2 // indirect 45 | github.com/jeandeaual/go-locale v0.0.0-20240223122105-ce5225dcaa49 // indirect 46 | github.com/jmespath/go-jmespath v0.4.0 // indirect 47 | github.com/json-iterator/go v1.1.12 // indirect 48 | github.com/jsummers/gobmp v0.0.0-20151104160322-e2ba15ffa76e // indirect 49 | github.com/klauspost/cpuid/v2 v2.2.7 // indirect 50 | github.com/kr/pretty v0.3.1 // indirect 51 | github.com/leodido/go-urn v1.4.0 // indirect 52 | github.com/mattn/go-isatty v0.0.20 // indirect 53 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect 54 | github.com/modern-go/reflect2 v1.0.2 // indirect 55 | github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646 // indirect 56 | github.com/nicksnyder/go-i18n/v2 v2.4.0 // indirect 57 | github.com/opentracing/opentracing-go v1.2.1-0.20220228012449-10b1cf09e00b // indirect 58 | github.com/pelletier/go-toml/v2 v2.2.2 // indirect 59 | github.com/pmezard/go-difflib v1.0.0 // indirect 60 | github.com/rymdport/portal v0.3.0 // indirect 61 | github.com/srwiley/oksvg v0.0.0-20221011165216-be6e8873101c // indirect 62 | github.com/srwiley/rasterx v0.0.0-20220730225603-2ab79fcdd4ef // indirect 63 | github.com/stretchr/testify v1.9.0 // indirect 64 | github.com/twitchyliquid64/golang-asm v0.15.1 // indirect 65 | github.com/ugorji/go/codec v1.2.12 // indirect 66 | github.com/yuin/goldmark v1.7.1 // indirect 67 | go.uber.org/atomic v1.10.0 // indirect 68 | go.uber.org/multierr v1.11.0 // indirect 69 | golang.org/x/arch v0.8.0 // indirect 70 | golang.org/x/crypto v0.23.0 // indirect 71 | golang.org/x/exp v0.0.0-20221031165847-c99f073a8326 // indirect 72 | golang.org/x/image v0.18.0 // indirect 73 | golang.org/x/mobile v0.0.0-20231127183840-76ac6878050a // indirect 74 | golang.org/x/net v0.25.0 // indirect 75 | golang.org/x/sys v0.20.0 // indirect 76 | golang.org/x/text v0.20.0 // indirect 77 | golang.org/x/time v0.4.0 // indirect 78 | google.golang.org/protobuf v1.34.1 // indirect 79 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect 80 | gopkg.in/ini.v1 v1.67.0 // indirect 81 | gopkg.in/yaml.v3 v3.0.1 // indirect 82 | ) 83 | -------------------------------------------------------------------------------- /internal/api/subtitle.go: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "path/filepath" 7 | "time" 8 | ) 9 | 10 | // WordReplacement 词语替换 11 | type WordReplacement struct { 12 | From string `json:"from"` 13 | To string `json:"to"` 14 | } 15 | 16 | // SubtitleTask 字幕任务 17 | type SubtitleTask struct { 18 | URL string `json:"url"` // 视频URL 19 | Language string `json:"language"` // 界面语言 20 | OriginLang string `json:"origin_lang"` // 源语言 21 | TargetLang string `json:"target_lang"` // 目标语言 22 | Bilingual int `json:"bilingual"` // 是否双语 1:是 2:否 23 | TranslationSubtitlePos int `json:"translation_subtitle_pos"` // 翻译字幕位置 1:上方 2:下方 24 | TTS int `json:"tts"` // 是否配音 1:是 2:否 25 | TTSVoiceCode int `json:"tts_voice_code,omitempty"` // 配音声音代码 1:女声 2:男声 26 | TTSVoiceCloneSrcFileURL string `json:"tts_voice_clone_src_file_url,omitempty"` // 音色克隆源文件URL 27 | ModalFilter int `json:"modal_filter"` // 是否过滤语气词 
1:是 2:否 28 | Replace []string `json:"replace,omitempty"` // 词汇替换列表 29 | EmbedSubtitleVideoType string `json:"embed_subtitle_video_type"` // 字幕嵌入视频类型 none:不嵌入 horizontal:横屏 vertical:竖屏 all:全部 30 | VerticalMajorTitle string `json:"vertical_major_title,omitempty"` // 竖屏主标题 31 | VerticalMinorTitle string `json:"vertical_minor_title,omitempty"` // 竖屏副标题 32 | } 33 | 34 | // SubtitleResult 字幕结果 35 | type SubtitleResult struct { 36 | Name string `json:"name"` // 文件名 37 | DownloadURL string `json:"download_url"` // 下载URL 38 | } 39 | 40 | // TaskStatus 任务状态 41 | type TaskStatus struct { 42 | TaskId string `json:"task_id"` // 任务ID 43 | ProcessPercent int `json:"process_percent"` // 处理进度百分比 44 | Status string `json:"status"` // 任务状态 45 | Message string `json:"message"` // 状态消息 46 | SubtitleInfo []SubtitleResult `json:"subtitle_info"` // 字幕信息 47 | SpeechDownloadURL string `json:"speech_download_url"` // 配音下载URL 48 | } 49 | 50 | // CreateSubtitleTask 创建字幕任务 51 | func CreateSubtitleTask(task *SubtitleTask) (*TaskStatus, error) { 52 | // 生成任务ID 53 | taskId := generateTaskId() 54 | 55 | // 创建任务目录 56 | taskDir := filepath.Join("tasks", taskId) 57 | if err := createTaskDirectory(taskDir); err != nil { 58 | return nil, fmt.Errorf("创建任务目录失败: %v", err) 59 | } 60 | 61 | // 启动异步任务处理 62 | go processTask(taskId, task) 63 | 64 | return &TaskStatus{ 65 | TaskId: taskId, 66 | ProcessPercent: 0, 67 | Status: "created", 68 | Message: "任务已创建", 69 | }, nil 70 | } 71 | 72 | // GetSubtitleTaskStatus 获取任务状态 73 | func GetSubtitleTaskStatus(taskId string) (*TaskStatus, error) { 74 | // 获取任务状态 75 | status, err := getTaskStatus(taskId) 76 | if err != nil { 77 | return nil, fmt.Errorf("获取任务状态失败: %v", err) 78 | } 79 | 80 | // 如果任务完成,添加下载链接 81 | if status.ProcessPercent >= 100 { 82 | status.SubtitleInfo = []SubtitleResult{ 83 | { 84 | Name: "字幕.srt", 85 | DownloadURL: fmt.Sprintf("/tasks/%s/output/subtitle.srt", taskId), 86 | }, 87 | { 88 | Name: "字幕.ass", 89 | DownloadURL: fmt.Sprintf("/tasks/%s/output/subtitle.ass", taskId), 90 | }, 91 | } 92 | 93 | // 如果启用了配音,添加配音下载链接 94 | if status.SpeechDownloadURL == "" { 95 | status.SpeechDownloadURL = fmt.Sprintf("/tasks/%s/output/speech.mp3", taskId) 96 | } 97 | } 98 | 99 | return status, nil 100 | } 101 | 102 | // 以下是辅助函数,需要在实际使用时实现 103 | func generateTaskId() string { 104 | // TODO: 实现任务ID生成逻辑 105 | return "task-" + time.Now().Format("20060102150405") 106 | } 107 | 108 | func createTaskDirectory(taskDir string) error { 109 | // TODO: 实现任务目录创建逻辑 110 | return os.MkdirAll(taskDir, 0755) 111 | } 112 | 113 | func processTask(taskId string, task *SubtitleTask) { 114 | // TODO: 实现任务处理逻辑 115 | // 1. 下载视频 116 | // 2. 提取音频 117 | // 3. 语音识别 118 | // 4. 翻译字幕 119 | // 5. 生成字幕文件 120 | // 6. 如果需要,生成配音 121 | // 7. 如果需要,嵌入字幕到视频 122 | // 8. 
更新任务状态 123 | } 124 | 125 | func getTaskStatus(taskId string) (*TaskStatus, error) { 126 | // TODO: 实现任务状态获取逻辑 127 | return &TaskStatus{ 128 | TaskId: taskId, 129 | ProcessPercent: 50, 130 | Status: "processing", 131 | Message: "正在处理中", 132 | }, nil 133 | } 134 | -------------------------------------------------------------------------------- /internal/desktop/components.go: -------------------------------------------------------------------------------- 1 | package desktop 2 | 3 | import ( 4 | "fmt" 5 | "image/color" 6 | "time" 7 | 8 | "fyne.io/fyne/v2" 9 | "fyne.io/fyne/v2/canvas" 10 | "fyne.io/fyne/v2/container" 11 | "fyne.io/fyne/v2/layout" 12 | "fyne.io/fyne/v2/widget" 13 | ) 14 | 15 | // FadeAnimation 淡入淡出动画 16 | func FadeAnimation(content fyne.CanvasObject, duration time.Duration, startOpacity, endOpacity float64) { 17 | // 使用更柔和的动画效果 18 | rect := canvas.NewRectangle(color.NRGBA{R: 240, G: 246, B: 252, A: 0}) 19 | rect.FillColor = color.NRGBA{R: 240, G: 246, B: 252, A: uint8(startOpacity * 255)} 20 | 21 | anim := canvas.NewColorRGBAAnimation( 22 | color.NRGBA{R: 240, G: 246, B: 252, A: uint8(startOpacity * 255)}, 23 | color.NRGBA{R: 240, G: 246, B: 252, A: uint8(endOpacity * 255)}, 24 | duration, 25 | func(c color.Color) { 26 | rect.FillColor = c 27 | content.Refresh() 28 | }) 29 | 30 | anim.Start() 31 | } 32 | 33 | // PrimaryButton 创建主要按钮 34 | func PrimaryButton(text string, icon fyne.Resource, action func()) *widget.Button { 35 | btn := widget.NewButtonWithIcon(text, icon, action) 36 | btn.Importance = widget.HighImportance 37 | return btn 38 | } 39 | 40 | // SecondaryButton 创建次要按钮 41 | func SecondaryButton(text string, icon fyne.Resource, action func()) *widget.Button { 42 | btn := widget.NewButtonWithIcon(text, icon, action) 43 | btn.Importance = widget.MediumImportance 44 | return btn 45 | } 46 | 47 | // TitleText 创建标题文本 48 | func TitleText(text string) *canvas.Text { 49 | title := canvas.NewText(text, color.NRGBA{R: 88, G: 157, B: 246, A: 255}) 50 | title.TextSize = 22 51 | title.TextStyle = fyne.TextStyle{Bold: true} 52 | title.Alignment = fyne.TextAlignCenter 53 | return title 54 | } 55 | 56 | // SubtitleText 创建副标题文本 57 | func SubtitleText(text string) *canvas.Text { 58 | subtitle := canvas.NewText(text, color.NRGBA{R: 100, G: 120, B: 160, A: 255}) 59 | subtitle.TextSize = 16 60 | subtitle.TextStyle = fyne.TextStyle{Italic: true} 61 | subtitle.Alignment = fyne.TextAlignCenter 62 | return subtitle 63 | } 64 | 65 | func createShadowRectangle(fillColor color.Color, cornerRadius float32) *canvas.Rectangle { 66 | rect := canvas.NewRectangle(fillColor) 67 | rect.CornerRadius = cornerRadius 68 | return rect 69 | } 70 | 71 | func GlassCard(title, subtitle string, content fyne.CanvasObject) *fyne.Container { 72 | glassBackground := createShadowRectangle(color.NRGBA{R: 255, G: 255, B: 255, A: 200}, 12) 73 | 74 | titleLabel := canvas.NewText(title, color.NRGBA{R: 60, G: 80, B: 120, A: 255}) 75 | titleLabel.TextSize = 16 76 | titleLabel.TextStyle = fyne.TextStyle{Bold: true} 77 | 78 | // 副标题 79 | var subtitleLabel *canvas.Text 80 | if subtitle != "" { 81 | subtitleLabel = canvas.NewText(subtitle, color.NRGBA{R: 100, G: 120, B: 150, A: 200}) 82 | subtitleLabel.TextSize = 12 83 | } 84 | 85 | // 标题容器 86 | var headerContainer *fyne.Container 87 | if subtitleLabel != nil { 88 | headerContainer = container.NewVBox(titleLabel, subtitleLabel) 89 | } else { 90 | headerContainer = container.NewVBox(titleLabel) 91 | } 92 | 93 | // 分隔线 94 | divider := canvas.NewLine(color.NRGBA{R: 220, G: 230, B: 
240, A: 255}) 95 | divider.StrokeWidth = 1 96 | 97 | contentWithPadding := container.NewPadded(content) 98 | 99 | // 布局 100 | cardContent := container.NewBorder( 101 | container.NewVBox(container.NewPadded(headerContainer), divider), 102 | nil, nil, nil, 103 | contentWithPadding, 104 | ) 105 | 106 | // 阴影 107 | shadow := canvas.NewRectangle(color.NRGBA{R: 0, G: 0, B: 0, A: 20}) 108 | shadow.Move(fyne.NewPos(3, 3)) 109 | shadow.Resize(fyne.NewSize(cardContent.Size().Width, cardContent.Size().Height)) 110 | shadow.CornerRadius = 12 111 | 112 | return container.NewStack(shadow, glassBackground, cardContent) 113 | } 114 | 115 | func StyledCard(title string, content fyne.CanvasObject) *fyne.Container { 116 | bg := createShadowRectangle(color.NRGBA{R: 250, G: 251, B: 254, A: 255}, 8) 117 | 118 | titleLabel := canvas.NewText(title, color.NRGBA{R: 60, G: 80, B: 120, A: 255}) 119 | titleLabel.TextSize = 16 120 | titleLabel.TextStyle = fyne.TextStyle{Bold: true} 121 | 122 | divider := canvas.NewRectangle(color.NRGBA{R: 230, G: 235, B: 240, A: 255}) 123 | divider.SetMinSize(fyne.NewSize(0, 1)) 124 | 125 | // 组合 126 | contentContainer := container.NewBorder( 127 | container.NewVBox( 128 | container.NewPadded(titleLabel), 129 | divider, 130 | ), 131 | nil, nil, nil, 132 | container.NewPadded(content), 133 | ) 134 | 135 | shadow := canvas.NewRectangle(color.NRGBA{R: 0, G: 0, B: 0, A: 15}) 136 | shadow.Move(fyne.NewPos(2, 2)) 137 | shadow.SetMinSize(fyne.NewSize(contentContainer.Size().Width+4, contentContainer.Size().Height+4)) 138 | shadow.CornerRadius = 8 139 | 140 | return container.NewStack(shadow, bg, contentContainer) 141 | } 142 | 143 | func StyledSelect(options []string, selected func(string)) *widget.Select { 144 | sel := widget.NewSelect(options, selected) 145 | 146 | // 针对包含"翻译后字幕"的选项增加宽度 147 | for _, option := range options { 148 | if len(option) > 8 { 149 | 150 | extraOptions := make([]string, len(options)) 151 | copy(extraOptions, options) 152 | 153 | maxOption := "" 154 | for _, opt := range options { 155 | if len(opt) > len(maxOption) { 156 | maxOption = opt 157 | } 158 | } 159 | 160 | // 添加额外空格来扩展宽度 161 | padding := " " 162 | if len(maxOption) < 20 { 163 | maxOption = maxOption + padding 164 | } 165 | 166 | sel = widget.NewSelect(extraOptions, selected) 167 | break 168 | } 169 | } 170 | 171 | return sel 172 | } 173 | 174 | func StyledEntry(placeholder string) *widget.Entry { 175 | entry := widget.NewEntry() 176 | entry.SetPlaceHolder(placeholder) 177 | return entry 178 | } 179 | 180 | func StyledPasswordEntry(placeholder string) *widget.Entry { 181 | entry := widget.NewPasswordEntry() 182 | entry.SetPlaceHolder(placeholder) 183 | return entry 184 | } 185 | 186 | func DividedContainer(vertical bool, items ...fyne.CanvasObject) *fyne.Container { 187 | if len(items) <= 1 { 188 | if len(items) == 1 { 189 | return container.NewPadded(items[0]) 190 | } 191 | return container.NewPadded() 192 | } 193 | 194 | var dividers []fyne.CanvasObject 195 | for i := 0; i < len(items)-1; i++ { 196 | dividers = append(dividers, createDivider(vertical)) 197 | } 198 | 199 | var objects []fyne.CanvasObject 200 | for i, item := range items { 201 | objects = append(objects, item) 202 | if i < len(dividers) { 203 | objects = append(objects, dividers[i]) 204 | } 205 | } 206 | 207 | if vertical { 208 | return container.New(layout.NewVBoxLayout(), objects...) 209 | } 210 | return container.New(layout.NewHBoxLayout(), objects...) 
211 | } 212 | 213 | func createDivider(vertical bool) fyne.CanvasObject { 214 | divider := canvas.NewRectangle(color.NRGBA{R: 210, G: 220, B: 240, A: 255}) 215 | if vertical { 216 | divider.SetMinSize(fyne.NewSize(0, 1)) 217 | } else { 218 | divider.SetMinSize(fyne.NewSize(1, 0)) 219 | } 220 | return divider 221 | } 222 | 223 | func ProgressWithLabel(initial float64) (*widget.ProgressBar, *widget.Label, *fyne.Container) { 224 | progress := widget.NewProgressBar() 225 | progress.SetValue(initial) 226 | 227 | label := widget.NewLabel("0%") 228 | 229 | container := container.NewBorder(nil, nil, nil, label, progress) 230 | 231 | return progress, label, container 232 | } 233 | 234 | // UpdateProgressLabel 更新进度条标签 235 | func UpdateProgressLabel(progress *widget.ProgressBar, label *widget.Label) { 236 | percentage := int(progress.Value * 100) 237 | label.SetText(fmt.Sprintf("%d%%", percentage)) 238 | } 239 | 240 | func AnimatedContainer() *fyne.Container { 241 | return container.NewStack() 242 | } 243 | 244 | func SwitchContent(container *fyne.Container, content fyne.CanvasObject, duration time.Duration) { 245 | if container == nil || content == nil { 246 | return 247 | } 248 | 249 | if len(container.Objects) > 0 { 250 | oldContent := container.Objects[0] 251 | FadeAnimation(oldContent, duration/2, 1.0, 0.0) 252 | 253 | go func() { 254 | defer func() { 255 | if r := recover(); r != nil { 256 | fmt.Println("内容切换时发生错误:", r) 257 | } 258 | }() 259 | 260 | time.Sleep(duration / 2) 261 | container.Objects = []fyne.CanvasObject{content} 262 | container.Refresh() 263 | FadeAnimation(content, duration/2, 0.0, 1.0) 264 | }() 265 | } else { 266 | container.Objects = []fyne.CanvasObject{content} 267 | container.Refresh() 268 | FadeAnimation(content, duration/2, 0.0, 1.0) 269 | } 270 | } 271 | -------------------------------------------------------------------------------- /internal/desktop/desktop.go: -------------------------------------------------------------------------------- 1 | package desktop 2 | 3 | import ( 4 | "image/color" 5 | "time" 6 | 7 | "fyne.io/fyne/v2" 8 | "fyne.io/fyne/v2/app" 9 | "fyne.io/fyne/v2/canvas" 10 | "fyne.io/fyne/v2/container" 11 | "fyne.io/fyne/v2/layout" 12 | "fyne.io/fyne/v2/theme" 13 | "fyne.io/fyne/v2/widget" 14 | ) 15 | 16 | func createNavButton(text string, icon fyne.Resource, isSelected bool, onTap func()) *widget.Button { 17 | btn := widget.NewButtonWithIcon(text, icon, onTap) 18 | 19 | // 根据选中状态设置颜色 20 | if isSelected { 21 | btn.Importance = widget.HighImportance 22 | } else { 23 | btn.Importance = widget.LowImportance 24 | } 25 | 26 | return btn 27 | } 28 | 29 | // Show 展示桌面 30 | func Show() { 31 | myApp := app.New() 32 | 33 | // 自定义主题 34 | myApp.Settings().SetTheme(NewCustomTheme(false)) 35 | 36 | myWindow := myApp.NewWindow("Krillin AI") 37 | 38 | logoContainer := container.NewVBox() 39 | 40 | logo := canvas.NewText("Krillin AI", color.NRGBA{R: 88, G: 157, B: 246, A: 255}) 41 | logo.TextSize = 28 42 | logo.TextStyle = fyne.TextStyle{Bold: true} 43 | logo.Alignment = fyne.TextAlignCenter 44 | 45 | separator := canvas.NewRectangle(color.NRGBA{R: 210, G: 225, B: 245, A: 255}) 46 | separator.SetMinSize(fyne.NewSize(0, 2)) 47 | 48 | slogan := canvas.NewText("智能内容创作助手", color.NRGBA{R: 100, G: 120, B: 160, A: 255}) 49 | slogan.TextSize = 12 50 | slogan.Alignment = fyne.TextAlignCenter 51 | 52 | logoContainer.Add(logo) 53 | logoContainer.Add(separator) 54 | logoContainer.Add(slogan) 55 | 56 | // 创建左侧导航栏 57 | navItems := []string{"工作台 Workbench", "配置 Config"} 58 | navIcons 
:= []fyne.Resource{theme.DocumentIcon(), theme.SettingsIcon()} 59 | 60 | // 存储导航按钮列表 61 | var navButtons []*widget.Button 62 | navContainer := container.NewVBox() 63 | 64 | // 创建内容区域,使用Stack容器来叠放多个内容 65 | contentStack := container.NewStack() 66 | 67 | // 预先创建两个tab的内容 68 | workbenchContent := CreateSubtitleTab(myWindow) 69 | configContent := CreateConfigTab(myWindow) 70 | 71 | // 默认显示工作台内容 72 | contentStack.Add(workbenchContent) 73 | contentStack.Add(configContent) 74 | 75 | configContent.Hide() 76 | 77 | currentSelectedIndex := 0 78 | 79 | // 创建导航项 80 | for i, item := range navItems { 81 | index := i // 捕获变量 82 | isSelected := (i == currentSelectedIndex) 83 | 84 | // 创建导航按钮以及点击处理函数 85 | navBtn := createNavButton(item, navIcons[i], isSelected, func() { 86 | // 如果已经是当前选中项,不做任何操作 87 | if currentSelectedIndex == index { 88 | return 89 | } 90 | 91 | // 更新所有导航项的状态 92 | for j, btn := range navButtons { 93 | if j == index { 94 | btn.Importance = widget.HighImportance 95 | } else { 96 | btn.Importance = widget.LowImportance 97 | } 98 | } 99 | 100 | // 更新当前选中的索引 101 | currentSelectedIndex = index 102 | 103 | navContainer.Refresh() 104 | 105 | if index == 0 { 106 | workbenchContent.Show() 107 | configContent.Hide() 108 | // 确保进度条和下载区域状态正确显示 109 | workbenchContent.Refresh() 110 | FadeAnimation(workbenchContent, 300*time.Millisecond, 0.0, 1.0) 111 | } else { 112 | workbenchContent.Hide() 113 | configContent.Show() 114 | FadeAnimation(configContent, 300*time.Millisecond, 0.0, 1.0) 115 | } 116 | 117 | contentStack.Refresh() 118 | }) 119 | 120 | // 将导航按钮添加到列表和容器中 121 | navButtons = append(navButtons, navBtn) 122 | navContainer.Add(container.NewPadded(navBtn)) 123 | } 124 | 125 | navBackground := canvas.NewRectangle(color.NRGBA{R: 250, G: 251, B: 254, A: 255}) 126 | 127 | navWithBackground := container.NewStack( 128 | navBackground, 129 | container.NewBorder( 130 | container.NewPadded(logoContainer), 131 | nil, nil, nil, 132 | container.NewPadded(navContainer), 133 | ), 134 | ) 135 | 136 | // 主布局 137 | split := container.NewHSplit(navWithBackground, container.NewPadded(contentStack)) 138 | split.SetOffset(0.2) 139 | 140 | mainContainer := container.NewPadded(split) 141 | 142 | // 底部状态栏 143 | statusText := canvas.NewText("就绪", color.NRGBA{R: 100, G: 120, B: 160, A: 180}) 144 | statusText.TextSize = 12 145 | statusBar := container.NewHBox( 146 | layout.NewSpacer(), 147 | statusText, 148 | ) 149 | 150 | finalContainer := container.NewBorder(nil, container.NewPadded(statusBar), nil, nil, mainContainer) 151 | 152 | myWindow.SetContent(finalContainer) 153 | myWindow.Resize(fyne.NewSize(1000, 700)) 154 | myWindow.CenterOnScreen() 155 | myWindow.ShowAndRun() 156 | } 157 | -------------------------------------------------------------------------------- /internal/desktop/file.go: -------------------------------------------------------------------------------- 1 | package desktop 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "fmt" 7 | "io" 8 | "mime/multipart" 9 | "net/http" 10 | "os" 11 | "path/filepath" 12 | 13 | "fyne.io/fyne/v2" 14 | "fyne.io/fyne/v2/dialog" 15 | ) 16 | 17 | type FileManager struct { 18 | window fyne.Window 19 | files []string 20 | } 21 | 22 | func NewFileManager(window fyne.Window) *FileManager { 23 | return &FileManager{ 24 | window: window, 25 | files: make([]string, 0), 26 | } 27 | } 28 | 29 | func (fm *FileManager) ShowUploadDialog() { 30 | fd := dialog.NewFileOpen(func(reader fyne.URIReadCloser, err error) { 31 | if err != nil { 32 | dialog.ShowError(err, fm.window) 33 | return 34 | 
} 35 | if reader == nil { 36 | return 37 | } 38 | 39 | // 获取文件路径 40 | filePath := reader.URI().Path() 41 | fileName := filepath.Base(filePath) 42 | 43 | err = fm.uploadFile(filePath, fileName) 44 | if err != nil { 45 | dialog.ShowError(err, fm.window) 46 | return 47 | } 48 | 49 | dialog.ShowInformation("成功", "文件上传成功", fm.window) 50 | }, fm.window) 51 | 52 | fd.Show() 53 | } 54 | 55 | func (fm *FileManager) uploadFile(filePath, fileName string) error { 56 | file, err := os.Open(filePath) 57 | if err != nil { 58 | return err 59 | } 60 | defer file.Close() 61 | 62 | // 创建multipart form 63 | body := &bytes.Buffer{} 64 | writer := multipart.NewWriter(body) 65 | part, err := writer.CreateFormFile("file", fileName) 66 | if err != nil { 67 | return err 68 | } 69 | _, err = io.Copy(part, file) 70 | if err != nil { 71 | return err 72 | } 73 | writer.Close() 74 | 75 | // 发送请求 76 | resp, err := http.Post("http://localhost:8888/api/file", writer.FormDataContentType(), body) 77 | if err != nil { 78 | return err 79 | } 80 | defer resp.Body.Close() 81 | 82 | var result struct { 83 | Error int `json:"error"` 84 | Msg string `json:"msg"` 85 | Data struct { 86 | FilePath string `json:"file_path"` 87 | } `json:"data"` 88 | } 89 | 90 | err = json.NewDecoder(resp.Body).Decode(&result) 91 | if err != nil { 92 | return err 93 | } 94 | 95 | if result.Error != 0 && result.Error != 200 { 96 | return fmt.Errorf(result.Msg) 97 | } 98 | 99 | fm.files = append(fm.files, result.Data.FilePath) 100 | return nil 101 | } 102 | 103 | func (fm *FileManager) GetFileCount() int { 104 | return len(fm.files) 105 | } 106 | 107 | func (fm *FileManager) GetFileName(index int) string { 108 | if index < 0 || index >= len(fm.files) { 109 | return "" 110 | } 111 | return filepath.Base(fm.files[index]) 112 | } 113 | 114 | func (fm *FileManager) DownloadFile(index int) { 115 | if index < 0 || index >= len(fm.files) { 116 | return 117 | } 118 | 119 | filePath := fm.files[index] 120 | 121 | dialog.ShowFileSave(func(writer fyne.URIWriteCloser, err error) { 122 | if err != nil { 123 | dialog.ShowError(err, fm.window) 124 | return 125 | } 126 | if writer == nil { 127 | return 128 | } 129 | 130 | resp, err := http.Get("http://localhost:8888" + filePath) 131 | if err != nil { 132 | dialog.ShowError(err, fm.window) 133 | return 134 | } 135 | defer resp.Body.Close() 136 | 137 | _, err = io.Copy(writer, resp.Body) 138 | if err != nil { 139 | dialog.ShowError(err, fm.window) 140 | return 141 | } 142 | 143 | writer.Close() 144 | dialog.ShowInformation("成功", "文件下载完成", fm.window) 145 | }, fm.window) 146 | } 147 | -------------------------------------------------------------------------------- /internal/desktop/theme.go: -------------------------------------------------------------------------------- 1 | package desktop 2 | 3 | import ( 4 | "image/color" 5 | 6 | "fyne.io/fyne/v2" 7 | "fyne.io/fyne/v2/theme" 8 | ) 9 | 10 | // customTheme 自定义主题 11 | type customTheme struct { 12 | baseTheme fyne.Theme 13 | forceDark bool 14 | } 15 | 16 | func NewCustomTheme(forceDark bool) fyne.Theme { 17 | if forceDark { 18 | return &customTheme{baseTheme: theme.DefaultTheme(), forceDark: true} 19 | } 20 | return &customTheme{baseTheme: theme.DefaultTheme(), forceDark: false} 21 | } 22 | 23 | func (t *customTheme) Color(name fyne.ThemeColorName, variant fyne.ThemeVariant) color.Color { 24 | if t.forceDark || variant == theme.VariantDark { 25 | return t.darkColors(name) 26 | } 27 | return t.lightColors(name) 28 | } 29 | 30 | // lightColors 浅色主题配色方案 31 | func (t 
*customTheme) lightColors(name fyne.ThemeColorName) color.Color { 32 | switch name { 33 | // 主色系 34 | case theme.ColorNamePrimary: 35 | return color.NRGBA{R: 100, G: 150, B: 240, A: 255} 36 | 37 | // 背景与前景 38 | case theme.ColorNameBackground: 39 | return color.NRGBA{R: 248, G: 249, B: 252, A: 255} // 极浅灰背景 40 | case theme.ColorNameForeground: 41 | return color.NRGBA{R: 30, G: 35, B: 45, A: 255} // 深灰文字 42 | case theme.ColorNameDisabled: 43 | return color.NRGBA{R: 180, G: 185, B: 190, A: 150} // 柔和禁用色 44 | 45 | // 按钮状态 46 | case theme.ColorNameButton: 47 | return color.NRGBA{R: 70, G: 130, B: 230, A: 255} 48 | case theme.ColorNameHover: 49 | return color.NRGBA{R: 90, G: 150, B: 240, A: 255} // 浅蓝悬停 50 | case theme.ColorNamePressed: 51 | return color.NRGBA{R: 50, G: 110, B: 210, A: 255} // 深蓝按下 52 | 53 | // 输入组件 54 | case theme.ColorNameInputBackground: 55 | return color.NRGBA{R: 255, G: 255, B: 255, A: 255} // 纯白输入框 56 | case theme.ColorNameInputBorder: 57 | return color.NRGBA{R: 210, G: 215, B: 220, A: 255} // 浅灰边框 58 | case theme.ColorNamePlaceHolder: 59 | return color.NRGBA{R: 160, G: 165, B: 170, A: 200} // 灰占位符 60 | 61 | // 其他 62 | case theme.ColorNameSelection: 63 | return color.NRGBA{R: 200, G: 225, B: 255, A: 180} // 淡蓝选中 64 | case theme.ColorNameScrollBar: 65 | return color.NRGBA{R: 200, G: 205, B: 210, A: 200} 66 | case theme.ColorNameShadow: 67 | return color.NRGBA{R: 0, G: 0, B: 0, A: 25} // 柔和阴影 68 | 69 | // 状态色 70 | case theme.ColorNameError: 71 | return color.NRGBA{R: 230, G: 70, B: 70, A: 255} // 红色错误 72 | case theme.ColorNameWarning: 73 | return color.NRGBA{R: 245, G: 160, B: 50, A: 255} // 橙色警告 74 | case theme.ColorNameSuccess: 75 | return color.NRGBA{R: 60, G: 180, B: 120, A: 255} // 绿色成功 76 | case theme.ColorNameFocus: 77 | return color.NRGBA{R: 70, G: 130, B: 230, A: 100} // 半透明焦点 78 | 79 | default: 80 | return t.baseTheme.Color(name, theme.VariantLight) 81 | } 82 | } 83 | 84 | // darkColors 深色主题配色方案 85 | func (t *customTheme) darkColors(name fyne.ThemeColorName) color.Color { 86 | switch name { 87 | // 主色系 88 | case theme.ColorNamePrimary: 89 | return color.NRGBA{R: 90, G: 150, B: 250, A: 255} // 稍亮的蓝色 90 | 91 | // 背景与前景 92 | case theme.ColorNameBackground: 93 | return color.NRGBA{R: 20, G: 22, B: 30, A: 255} // 更深的灰蓝背景 94 | case theme.ColorNameForeground: 95 | return color.NRGBA{R: 230, G: 235, B: 240, A: 255} // 浅灰文字 96 | case theme.ColorNameDisabled: 97 | return color.NRGBA{R: 100, G: 105, B: 110, A: 150} // 深色禁用 98 | 99 | // 按钮状态 100 | case theme.ColorNameButton: 101 | return color.NRGBA{R: 50, G: 55, B: 65, A: 255} // 更深的按钮背景 102 | case theme.ColorNameHover: 103 | return color.NRGBA{R: 70, G: 75, B: 85, A: 255} // 浅灰悬停 104 | case theme.ColorNamePressed: 105 | return color.NRGBA{R: 30, G: 35, B: 45, A: 255} // 更深按下 106 | 107 | // 输入组件 108 | case theme.ColorNameInputBackground: 109 | return color.NRGBA{R: 35, G: 38, B: 48, A: 255} // 更深的输入框背景 110 | case theme.ColorNameInputBorder: 111 | return color.NRGBA{R: 60, G: 65, B: 75, A: 255} // 更深的边框 112 | case theme.ColorNamePlaceHolder: 113 | return color.NRGBA{R: 120, G: 125, B: 130, A: 200} // 灰占位符 114 | 115 | // 其他 116 | case theme.ColorNameSelection: 117 | return color.NRGBA{R: 70, G: 130, B: 230, A: 180} // 蓝色选中 118 | case theme.ColorNameScrollBar: 119 | return color.NRGBA{R: 60, G: 65, B: 75, A: 200} // 更深的滚动条 120 | case theme.ColorNameShadow: 121 | return color.NRGBA{R: 0, G: 0, B: 0, A: 50} // 深色阴影 122 | 123 | // 状态色(更鲜艳) 124 | case theme.ColorNameError: 125 | return color.NRGBA{R: 240, G: 80, B: 80, A: 255} 126 | 
case theme.ColorNameWarning: 127 | return color.NRGBA{R: 255, G: 170, B: 60, A: 255} 128 | case theme.ColorNameSuccess: 129 | return color.NRGBA{R: 70, G: 190, B: 130, A: 255} 130 | case theme.ColorNameFocus: 131 | return color.NRGBA{R: 80, G: 140, B: 240, A: 100} 132 | 133 | default: 134 | return t.baseTheme.Color(name, theme.VariantDark) 135 | } 136 | } 137 | 138 | // Icon 主题图标 139 | func (t *customTheme) Icon(name fyne.ThemeIconName) fyne.Resource { 140 | return t.baseTheme.Icon(name) 141 | } 142 | 143 | // Font 主题字体 144 | func (t *customTheme) Font(style fyne.TextStyle) fyne.Resource { 145 | return t.baseTheme.Font(style) 146 | } 147 | 148 | // Size 主题尺寸设置 149 | func (t *customTheme) Size(name fyne.ThemeSizeName) float32 { 150 | switch name { 151 | case theme.SizeNamePadding: 152 | return 10 153 | case theme.SizeNameInlineIcon: 154 | return 20 155 | case theme.SizeNameScrollBar: 156 | return 10 157 | case theme.SizeNameScrollBarSmall: 158 | return 4 159 | case theme.SizeNameSeparatorThickness: 160 | return 1 161 | case theme.SizeNameText: 162 | return 14 163 | case theme.SizeNameInputBorder: 164 | return 1.5 165 | case theme.SizeNameInputRadius: 166 | return 5 167 | default: 168 | return t.baseTheme.Size(name) 169 | } 170 | } 171 | -------------------------------------------------------------------------------- /internal/dto/subtitle_task.go: -------------------------------------------------------------------------------- 1 | package dto 2 | 3 | type StartVideoSubtitleTaskReq struct { 4 | AppId uint32 `json:"app_id"` 5 | Url string `json:"url"` 6 | OriginLanguage string `json:"origin_lang"` 7 | TargetLang string `json:"target_lang"` 8 | Bilingual uint8 `json:"bilingual"` 9 | TranslationSubtitlePos uint8 `json:"translation_subtitle_pos"` 10 | ModalFilter uint8 `json:"modal_filter"` 11 | Tts uint8 `json:"tts"` 12 | TtsVoiceCode uint8 `json:"tts_voice_code"` 13 | TtsVoiceCloneSrcFileUrl string `json:"tts_voice_clone_src_file_url"` 14 | Replace []string `json:"replace"` 15 | Language string `json:"language"` 16 | EmbedSubtitleVideoType string `json:"embed_subtitle_video_type"` 17 | VerticalMajorTitle string `json:"vertical_major_title"` 18 | VerticalMinorTitle string `json:"vertical_minor_title"` 19 | OriginLanguageWordOneLine int `json:"origin_language_word_one_line"` 20 | } 21 | 22 | type StartVideoSubtitleTaskResData struct { 23 | TaskId string `json:"task_id"` 24 | } 25 | 26 | type StartVideoSubtitleTaskRes struct { 27 | Error int32 `json:"error"` 28 | Msg string `json:"msg"` 29 | Data *StartVideoSubtitleTaskResData `json:"data"` 30 | } 31 | 32 | type GetVideoSubtitleTaskReq struct { 33 | TaskId string `form:"taskId"` 34 | } 35 | 36 | type VideoInfo struct { 37 | Title string `json:"title"` 38 | Description string `json:"description"` 39 | TranslatedTitle string `json:"translated_title"` 40 | TranslatedDescription string `json:"translated_description"` 41 | Language string `json:"language"` 42 | } 43 | 44 | type SubtitleInfo struct { 45 | Name string `json:"name"` 46 | DownloadUrl string `json:"download_url"` 47 | } 48 | 49 | type GetVideoSubtitleTaskResData struct { 50 | TaskId string `json:"task_id"` 51 | ProcessPercent uint8 `json:"process_percent"` 52 | VideoInfo *VideoInfo `json:"video_info"` 53 | SubtitleInfo []*SubtitleInfo `json:"subtitle_info"` 54 | TargetLanguage string `json:"target_language"` 55 | SpeechDownloadUrl string `json:"speech_download_url"` 56 | } 57 | 58 | type GetVideoSubtitleTaskRes struct { 59 | Error int32 `json:"error"` 60 | Msg string `json:"msg"` 61 | Data 
*GetVideoSubtitleTaskResData `json:"data"` 62 | } 63 | -------------------------------------------------------------------------------- /internal/handler/init.go: -------------------------------------------------------------------------------- 1 | package handler 2 | 3 | import "krillin-ai/internal/service" 4 | 5 | type Handler struct { 6 | Service *service.Service 7 | } 8 | 9 | func NewHandler() *Handler { 10 | return &Handler{ 11 | Service: service.NewService(), 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /internal/handler/middleware.go: -------------------------------------------------------------------------------- 1 | package handler 2 | -------------------------------------------------------------------------------- /internal/handler/subtitle_task.go: -------------------------------------------------------------------------------- 1 | package handler 2 | 3 | import ( 4 | "github.com/gin-gonic/gin" 5 | "krillin-ai/internal/dto" 6 | "krillin-ai/internal/response" 7 | "os" 8 | "path/filepath" 9 | ) 10 | 11 | func (h Handler) StartSubtitleTask(c *gin.Context) { 12 | var req dto.StartVideoSubtitleTaskReq 13 | if err := c.ShouldBindJSON(&req); err != nil { 14 | response.R(c, response.Response{ 15 | Error: -1, 16 | Msg: "参数错误", 17 | Data: nil, 18 | }) 19 | return 20 | } 21 | 22 | svc := h.Service 23 | 24 | data, err := svc.StartSubtitleTask(req) 25 | if err != nil { 26 | response.R(c, response.Response{ 27 | Error: -1, 28 | Msg: err.Error(), 29 | Data: nil, 30 | }) 31 | return 32 | } 33 | response.R(c, response.Response{ 34 | Error: 0, 35 | Msg: "成功", 36 | Data: data, 37 | }) 38 | } 39 | 40 | func (h Handler) GetSubtitleTask(c *gin.Context) { 41 | var req dto.GetVideoSubtitleTaskReq 42 | if err := c.ShouldBindQuery(&req); err != nil { 43 | response.R(c, response.Response{ 44 | Error: -1, 45 | Msg: "参数错误", 46 | Data: nil, 47 | }) 48 | return 49 | } 50 | svc := h.Service 51 | data, err := svc.GetTaskStatus(req) 52 | if err != nil { 53 | response.R(c, response.Response{ 54 | Error: -1, 55 | Msg: err.Error(), 56 | Data: nil, 57 | }) 58 | return 59 | } 60 | response.R(c, response.Response{ 61 | Error: 0, 62 | Msg: "成功", 63 | Data: data, 64 | }) 65 | } 66 | 67 | func (h Handler) UploadFile(c *gin.Context) { 68 | file, err := c.FormFile("file") 69 | if err != nil { 70 | response.R(c, response.Response{ 71 | Error: -1, 72 | Msg: "未能获取文件", 73 | Data: nil, 74 | }) 75 | return 76 | } 77 | 78 | savePath := "./uploads/" + file.Filename 79 | if err = c.SaveUploadedFile(file, savePath); err != nil { 80 | response.R(c, response.Response{ 81 | Error: -1, 82 | Msg: "文件保存失败", 83 | Data: nil, 84 | }) 85 | return 86 | } 87 | 88 | response.R(c, response.Response{ 89 | Error: 0, 90 | Msg: "文件上传成功", 91 | Data: gin.H{"file_path": "local:" + savePath}, 92 | }) 93 | } 94 | 95 | func (h Handler) DownloadFile(c *gin.Context) { 96 | requestedFile := c.Param("filepath") 97 | if requestedFile == "" { 98 | response.R(c, response.Response{ 99 | Error: -1, 100 | Msg: "文件路径为空", 101 | Data: nil, 102 | }) 103 | return 104 | } 105 | 106 | localFilePath := filepath.Join(".", requestedFile) 107 | if _, err := os.Stat(localFilePath); os.IsNotExist(err) { 108 | response.R(c, response.Response{ 109 | Error: -1, 110 | Msg: "文件不存在", 111 | Data: nil, 112 | }) 113 | return 114 | } 115 | c.FileAttachment(localFilePath, filepath.Base(localFilePath)) 116 | } 117 | -------------------------------------------------------------------------------- /internal/response/response.go: 
-------------------------------------------------------------------------------- 1 | package response 2 | 3 | import "github.com/gin-gonic/gin" 4 | 5 | type Response struct { 6 | Error int32 `json:"error"` 7 | Msg string `json:"msg"` 8 | Data any `json:"data"` 9 | } 10 | 11 | func R(c *gin.Context, data any) { 12 | c.JSON(200, data) 13 | } 14 | -------------------------------------------------------------------------------- /internal/router/router.go: -------------------------------------------------------------------------------- 1 | package router 2 | 3 | import ( 4 | "krillin-ai/internal/handler" 5 | "krillin-ai/static" 6 | "net/http" 7 | 8 | "github.com/gin-gonic/gin" 9 | ) 10 | 11 | func SetupRouter(r *gin.Engine) { 12 | api := r.Group("/api") 13 | 14 | hdl := handler.NewHandler() 15 | { 16 | api.POST("/capability/subtitleTask", hdl.StartSubtitleTask) 17 | api.GET("/capability/subtitleTask", hdl.GetSubtitleTask) 18 | api.POST("/file", hdl.UploadFile) 19 | api.GET("/file/*filepath", hdl.DownloadFile) 20 | } 21 | 22 | r.GET("/", func(c *gin.Context) { 23 | c.Redirect(http.StatusMovedPermanently, "/static") 24 | }) 25 | r.StaticFS("/static", http.FS(static.EmbeddedFiles)) 26 | } 27 | -------------------------------------------------------------------------------- /internal/server/server.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "fmt" 5 | "krillin-ai/config" 6 | "krillin-ai/internal/router" 7 | "krillin-ai/log" 8 | 9 | "github.com/gin-gonic/gin" 10 | "go.uber.org/zap" 11 | ) 12 | 13 | func StartBackend() error { 14 | gin.SetMode(gin.ReleaseMode) 15 | engine := gin.Default() 16 | router.SetupRouter(engine) 17 | log.GetLogger().Info("服务启动", zap.String("host", config.Conf.Server.Host), zap.Int("port", config.Conf.Server.Port)) 18 | return engine.Run(fmt.Sprintf("%s:%d", config.Conf.Server.Host, config.Conf.Server.Port)) 19 | } 20 | -------------------------------------------------------------------------------- /internal/service/audio2subtitle_test.go: -------------------------------------------------------------------------------- 1 | package service 2 | 3 | import ( 4 | "os" 5 | "testing" 6 | ) 7 | 8 | func Test_isValidSplitContent(t *testing.T) { 9 | // 固定的测试文件路径 10 | splitContentFile := "g:\\bin\\AI\\tasks\\gdQRrtQP\\srt_no_ts_1.srt" 11 | originalTextFile := "g:\\bin\\AI\\tasks\\gdQRrtQP\\output\\origin_1.txt" 12 | 13 | // 读取分割内容文件 14 | splitContent, err := os.ReadFile(splitContentFile) 15 | if err != nil { 16 | t.Fatalf("读取分割内容文件失败: %v", err) 17 | } 18 | 19 | // 读取原始文本文件 20 | originalText, err := os.ReadFile(originalTextFile) 21 | if err != nil { 22 | t.Fatalf("读取原始文本文件失败: %v", err) 23 | } 24 | 25 | // 执行测试 26 | if got := isValidSplitContent(string(splitContent), string(originalText)); !got { 27 | t.Errorf("isValidSplitContent() = %v, want true", got) 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /internal/service/get_video_info.go: -------------------------------------------------------------------------------- 1 | package service 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "go.uber.org/zap" 7 | "krillin-ai/config" 8 | "krillin-ai/internal/storage" 9 | "krillin-ai/internal/types" 10 | "krillin-ai/log" 11 | "os/exec" 12 | "strings" 13 | ) 14 | 15 | func (s Service) getVideoInfo(ctx context.Context, stepParam *types.SubtitleTaskStepParam) error { 16 | link := stepParam.Link 17 | if strings.Contains(link, "youtube.com") || strings.Contains(link, 
"bilibili.com") { 18 | var ( 19 | err error 20 | title, description string 21 | ) 22 | // 获取标题 23 | titleCmdArgs := []string{"--skip-download", "--encoding", "utf-8", "--get-title", stepParam.Link} 24 | descriptionCmdArgs := []string{"--skip-download", "--encoding", "utf-8", "--get-description", stepParam.Link} 25 | titleCmdArgs = append(titleCmdArgs, "--cookies", "./cookies.txt") 26 | descriptionCmdArgs = append(descriptionCmdArgs, "--cookies", "./cookies.txt") 27 | if config.Conf.App.Proxy != "" { 28 | titleCmdArgs = append(titleCmdArgs, "--proxy", config.Conf.App.Proxy) 29 | descriptionCmdArgs = append(descriptionCmdArgs, "--proxy", config.Conf.App.Proxy) 30 | } 31 | if storage.FfmpegPath != "ffmpeg" { 32 | titleCmdArgs = append(titleCmdArgs, "--ffmpeg-location", storage.FfmpegPath) 33 | descriptionCmdArgs = append(descriptionCmdArgs, "--ffmpeg-location", storage.FfmpegPath) 34 | } 35 | cmd := exec.Command(storage.YtdlpPath, titleCmdArgs...) 36 | var output []byte 37 | output, err = cmd.CombinedOutput() 38 | if err != nil { 39 | log.GetLogger().Error("getVideoInfo yt-dlp error", zap.Any("stepParam", stepParam), zap.String("output", string(output)), zap.Error(err)) 40 | output = []byte{} 41 | // 不需要整个流程退出 42 | } 43 | title = string(output) 44 | cmd = exec.Command(storage.YtdlpPath, descriptionCmdArgs...) 45 | output, err = cmd.CombinedOutput() 46 | if err != nil { 47 | log.GetLogger().Error("getVideoInfo yt-dlp error", zap.Any("stepParam", stepParam), zap.String("output", string(output)), zap.Error(err)) 48 | output = []byte{} 49 | } 50 | description = string(output) 51 | log.GetLogger().Debug("getVideoInfo title and description", zap.String("title", title), zap.String("description", description)) 52 | // 翻译 53 | var result string 54 | result, err = s.ChatCompleter.ChatCompletion(fmt.Sprintf(types.TranslateVideoTitleAndDescriptionPrompt, types.GetStandardLanguageName(stepParam.TargetLanguage), title+"####"+description)) 55 | if err != nil { 56 | log.GetLogger().Error("getVideoInfo openai chat completion error", zap.Any("stepParam", stepParam), zap.Error(err)) 57 | } 58 | log.GetLogger().Debug("getVideoInfo translate video info result", zap.String("result", result)) 59 | 60 | taskPtr := stepParam.TaskPtr 61 | 62 | taskPtr.Title = title 63 | taskPtr.Description = description 64 | taskPtr.OriginLanguage = string(stepParam.OriginLanguage) 65 | taskPtr.TargetLanguage = string(stepParam.TargetLanguage) 66 | taskPtr.ProcessPct = 10 67 | splitResult := strings.Split(result, "####") 68 | if len(splitResult) == 1 { 69 | taskPtr.TranslatedTitle = splitResult[0] 70 | } else if len(splitResult) == 2 { 71 | taskPtr.TranslatedTitle = splitResult[0] 72 | taskPtr.TranslatedDescription = splitResult[1] 73 | } else { 74 | log.GetLogger().Error("getVideoInfo translate video info error split result length != 1 and 2", zap.Any("stepParam", stepParam), zap.Any("translate result", result), zap.Error(err)) 75 | } 76 | } 77 | return nil 78 | } 79 | -------------------------------------------------------------------------------- /internal/service/init.go: -------------------------------------------------------------------------------- 1 | package service 2 | 3 | import ( 4 | "krillin-ai/config" 5 | "krillin-ai/internal/types" 6 | "krillin-ai/log" 7 | "krillin-ai/pkg/aliyun" 8 | "krillin-ai/pkg/fasterwhisper" 9 | "krillin-ai/pkg/whispercpp" 10 | "krillin-ai/pkg/openai" 11 | "krillin-ai/pkg/whisper" 12 | "krillin-ai/pkg/whisperkit" 13 | 14 | "go.uber.org/zap" 15 | ) 16 | 17 | type Service struct { 18 | Transcriber 
types.Transcriber 19 | ChatCompleter types.ChatCompleter 20 | TtsClient *aliyun.TtsClient 21 | OssClient *aliyun.OssClient 22 | VoiceCloneClient *aliyun.VoiceCloneClient 23 | } 24 | 25 | func NewService() *Service { 26 | var transcriber types.Transcriber 27 | var chatCompleter types.ChatCompleter 28 | 29 | switch config.Conf.App.TranscribeProvider { 30 | case "openai": 31 | transcriber = whisper.NewClient(config.Conf.Openai.Whisper.BaseUrl, config.Conf.Openai.Whisper.ApiKey, config.Conf.App.Proxy) 32 | case "aliyun": 33 | transcriber = aliyun.NewAsrClient(config.Conf.Aliyun.Bailian.ApiKey) 34 | case "fasterwhisper": 35 | transcriber = fasterwhisper.NewFastwhisperProcessor(config.Conf.LocalModel.Fasterwhisper) 36 | case "whispercpp": 37 | transcriber = whispercpp.NewWhispercppProcessor(config.Conf.LocalModel.Whispercpp) 38 | case "whisperkit": 39 | transcriber = whisperkit.NewWhisperKitProcessor(config.Conf.LocalModel.Whisperkit) 40 | } 41 | log.GetLogger().Info("当前选择的转录源: ", zap.String("transcriber", config.Conf.App.TranscribeProvider)) 42 | 43 | switch config.Conf.App.LlmProvider { 44 | case "openai": 45 | chatCompleter = openai.NewClient(config.Conf.Openai.BaseUrl, config.Conf.Openai.ApiKey, config.Conf.App.Proxy) 46 | case "aliyun": 47 | chatCompleter = aliyun.NewChatClient(config.Conf.Aliyun.Bailian.ApiKey) 48 | } 49 | log.GetLogger().Info("当前选择的LLM源: ", zap.String("llm", config.Conf.App.LlmProvider)) 50 | 51 | return &Service{ 52 | Transcriber: transcriber, 53 | ChatCompleter: chatCompleter, 54 | TtsClient: aliyun.NewTtsClient(config.Conf.Aliyun.Speech.AccessKeyId, config.Conf.Aliyun.Speech.AccessKeySecret, config.Conf.Aliyun.Speech.AppKey), 55 | OssClient: aliyun.NewOssClient(config.Conf.Aliyun.Oss.AccessKeyId, config.Conf.Aliyun.Oss.AccessKeySecret, config.Conf.Aliyun.Oss.Bucket), 56 | VoiceCloneClient: aliyun.NewVoiceCloneClient(config.Conf.Aliyun.Speech.AccessKeyId, config.Conf.Aliyun.Speech.AccessKeySecret, config.Conf.Aliyun.Speech.AppKey), 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /internal/service/link2file.go: -------------------------------------------------------------------------------- 1 | package service 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "fmt" 7 | "go.uber.org/zap" 8 | "krillin-ai/config" 9 | "krillin-ai/internal/storage" 10 | "krillin-ai/internal/types" 11 | "krillin-ai/log" 12 | "krillin-ai/pkg/util" 13 | "os/exec" 14 | "strings" 15 | ) 16 | 17 | func (s Service) linkToFile(ctx context.Context, stepParam *types.SubtitleTaskStepParam) error { 18 | var ( 19 | err error 20 | output []byte 21 | ) 22 | link := stepParam.Link 23 | audioPath := fmt.Sprintf("%s/%s", stepParam.TaskBasePath, types.SubtitleTaskAudioFileName) 24 | videoPath := fmt.Sprintf("%s/%s", stepParam.TaskBasePath, types.SubtitleTaskVideoFileName) 25 | stepParam.TaskPtr.ProcessPct = 3 26 | if strings.Contains(link, "local:") { 27 | // 本地文件 28 | videoPath = strings.ReplaceAll(link, "local:", "") 29 | stepParam.InputVideoPath = videoPath 30 | cmd := exec.Command(storage.FfmpegPath, "-i", videoPath, "-vn", "-ar", "44100", "-ac", "2", "-ab", "192k", "-f", "mp3", audioPath) 31 | output, err = cmd.CombinedOutput() 32 | if err != nil { 33 | log.GetLogger().Error("generateAudioSubtitles.linkToFile ffmpeg error", zap.Any("step param", stepParam), zap.String("output", string(output)), zap.Error(err)) 34 | return fmt.Errorf("generateAudioSubtitles.linkToFile ffmpeg error: %w", err) 35 | } 36 | } else if strings.Contains(link, "youtube.com") { 37 | 
var videoId string 38 | videoId, err = util.GetYouTubeID(link) 39 | if err != nil { 40 | log.GetLogger().Error("linkToFile.GetYouTubeID error", zap.Any("step param", stepParam), zap.Error(err)) 41 | return fmt.Errorf("linkToFile.GetYouTubeID error: %w", err) 42 | } 43 | stepParam.Link = "https://www.youtube.com/watch?v=" + videoId 44 | cmdArgs := []string{"-f", "bestaudio", "--extract-audio", "--audio-format", "mp3", "--audio-quality", "192K", "-o", audioPath, stepParam.Link} 45 | if config.Conf.App.Proxy != "" { 46 | cmdArgs = append(cmdArgs, "--proxy", config.Conf.App.Proxy) 47 | } 48 | cmdArgs = append(cmdArgs, "--cookies", "./cookies.txt") 49 | if storage.FfmpegPath != "ffmpeg" { 50 | cmdArgs = append(cmdArgs, "--ffmpeg-location", storage.FfmpegPath) 51 | } 52 | cmd := exec.Command(storage.YtdlpPath, cmdArgs...) 53 | output, err = cmd.CombinedOutput() 54 | if err != nil { 55 | log.GetLogger().Error("linkToFile download audio yt-dlp error", zap.Any("step param", stepParam), zap.String("output", string(output)), zap.Error(err)) 56 | return fmt.Errorf("linkToFile download audio yt-dlp error: %w", err) 57 | } 58 | } else if strings.Contains(link, "bilibili.com") { 59 | videoId := util.GetBilibiliVideoId(link) 60 | if videoId == "" { 61 | return errors.New("linkToFile error: invalid link") 62 | } 63 | stepParam.Link = "https://www.bilibili.com/video/" + videoId 64 | cmdArgs := []string{"-f", "bestaudio[ext=m4a]", "-x", "--audio-format", "mp3", "-o", audioPath, stepParam.Link} 65 | if config.Conf.App.Proxy != "" { 66 | cmdArgs = append(cmdArgs, "--proxy", config.Conf.App.Proxy) 67 | } 68 | if storage.FfmpegPath != "ffmpeg" { 69 | cmdArgs = append(cmdArgs, "--ffmpeg-location", storage.FfmpegPath) 70 | } 71 | cmd := exec.Command(storage.YtdlpPath, cmdArgs...) 72 | output, err = cmd.CombinedOutput() 73 | if err != nil { 74 | log.GetLogger().Error("linkToFile download audio yt-dlp error", zap.Any("step param", stepParam), zap.String("output", string(output)), zap.Error(err)) 75 | return fmt.Errorf("linkToFile download audio yt-dlp error: %w", err) 76 | } 77 | } else { 78 | log.GetLogger().Info("linkToFile.unsupported link type", zap.Any("step param", stepParam)) 79 | return errors.New("linkToFile error: unsupported link, only support youtube, bilibili and local file") 80 | } 81 | stepParam.TaskPtr.ProcessPct = 6 82 | stepParam.AudioFilePath = audioPath 83 | 84 | if !strings.HasPrefix(link, "local:") && stepParam.EmbedSubtitleVideoType != "none" { 85 | // 需要下载原视频 86 | cmdArgs := []string{"-f", "bestvideo[height<=1080][ext=mp4]+bestaudio[ext=m4a]/bestvideo[height<=720][ext=mp4]+bestaudio[ext=m4a]/bestvideo[height<=480][ext=mp4]+bestaudio[ext=m4a]", "-o", videoPath, stepParam.Link} 87 | if config.Conf.App.Proxy != "" { 88 | cmdArgs = append(cmdArgs, "--proxy", config.Conf.App.Proxy) 89 | } 90 | if storage.FfmpegPath != "" { 91 | cmdArgs = append(cmdArgs, "--ffmpeg-location", storage.FfmpegPath) 92 | } 93 | cmd := exec.Command(storage.YtdlpPath, cmdArgs...) 
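// For illustration (an assumption-laden sketch, not behavior guaranteed by the code): when a proxy
// and a bundled ffmpeg are configured, the cmdArgs slice above assembles a command roughly like
//   yt-dlp -f "bestvideo[height<=1080][ext=mp4]+bestaudio[ext=m4a]/..." -o tasks/<taskId>/origin_video.mp4 <link> --proxy <proxy> --ffmpeg-location <ffmpeg>
// where --proxy and --ffmpeg-location are only appended when they are actually set.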
94 | output, err = cmd.CombinedOutput() 95 | if err != nil { 96 | log.GetLogger().Error("linkToFile download video yt-dlp error", zap.Any("step param", stepParam), zap.String("output", string(output)), zap.Error(err)) 97 | return fmt.Errorf("linkToFile download video yt-dlp error: %w", err) 98 | } 99 | stepParam.InputVideoPath = videoPath 100 | } 101 | 102 | // 更新字幕任务信息 103 | stepParam.TaskPtr.ProcessPct = 10 104 | return nil 105 | } 106 | -------------------------------------------------------------------------------- /internal/service/subtitle_service.go: -------------------------------------------------------------------------------- 1 | package service 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "fmt" 7 | "github.com/samber/lo" 8 | "go.uber.org/zap" 9 | "krillin-ai/internal/dto" 10 | "krillin-ai/internal/storage" 11 | "krillin-ai/internal/types" 12 | "krillin-ai/log" 13 | "krillin-ai/pkg/util" 14 | "os" 15 | "path/filepath" 16 | "runtime" 17 | "strings" 18 | ) 19 | 20 | func (s Service) StartSubtitleTask(req dto.StartVideoSubtitleTaskReq) (*dto.StartVideoSubtitleTaskResData, error) { 21 | // 校验链接 22 | if strings.Contains(req.Url, "youtube.com") { 23 | videoId, _ := util.GetYouTubeID(req.Url) 24 | if videoId == "" { 25 | return nil, fmt.Errorf("链接不合法") 26 | } 27 | } 28 | if strings.Contains(req.Url, "bilibili.com") { 29 | videoId := util.GetBilibiliVideoId(req.Url) 30 | if videoId == "" { 31 | return nil, fmt.Errorf("链接不合法") 32 | } 33 | } 34 | // 生成任务id 35 | taskId := util.GenerateRandStringWithUpperLowerNum(8) 36 | // 构造任务所需参数 37 | var resultType types.SubtitleResultType 38 | // 根据入参选项确定要返回的字幕类型 39 | if req.TargetLang == "none" { 40 | resultType = types.SubtitleResultTypeOriginOnly 41 | } else { 42 | if req.Bilingual == types.SubtitleTaskBilingualYes { 43 | if req.TranslationSubtitlePos == types.SubtitleTaskTranslationSubtitlePosTop { 44 | resultType = types.SubtitleResultTypeBilingualTranslationOnTop 45 | } else { 46 | resultType = types.SubtitleResultTypeBilingualTranslationOnBottom 47 | } 48 | } else { 49 | resultType = types.SubtitleResultTypeTargetOnly 50 | } 51 | } 52 | // 文字替换map 53 | replaceWordsMap := make(map[string]string) 54 | if len(req.Replace) > 0 { 55 | for _, replace := range req.Replace { 56 | beforeAfter := strings.Split(replace, "|") 57 | if len(beforeAfter) == 2 { 58 | replaceWordsMap[beforeAfter[0]] = beforeAfter[1] 59 | } else { 60 | log.GetLogger().Info("generateAudioSubtitles replace param length err", zap.Any("replace", replace), zap.Any("taskId", taskId)) 61 | } 62 | } 63 | } 64 | var err error 65 | ctx := context.Background() 66 | // 创建字幕任务文件夹 67 | taskBasePath := filepath.Join("./tasks", taskId) 68 | if _, err = os.Stat(taskBasePath); os.IsNotExist(err) { 69 | // 不存在则创建 70 | err = os.MkdirAll(filepath.Join(taskBasePath, "output"), os.ModePerm) 71 | if err != nil { 72 | log.GetLogger().Error("StartVideoSubtitleTask MkdirAll err", zap.Any("req", req), zap.Error(err)) 73 | } 74 | } 75 | 76 | // 创建任务 77 | taskPtr := &types.SubtitleTask{ 78 | TaskId: taskId, 79 | VideoSrc: req.Url, 80 | Status: types.SubtitleTaskStatusProcessing, 81 | } 82 | storage.SubtitleTasks.Store(taskId, taskPtr) 83 | 84 | var ttsVoiceCode string 85 | if req.TtsVoiceCode == types.SubtitleTaskTtsVoiceCodeLongyu { 86 | ttsVoiceCode = "longyu" 87 | } else { 88 | ttsVoiceCode = "longchen" 89 | } 90 | 91 | // 处理声音克隆源 92 | var voiceCloneAudioUrl string 93 | if req.TtsVoiceCloneSrcFileUrl != "" { 94 | localFileUrl := strings.TrimPrefix(req.TtsVoiceCloneSrcFileUrl, "local:") 95 | fileKey := 
util.GenerateRandStringWithUpperLowerNum(5) + filepath.Ext(localFileUrl) // 防止url encode的问题,这里统一处理 96 | err = s.OssClient.UploadFile(context.Background(), fileKey, localFileUrl, s.OssClient.Bucket) 97 | if err != nil { 98 | log.GetLogger().Error("StartVideoSubtitleTask UploadFile err", zap.Any("req", req), zap.Error(err)) 99 | return nil, errors.New("上传声音克隆源失败") 100 | } 101 | voiceCloneAudioUrl = fmt.Sprintf("https://%s.oss-cn-shanghai.aliyuncs.com/%s", s.OssClient.Bucket, fileKey) 102 | log.GetLogger().Info("StartVideoSubtitleTask 上传声音克隆源成功", zap.Any("oss url", voiceCloneAudioUrl)) 103 | } 104 | 105 | stepParam := types.SubtitleTaskStepParam{ 106 | TaskId: taskId, 107 | TaskPtr: taskPtr, 108 | TaskBasePath: taskBasePath, 109 | Link: req.Url, 110 | SubtitleResultType: resultType, 111 | EnableModalFilter: req.ModalFilter == types.SubtitleTaskModalFilterYes, 112 | EnableTts: req.Tts == types.SubtitleTaskTtsYes, 113 | TtsVoiceCode: ttsVoiceCode, 114 | VoiceCloneAudioUrl: voiceCloneAudioUrl, 115 | ReplaceWordsMap: replaceWordsMap, 116 | OriginLanguage: types.StandardLanguageCode(req.OriginLanguage), 117 | TargetLanguage: types.StandardLanguageCode(req.TargetLang), 118 | UserUILanguage: types.StandardLanguageCode(req.Language), 119 | EmbedSubtitleVideoType: req.EmbedSubtitleVideoType, 120 | VerticalVideoMajorTitle: req.VerticalMajorTitle, 121 | VerticalVideoMinorTitle: req.VerticalMinorTitle, 122 | MaxWordOneLine: 12, // 默认值 123 | } 124 | if req.OriginLanguageWordOneLine != 0 { 125 | stepParam.MaxWordOneLine = req.OriginLanguageWordOneLine 126 | } 127 | 128 | log.GetLogger().Info("current task info", zap.String("taskId", taskId), zap.Any("param", stepParam)) 129 | 130 | go func() { 131 | defer func() { 132 | if r := recover(); r != nil { 133 | const size = 64 << 10 134 | buf := make([]byte, size) 135 | buf = buf[:runtime.Stack(buf, false)] 136 | log.GetLogger().Error("autoVideoSubtitle panic", zap.Any("panic:", r), zap.Any("stack:", buf)) 137 | stepParam.TaskPtr.Status = types.SubtitleTaskStatusFailed 138 | } 139 | }() 140 | // 新版流程:链接->本地音频文件->视频信息获取(若有)->本地字幕文件->语言合成->视频合成->字幕文件链接生成 141 | log.GetLogger().Info("video subtitle start task", zap.String("taskId", taskId)) 142 | err = s.linkToFile(ctx, &stepParam) 143 | if err != nil { 144 | log.GetLogger().Error("StartVideoSubtitleTask linkToFile err", zap.Any("req", req), zap.Error(err)) 145 | stepParam.TaskPtr.Status = types.SubtitleTaskStatusFailed 146 | stepParam.TaskPtr.FailReason = err.Error() 147 | return 148 | } 149 | // 暂时不加视频信息 150 | //err = s.getVideoInfo(ctx, &stepParam) 151 | //if err != nil { 152 | // log.GetLogger().Error("StartVideoSubtitleTask getVideoInfo err", zap.Any("req", req), zap.Error(err)) 153 | // stepParam.TaskPtr.Status = types.SubtitleTaskStatusFailed 154 | // stepParam.TaskPtr.FailReason = "get video info error" 155 | // return 156 | //} 157 | err = s.audioToSubtitle(ctx, &stepParam) 158 | if err != nil { 159 | log.GetLogger().Error("StartVideoSubtitleTask audioToSubtitle err", zap.Any("req", req), zap.Error(err)) 160 | stepParam.TaskPtr.Status = types.SubtitleTaskStatusFailed 161 | stepParam.TaskPtr.FailReason = err.Error() 162 | return 163 | } 164 | err = s.srtFileToSpeech(ctx, &stepParam) 165 | if err != nil { 166 | log.GetLogger().Error("StartVideoSubtitleTask srtFileToSpeech err", zap.Any("req", req), zap.Error(err)) 167 | stepParam.TaskPtr.Status = types.SubtitleTaskStatusFailed 168 | stepParam.TaskPtr.FailReason = err.Error() 169 | return 170 | } 171 | err = s.embedSubtitles(ctx, &stepParam) 172 | if err != 
nil { 173 | log.GetLogger().Error("StartVideoSubtitleTask embedSubtitles err", zap.Any("req", req), zap.Error(err)) 174 | stepParam.TaskPtr.Status = types.SubtitleTaskStatusFailed 175 | stepParam.TaskPtr.FailReason = err.Error() 176 | return 177 | } 178 | err = s.uploadSubtitles(ctx, &stepParam) 179 | if err != nil { 180 | log.GetLogger().Error("StartVideoSubtitleTask uploadSubtitles err", zap.Any("req", req), zap.Error(err)) 181 | stepParam.TaskPtr.Status = types.SubtitleTaskStatusFailed 182 | stepParam.TaskPtr.FailReason = err.Error() 183 | return 184 | } 185 | 186 | log.GetLogger().Info("video subtitle task end", zap.String("taskId", taskId)) 187 | }() 188 | 189 | return &dto.StartVideoSubtitleTaskResData{ 190 | TaskId: taskId, 191 | }, nil 192 | } 193 | 194 | func (s Service) GetTaskStatus(req dto.GetVideoSubtitleTaskReq) (*dto.GetVideoSubtitleTaskResData, error) { 195 | task, ok := storage.SubtitleTasks.Load(req.TaskId) 196 | if !ok || task == nil { 197 | return nil, errors.New("任务不存在") 198 | } 199 | taskPtr := task.(*types.SubtitleTask) 200 | if taskPtr.Status == types.SubtitleTaskStatusFailed { 201 | return nil, fmt.Errorf("任务失败,原因:%s", taskPtr.FailReason) 202 | } 203 | return &dto.GetVideoSubtitleTaskResData{ 204 | TaskId: taskPtr.TaskId, 205 | ProcessPercent: taskPtr.ProcessPct, 206 | VideoInfo: &dto.VideoInfo{ 207 | Title: taskPtr.Title, 208 | Description: taskPtr.Description, 209 | TranslatedTitle: taskPtr.TranslatedTitle, 210 | TranslatedDescription: taskPtr.TranslatedDescription, 211 | }, 212 | SubtitleInfo: lo.Map(taskPtr.SubtitleInfos, func(item types.SubtitleInfo, _ int) *dto.SubtitleInfo { 213 | return &dto.SubtitleInfo{ 214 | Name: item.Name, 215 | DownloadUrl: item.DownloadUrl, 216 | } 217 | }), 218 | TargetLanguage: taskPtr.TargetLanguage, 219 | SpeechDownloadUrl: taskPtr.SpeechDownloadUrl, 220 | }, nil 221 | } 222 | -------------------------------------------------------------------------------- /internal/service/upload_subtitle.go: -------------------------------------------------------------------------------- 1 | package service 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "go.uber.org/zap" 7 | "krillin-ai/internal/types" 8 | "krillin-ai/log" 9 | "krillin-ai/pkg/util" 10 | ) 11 | 12 | func (s Service) uploadSubtitles(ctx context.Context, stepParam *types.SubtitleTaskStepParam) error { 13 | subtitleInfos := make([]types.SubtitleInfo, 0) 14 | var err error 15 | for _, info := range stepParam.SubtitleInfos { 16 | resultPath := info.Path 17 | if len(stepParam.ReplaceWordsMap) > 0 { // 需要进行替换 18 | replacedSrcFile := util.AddSuffixToFileName(resultPath, "_replaced") 19 | err = util.ReplaceFileContent(resultPath, replacedSrcFile, stepParam.ReplaceWordsMap) 20 | if err != nil { 21 | log.GetLogger().Error("uploadSubtitles ReplaceFileContent err", zap.Any("stepParam", stepParam), zap.Error(err)) 22 | return fmt.Errorf("uploadSubtitles ReplaceFileContent err: %w", err) 23 | } 24 | resultPath = replacedSrcFile 25 | } 26 | subtitleInfos = append(subtitleInfos, types.SubtitleInfo{ 27 | TaskId: stepParam.TaskId, 28 | Name: info.Name, 29 | DownloadUrl: "/api/file/" + resultPath, 30 | }) 31 | } 32 | // 更新字幕任务信息 33 | taskPtr := stepParam.TaskPtr 34 | taskPtr.SubtitleInfos = subtitleInfos 35 | taskPtr.Status = types.SubtitleTaskStatusSuccess 36 | taskPtr.ProcessPct = 100 37 | // 配音文件 38 | if stepParam.TtsResultFilePath != "" { 39 | taskPtr.SpeechDownloadUrl = "/api/file/" + stepParam.TtsResultFilePath 40 | } 41 | return nil 42 | } 43 | 
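The sketch below is not part of the repository; it is a minimal illustration of how a client could drive the HTTP API wired up in internal/router/router.go and internal/handler/subtitle_task.go, using the request/response shapes from internal/dto/subtitle_task.go and the iota+1 option constants from internal/types/subtitle_task.go. The base URL (host/port) and the language codes "en"/"zh_cn" are assumptions — adjust them to your config.toml and to the codes accepted by internal/types/language.go.

// apiclient.go — illustrative only. Starts a subtitle task and polls it until done.
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
	"time"
)

// baseURL is an assumption: point it at the host/port configured in config.toml.
const baseURL = "http://127.0.0.1:8888"

// apiResp mirrors internal/response.Response; Data is decoded per endpoint.
type apiResp struct {
	Error int32           `json:"error"`
	Msg   string          `json:"msg"`
	Data  json.RawMessage `json:"data"`
}

func main() {
	// Option values follow internal/types/subtitle_task.go (iota + 1):
	// bilingual=1 (yes), translation_subtitle_pos=1 (top), modal_filter=2 (no), tts=2 (no).
	body, _ := json.Marshal(map[string]any{
		"url":                       "https://www.youtube.com/watch?v=VIDEO_ID", // replace with a real link, or "local:./uploads/xxx.mp4" from the upload endpoint
		"origin_lang":               "en",    // assumed code; see internal/types/language.go
		"target_lang":               "zh_cn", // assumed code; "none" would skip translation
		"bilingual":                 1,
		"translation_subtitle_pos":  1,
		"modal_filter":              2,
		"tts":                       2,
		"language":                  "zh_cn",
		"embed_subtitle_video_type": "none",
	})
	resp, err := http.Post(baseURL+"/api/capability/subtitleTask", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	var started apiResp
	if err = json.NewDecoder(resp.Body).Decode(&started); err != nil {
		panic(err)
	}
	resp.Body.Close()
	if started.Error != 0 {
		panic("start failed: " + started.Msg)
	}
	var task struct {
		TaskId string `json:"task_id"`
	}
	json.Unmarshal(started.Data, &task)
	fmt.Println("task started:", task.TaskId)

	// Poll until process_percent reaches 100 or the API reports a failure.
	for {
		r, err := http.Get(baseURL + "/api/capability/subtitleTask?taskId=" + task.TaskId)
		if err != nil {
			panic(err)
		}
		var polled apiResp
		if err = json.NewDecoder(r.Body).Decode(&polled); err != nil {
			panic(err)
		}
		r.Body.Close()
		if polled.Error != 0 {
			fmt.Println("task failed:", polled.Msg)
			return
		}
		var status struct {
			ProcessPercent uint8 `json:"process_percent"`
			SubtitleInfo   []struct {
				Name        string `json:"name"`
				DownloadUrl string `json:"download_url"`
			} `json:"subtitle_info"`
		}
		json.Unmarshal(polled.Data, &status)
		fmt.Printf("progress: %d%%\n", status.ProcessPercent)
		if status.ProcessPercent >= 100 {
			for _, s := range status.SubtitleInfo {
				fmt.Println("subtitle ready:", s.Name, "->", baseURL+s.DownloadUrl)
			}
			return
		}
		time.Sleep(5 * time.Second)
	}
}

Because response.R in internal/response/response.go always answers with HTTP 200, a client has to inspect the error field in the JSON body rather than the status code, which is what the polling loop above does.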
-------------------------------------------------------------------------------- /internal/storage/bin.go: -------------------------------------------------------------------------------- 1 | package storage 2 | 3 | var ( 4 | FfmpegPath string 5 | FfprobePath string 6 | YtdlpPath string 7 | FasterwhisperPath string 8 | WhisperKitPath string 9 | WhispercppPath string 10 | ) 11 | -------------------------------------------------------------------------------- /internal/storage/subtitle_task.go: -------------------------------------------------------------------------------- 1 | package storage 2 | 3 | import ( 4 | "sync" 5 | ) 6 | 7 | var SubtitleTasks = sync.Map{} // task id -> SubtitleTask,用于接口查询数据 8 | -------------------------------------------------------------------------------- /internal/types/embed_subtitle.go: -------------------------------------------------------------------------------- 1 | package types 2 | 3 | const AssHeaderHorizontal = `[Script Info] 4 | Title: Example 5 | Original Script: 6 | ScriptType: v4.00+ 7 | PlayDepth: 0 8 | 9 | [V4+ Styles] 10 | Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding 11 | Style: Major,Arial,18,&H00BFFF,&H000000FF,&H00000000,&H64000000,-1,0,0,0,100,100,0,0,1,2.5,1.5,2,10,10,20,1 12 | Style: Minor,Arial,12,&H00BFFF,&H000000FF,&H00000000,&H64000000,-1,0,0,0,100,100,0,0,1,2.5,1.5,2,10,10,30,1 13 | 14 | 15 | [Events] 16 | Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 17 | ` 18 | const AssHeaderVertical = `[Script Info] 19 | Title: Example 20 | Original Script: 21 | ScriptType: v4.00+ 22 | PlayDepth: 0 23 | 24 | [V4+ Styles] 25 | Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding 26 | Style: Major,Arial,15,&H00BFFF,&H000000FF,&H00000000,&H64000000,-1,0,0,0,100,100,-10,0,1,2.5,1.5,2,10,10,80,1 27 | Style: Minor,Arial,8,&H00BFFF,&H000000FF,&H00000000,&H64000000,-1,0,0,0,100,100,-10,0,1,2.5,1.5,2,10,10,100,1 28 | 29 | 30 | [Events] 31 | Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 32 | ` 33 | -------------------------------------------------------------------------------- /internal/types/fasterwhisper.go: -------------------------------------------------------------------------------- 1 | package types 2 | 3 | type FasterWhisperOutput struct { 4 | Segments []struct { 5 | Id int `json:"id"` 6 | Seek int `json:"seek"` 7 | Start float64 `json:"start"` 8 | End float64 `json:"end"` 9 | Text string `json:"text"` 10 | Tokens []int `json:"tokens"` 11 | Temperature float64 `json:"temperature"` 12 | AvgLogprob float64 `json:"avg_logprob"` 13 | CompressionRatio float64 `json:"compression_ratio"` 14 | NoSpeechProb float64 `json:"no_speech_prob"` 15 | Words []struct { 16 | Start float64 `json:"start"` 17 | End float64 `json:"end"` 18 | Word string `json:"word"` 19 | Probability float64 `json:"probability"` 20 | } `json:"words"` 21 | } `json:"segments"` 22 | Language string `json:"language"` 23 | Text string `json:"text"` 24 | } 25 | -------------------------------------------------------------------------------- /internal/types/interface.go: -------------------------------------------------------------------------------- 1 | 
package types 2 | 3 | type ChatCompleter interface { 4 | ChatCompletion(query string) (string, error) 5 | } 6 | 7 | type Transcriber interface { 8 | Transcription(audioFile, language, wordDir string) (*TranscriptionData, error) 9 | } 10 | -------------------------------------------------------------------------------- /internal/types/subtitle_task.go: -------------------------------------------------------------------------------- 1 | package types 2 | 3 | // var SplitTextPrompt = `你是一个英语处理专家,擅长翻译成%s和处理英文文本,根据句意和标点对句子进行拆分。 4 | 5 | // - 不要漏掉原英文任何一个单词 6 | // - 翻译一定要流畅,完整表达原文意思 7 | // - 优先根据标点符号进行拆分,遇到逗号、句号、问号,一定要拆分,必须把句子拆短些。 8 | // - 遇到定语从句、并列句等复杂句式,根据连词(如and, but, which, when)进行拆分。 9 | // - 拆分后的单行句子英文不能超过15个单词。 10 | // - 翻译的时候确保每个原始字幕块单独存在且编号和格式正确。 11 | // - 不需要任何额外的话语,直接按下面格式输出结果。 12 | 13 | // 1 14 | // [中文翻译] 15 | // [英文句子] 16 | 17 | // 2 18 | // [中文翻译] 19 | // [英文句子] 20 | 21 | // 内容如下:` 22 | 23 | var SplitTextPrompt = `你是一个语言处理专家,专注于自然语言处理和翻译任务。按照以下步骤和要求,以最大程度实现准确和高质量翻译: 24 | 25 | 1. 将原句翻译为%s,确保译文流畅、自然,达到专业翻译水平。 26 | 2. 严格依据标点符号(逗号、句号、问号等)将内容拆分成单独的句子,并依据以下规则确保拆分粒度合理: 27 | - 每个句子在保证句意完整的情况下尽可能短,长度尽量不得超过15个字。 28 | - 可以根据连词(例如 "and", "but", "which", "when", "so", "所以", "但是", "因此", "考虑到" 等)进一步拆分句子,避免语句太长。 29 | 3. 对每个拆分的句子分别翻译,确保不遗漏或修改任何字词。 30 | 4. 将每对翻译后的句子与原句用独立编号表示,并分别以方括号[]包裹内容。 31 | 5. 输出的翻译与原文应保持对应,严格按照原文顺序呈现,不得有错位,且原文尽可能使用原文。 32 | 6. 不管内容是正式还是非正式,都要翻译。 33 | 34 | 翻译输出应采用如下格式: 35 | **正常翻译的示例(注意每块3部分,每个部分都独占一行,空格分块)**: 36 | 1 37 | [翻译后的句子1] 38 | [原句子1] 39 | 40 | 2 41 | [翻译后的句子2] 42 | [原句子2] 43 | 44 | **无文本需要翻译的输出示例**: 45 | [无文本] 46 | 47 | 确保高效、精确地完成上述翻译任务,输入内容如下: 48 | ` 49 | 50 | // 带有语气词过滤的拆分Prompt 51 | var SplitTextPromptWithModalFilter = `你是一个语言处理专家,专注于自然语言处理和翻译任务。按照以下步骤和要求,以最大程度实现准确和高质量翻译: 52 | 53 | 1. 将原句翻译为%s,确保译文流畅、自然,达到专业翻译水平。 54 | 2. 严格依据标点符号(逗号、句号、问号等)将内容拆分成单独的句子,并依据以下规则确保拆分粒度合理: 55 | - 每个句子在保证句意完整的情况下尽可能短,长度尽量不得超过15个字。 56 | - 可以根据连词(例如 "and", "but", "which", "when", "so", "所以", "但是", "因此", "考虑到" 等)进一步拆分句子,避免语句太长。 57 | 3. 对每个拆分的句子分别翻译,确保不遗漏或修改任何字词。 58 | 4. 将每对翻译后的句子与原句用独立编号表示,并分别以方括号[]包裹内容。 59 | 5. 输出的翻译与原文应保持对应,严格按照原文顺序呈现,不得有错位,且原文尽可能使用原文。 60 | 6. 忽略文本中的语气词,比如"Oh" "Ah" "Wow"等等。 61 | 7. 
不管内容是正式还是非正式,都要翻译。 62 | 63 | 翻译输出应采用如下格式: 64 | **正常翻译的示例(注意每块3部分,每个部分都独占一行,空格分块)**: 65 | 1 66 | [翻译后的句子1] 67 | [原句子1] 68 | 69 | 2 70 | [翻译后的句子2] 71 | [原句子2] 72 | 73 | **无文本需要翻译的输出示例**: 74 | [无文本] 75 | 76 | 确保高效、精确地完成上述翻译任务,输入内容如下: 77 | ` 78 | 79 | var TranslateVideoTitleAndDescriptionPrompt = `你是一个专业的翻译专家,请翻译下面给出的标题和描述信息(两者用####来分隔),要求如下: 80 | - 将内容翻译成 %s 81 | - 翻译后的内容仍然用####来分隔标题和描述两部分 82 | 以下全部是源内容,请完整按要求翻译: 83 | %s 84 | ` 85 | 86 | type SmallAudio struct { 87 | AudioFile string 88 | Num int 89 | TranscriptionData *TranscriptionData 90 | SrtNoTsFile string 91 | } 92 | 93 | type SubtitleResultType int 94 | 95 | const ( 96 | SubtitleResultTypeOriginOnly SubtitleResultType = iota + 1 // 仅返回原语言字幕 97 | SubtitleResultTypeTargetOnly // 仅返回翻译后语言字幕 98 | SubtitleResultTypeBilingualTranslationOnTop // 返回双语字幕,翻译后的字幕在上 99 | SubtitleResultTypeBilingualTranslationOnBottom // 返回双语字幕,翻译后的字幕在下 100 | ) 101 | 102 | const ( 103 | SubtitleTaskBilingualYes uint8 = iota + 1 104 | SubtitleTaskBilingualNo 105 | ) 106 | 107 | const ( 108 | SubtitleTaskTranslationSubtitlePosTop uint8 = iota + 1 109 | SubtitleTaskTranslationSubtitlePosBelow 110 | ) 111 | 112 | const ( 113 | SubtitleTaskModalFilterYes uint8 = iota + 1 114 | SubtitleTaskModalFilterNo 115 | ) 116 | 117 | const ( 118 | SubtitleTaskTtsYes uint8 = iota + 1 119 | SubtitleTaskTtsNo 120 | ) 121 | 122 | const ( 123 | SubtitleTaskTtsVoiceCodeLongyu uint8 = iota + 1 124 | SubtitleTaskTtsVoiceCodeLongchen 125 | ) 126 | 127 | const ( 128 | SubtitleTaskStatusProcessing uint8 = iota + 1 129 | SubtitleTaskStatusSuccess 130 | SubtitleTaskStatusFailed 131 | ) 132 | 133 | const ( 134 | SubtitleTaskAudioFileName = "origin_audio.mp3" 135 | SubtitleTaskVideoFileName = "origin_video.mp4" 136 | SubtitleTaskSplitAudioFileNamePrefix = "split_audio" 137 | SubtitleTaskSplitAudioFileNamePattern = SubtitleTaskSplitAudioFileNamePrefix + "_%03d.mp3" 138 | SubtitleTaskSplitAudioTxtFileNamePattern = "split_audio_txt_%d.txt" 139 | SubtitleTaskSplitAudioWordsFileNamePattern = "split_audio_words_%d.txt" 140 | SubtitleTaskSplitSrtNoTimestampFileNamePattern = "srt_no_ts_%d.srt" 141 | SubtitleTaskSrtNoTimestampFileName = "srt_no_ts.srt" 142 | SubtitleTaskSplitBilingualSrtFileNamePattern = "split_bilingual_srt_%d.srt" 143 | SubtitleTaskSplitShortOriginMixedSrtFileNamePattern = "split_short_origin_mixed_srt_%d.srt" //长中文+短英文 144 | SubtitleTaskSplitShortOriginSrtFileNamePattern = "split_short_origin_srt_%d.srt" //短英文 145 | SubtitleTaskBilingualSrtFileName = "bilingual_srt.srt" 146 | SubtitleTaskShortOriginMixedSrtFileName = "short_origin_mixed_srt.srt" //长中文+短英文 147 | SubtitleTaskShortOriginSrtFileName = "short_origin_srt.srt" //短英文 148 | SubtitleTaskOriginLanguageSrtFileName = "origin_language_srt.srt" 149 | SubtitleTaskOriginLanguageTextFileName = "origin_language.txt" 150 | SubtitleTaskTargetLanguageSrtFileName = "target_language_srt.srt" 151 | SubtitleTaskTargetLanguageTextFileName = "target_language.txt" 152 | SubtitleTaskStepParamGobPersistenceFileName = "step_param.gob" 153 | SubtitleTaskTransferredVerticalVideoFileName = "transferred_vertical_video.mp4" 154 | SubtitleTaskHorizontalEmbedVideoFileName = "horizontal_embed.mp4" 155 | SubtitleTaskVerticalEmbedVideoFileName = "vertical_embed.mp4" 156 | ) 157 | 158 | const ( 159 | TtsAudioDurationDetailsFileName = "audio_duration_details.txt" 160 | TtsResultAudioFileName = "tts_final_audio.wav" 161 | ) 162 | 163 | const ( 164 | AsrMono16kAudioFileName = "mono_16k_audio.mp3" 165 | ) 166 | 167 | type SubtitleFileInfo struct { 168 | Name string 
169 | Path string 170 | LanguageIdentifier string // 在最终下载的文件里标识语言,如zh_cn,en,bilingual 171 | } 172 | 173 | type SubtitleTaskStepParam struct { 174 | TaskId string 175 | TaskPtr *SubtitleTask // 和storage里面对应 176 | TaskBasePath string 177 | Link string 178 | AudioFilePath string 179 | SmallAudios []*SmallAudio 180 | SubtitleResultType SubtitleResultType 181 | EnableModalFilter bool 182 | EnableTts bool 183 | TtsVoiceCode string // 人声语音编码 184 | VoiceCloneAudioUrl string // 音色克隆的源音频oss地址 185 | ReplaceWordsMap map[string]string 186 | OriginLanguage StandardLanguageCode // 视频源语言 187 | TargetLanguage StandardLanguageCode // 用户希望的目标翻译语言 188 | UserUILanguage StandardLanguageCode // 用户的使用语言 189 | BilingualSrtFilePath string 190 | ShortOriginMixedSrtFilePath string 191 | SubtitleInfos []SubtitleFileInfo 192 | TtsSourceFilePath string 193 | TtsResultFilePath string 194 | InputVideoPath string // 源视频路径 195 | EmbedSubtitleVideoType string // 合成字幕嵌入的视频类型 none不嵌入 horizontal横屏 vertical竖屏 196 | VerticalVideoMajorTitle string // 合成竖屏视频的主标题 197 | VerticalVideoMinorTitle string 198 | MaxWordOneLine int // 字幕一行最多显示多少个字 199 | } 200 | 201 | type SrtSentence struct { 202 | Text string 203 | Start float64 204 | End float64 205 | } 206 | 207 | type SrtSentenceWithStrTime struct { 208 | Text string 209 | Start string 210 | End string 211 | } 212 | 213 | type SubtitleInfo struct { 214 | Id uint64 `json:"id" gorm:"column:id"` // 自增id 215 | TaskId string `json:"task_id" gorm:"column:task_id"` // task_id 216 | Uid uint32 `json:"uid" gorm:"column:uid"` // 用户id 217 | Name string `json:"name" gorm:"column:name"` // 字幕名称 218 | DownloadUrl string `json:"download_url" gorm:"column:download_url"` // 字幕地址 219 | CreateTime int64 `json:"create_time" gorm:"column:create_time;autoCreateTime"` // 创建时间 220 | } 221 | 222 | type SubtitleTask struct { 223 | Id uint64 `json:"id" gorm:"column:id"` // 自增id 224 | TaskId string `json:"task_id" gorm:"column:task_id"` // 任务id 225 | Title string `json:"title" gorm:"column:title"` // 标题 226 | Description string `json:"description" gorm:"column:description"` // 描述 227 | TranslatedTitle string `json:"translated_title" gorm:"column:translated_title"` // 翻译后的标题 228 | TranslatedDescription string `json:"translated_description" gorm:"column:translated_description"` // 翻译后的描述 229 | OriginLanguage string `json:"origin_language" gorm:"column:origin_language"` // 视频原语言 230 | TargetLanguage string `json:"target_language" gorm:"column:target_language"` // 翻译任务的目标语言 231 | VideoSrc string `json:"video_src" gorm:"column:video_src"` // 视频地址 232 | Status uint8 `json:"status" gorm:"column:status"` // 1-处理中,2-成功,3-失败 233 | LastSuccessStepNum uint8 `json:"last_success_step_num" gorm:"column:last_success_step_num"` // 最后成功的子任务序号,用于任务恢复 234 | FailReason string `json:"fail_reason" gorm:"column:fail_reason"` // 失败原因 235 | ProcessPct uint8 `json:"process_percent" gorm:"column:process_percent"` // 处理进度 236 | Duration uint32 `json:"duration" gorm:"column:duration"` // 视频时长 237 | SrtNum int `json:"srt_num" gorm:"column:srt_num"` // 字幕数量 238 | SubtitleInfos []SubtitleInfo `gorm:"foreignKey:TaskId;references:TaskId"` 239 | Cover string `json:"cover" gorm:"column:cover"` // 封面 240 | SpeechDownloadUrl string `json:"speech_download_url" gorm:"column:speech_download_url"` // 语音文件下载地址 241 | CreateTime int64 `json:"create_time" gorm:"column:create_time;autoCreateTime"` // 创建时间 242 | UpdateTime int64 `json:"update_time" gorm:"column:update_time;autoUpdateTime"` // 更新时间 243 | } 244 | 245 | type Word struct { 246 | Num int 247 | Text 
string 248 | Start float64 249 | End float64 250 | } 251 | 252 | type TranscriptionData struct { 253 | Language string 254 | Text string 255 | Words []Word 256 | } 257 | -------------------------------------------------------------------------------- /internal/types/whispercpp.go: -------------------------------------------------------------------------------- 1 | package types 2 | 3 | type WhispercppOutput struct { 4 | SystemInfo string `json:"systeminfo"` 5 | Model struct { 6 | Type string `json:"type"` 7 | Multilingual bool `json:"multilingual"` 8 | Vocab int `json:"vocab"` 9 | Audio struct { 10 | Ctx int `json:"ctx"` 11 | State int `json:"state"` 12 | Head int `json:"head"` 13 | Layer int `json:"layer"` 14 | } `json:"audio"` 15 | Text struct { 16 | Ctx int `json:"ctx"` 17 | State int `json:"state"` 18 | Head int `json:"head"` 19 | Layer int `json:"layer"` 20 | } `json:"text"` 21 | Mels int `json:"mels"` 22 | Ftype int `json:"ftype"` 23 | } `json:"model"` 24 | Params struct { 25 | Model string `json:"model"` 26 | Language string `json:"language"` 27 | Translate bool `json:"translate"` 28 | } `json:"params"` 29 | Result struct { 30 | Language string `json:"language"` 31 | } `json:"result"` 32 | Transcription []struct { 33 | Timestamps struct { 34 | From string `json:"from"` 35 | To string `json:"to"` 36 | } `json:"timestamps"` 37 | Offsets struct { 38 | From int `json:"from"` 39 | To int `json:"to"` 40 | } `json:"offsets"` 41 | Text string `json:"text"` 42 | Tokens []struct { 43 | Text string `json:"text"` 44 | Timestamps struct { 45 | From string `json:"from"` 46 | To string `json:"to"` 47 | } `json:"timestamps"` 48 | Offsets struct { 49 | From int `json:"from"` 50 | To int `json:"to"` 51 | } `json:"offsets"` 52 | ID int `json:"id"` 53 | P float64 `json:"p"` 54 | TDtw int `json:"t_dtw"` 55 | } `json:"tokens"` 56 | } `json:"transcription"` 57 | } 58 | -------------------------------------------------------------------------------- /internal/types/whisperkit.go: -------------------------------------------------------------------------------- 1 | package types 2 | 3 | type WhisperKitOutput struct { 4 | Text string `json:"text"` 5 | Language string `json:"language"` 6 | Segments []struct { 7 | Seek int `json:"seek"` 8 | Tokens []int `json:"tokens"` 9 | CompressionRatio float64 `json:"compressionRatio"` 10 | Temperature float64 `json:"temperature"` 11 | AvgLogprob float64 `json:"avgLogprob"` 12 | NoSpeechProb float64 `json:"noSpeechProb"` 13 | Id int `json:"id"` 14 | TokenLogProbs []map[string]float64 `json:"tokenLogProbs"` 15 | Start float64 `json:"start"` 16 | Words []struct { 17 | Start float64 `json:"start"` 18 | End float64 `json:"end"` 19 | Word string `json:"word"` 20 | Probability float64 `json:"probability"` 21 | Tokens []int `json:"tokens"` 22 | } `json:"words"` 23 | Text string `json:"text"` 24 | End float64 `json:"end"` 25 | } `json:"segments"` 26 | } 27 | -------------------------------------------------------------------------------- /log/zap.go: -------------------------------------------------------------------------------- 1 | package log 2 | 3 | import ( 4 | "go.uber.org/zap" 5 | "go.uber.org/zap/zapcore" 6 | "os" 7 | ) 8 | 9 | var Logger *zap.Logger 10 | 11 | func InitLogger() { 12 | file, err := os.OpenFile("app.log", os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666) 13 | if err != nil { 14 | panic("无法打开日志文件: " + err.Error()) 15 | } 16 | 17 | fileSyncer := zapcore.AddSync(file) 18 | consoleSyncer := zapcore.AddSync(os.Stdout) 19 | 20 | encoderConfig := 
zap.NewProductionEncoderConfig() 21 | encoderConfig.EncodeTime = zapcore.ISO8601TimeEncoder 22 | 23 | core := zapcore.NewTee( 24 | zapcore.NewCore(zapcore.NewJSONEncoder(encoderConfig), fileSyncer, zap.DebugLevel), // 写入文件(JSON 格式) 25 | zapcore.NewCore(zapcore.NewConsoleEncoder(encoderConfig), consoleSyncer, zap.InfoLevel), // 输出到终端 26 | ) 27 | 28 | Logger = zap.New(core, zap.AddCaller()) 29 | } 30 | 31 | func GetLogger() *zap.Logger { 32 | return Logger 33 | } 34 | -------------------------------------------------------------------------------- /pkg/aliyun/asr.go: -------------------------------------------------------------------------------- 1 | package aliyun 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "github.com/google/uuid" 7 | "github.com/gorilla/websocket" 8 | "go.uber.org/zap" 9 | "io" 10 | "krillin-ai/internal/storage" 11 | "krillin-ai/internal/types" 12 | "krillin-ai/log" 13 | "net/http" 14 | "os" 15 | "os/exec" 16 | "path/filepath" 17 | "strings" 18 | "time" 19 | ) 20 | 21 | type AsrClient struct { 22 | BailianApiKey string 23 | } 24 | 25 | func NewAsrClient(bailianApiKey string) *AsrClient { 26 | return &AsrClient{ 27 | BailianApiKey: bailianApiKey, 28 | } 29 | } 30 | 31 | const ( 32 | wsURL = "wss://dashscope.aliyuncs.com/api-ws/v1/inference/" // WebSocket服务器地址 33 | ) 34 | 35 | var dialer = websocket.DefaultDialer 36 | 37 | func (c AsrClient) Transcription(audioFile, language, workDir string) (*types.TranscriptionData, error) { 38 | // 处理音频 39 | processedAudioFile, err := processAudio(audioFile) 40 | if err != nil { 41 | log.GetLogger().Error("处理音频失败", zap.Error(err), zap.String("audio file", audioFile)) 42 | return nil, err 43 | } 44 | 45 | // 连接WebSocket服务 46 | conn, err := connectWebSocket(c.BailianApiKey) 47 | if err != nil { 48 | log.GetLogger().Error("连接WebSocket失败", zap.Error(err), zap.String("audio file", audioFile)) 49 | return nil, err 50 | } 51 | defer closeConnection(conn) 52 | 53 | // 启动一个goroutine来接收结果 54 | taskStarted := make(chan bool) 55 | taskDone := make(chan bool) 56 | 57 | words := make([]types.Word, 0) 58 | text := "" 59 | startResultReceiver(conn, &words, &text, taskStarted, taskDone) 60 | 61 | // 发送run-task指令 62 | taskID, err := sendRunTaskCmd(conn, language) 63 | if err != nil { 64 | log.GetLogger().Error("发送run-task指令失败", zap.Error(err), zap.String("audio file", audioFile)) 65 | } 66 | 67 | // 等待task-started事件 68 | waitForTaskStarted(taskStarted) 69 | 70 | // 发送待识别音频文件流 71 | if err := sendAudioData(conn, processedAudioFile); err != nil { 72 | log.GetLogger().Error("发送音频数据失败", zap.Error(err)) 73 | } 74 | 75 | // 发送finish-task指令 76 | if err := sendFinishTaskCmd(conn, taskID); err != nil { 77 | log.GetLogger().Error("发送finish-task指令失败", zap.Error(err), zap.String("audio file", audioFile)) 78 | } 79 | 80 | // 等待任务完成或失败 81 | <-taskDone 82 | 83 | if len(words) == 0 { 84 | log.GetLogger().Info("识别结果为空", zap.String("audio file", audioFile)) 85 | } 86 | log.GetLogger().Debug("识别结果", zap.Any("words", words), zap.String("text", text), zap.String("audio file", audioFile)) 87 | 88 | transcriptionData := &types.TranscriptionData{ 89 | Text: text, 90 | Words: words, 91 | } 92 | 93 | return transcriptionData, nil 94 | } 95 | 96 | // 定义结构体来表示JSON数据 97 | type AsrHeader struct { 98 | Action string `json:"action"` 99 | TaskID string `json:"task_id"` 100 | Streaming string `json:"streaming"` 101 | Event string `json:"event"` 102 | ErrorCode string `json:"error_code,omitempty"` 103 | ErrorMessage string `json:"error_message,omitempty"` 104 | Attributes 
map[string]interface{} `json:"attributes"` 105 | } 106 | 107 | type Output struct { 108 | Sentence struct { 109 | BeginTime int64 `json:"begin_time"` 110 | EndTime *int64 `json:"end_time"` 111 | Text string `json:"text"` 112 | Words []struct { 113 | BeginTime int64 `json:"begin_time"` 114 | EndTime *int64 `json:"end_time"` 115 | Text string `json:"text"` 116 | Punctuation string `json:"punctuation"` 117 | } `json:"words"` 118 | } `json:"sentence"` 119 | Usage interface{} `json:"usage"` 120 | } 121 | 122 | type Payload struct { 123 | TaskGroup string `json:"task_group"` 124 | Task string `json:"task"` 125 | Function string `json:"function"` 126 | Model string `json:"model"` 127 | Parameters Params `json:"parameters"` 128 | Resources []Resource `json:"resources"` 129 | Input Input `json:"input"` 130 | Output Output `json:"output,omitempty"` 131 | } 132 | 133 | type Params struct { 134 | Format string `json:"format"` 135 | SampleRate int `json:"sample_rate"` 136 | VocabularyID string `json:"vocabulary_id"` 137 | DisfluencyRemovalEnabled bool `json:"disfluency_removal_enabled"` 138 | LanguageHints []string `json:"language_hints"` 139 | } 140 | 141 | type Resource struct { 142 | ResourceID string `json:"resource_id"` 143 | ResourceType string `json:"resource_type"` 144 | } 145 | 146 | type Input struct { 147 | } 148 | 149 | type Event struct { 150 | Header AsrHeader `json:"header"` 151 | Payload Payload `json:"payload"` 152 | } 153 | 154 | // 把音频处理成单声道、16k采样率 155 | func processAudio(filePath string) (string, error) { 156 | dest := strings.ReplaceAll(filePath, filepath.Ext(filePath), "_mono_16K.mp3") 157 | cmdArgs := []string{"-i", filePath, "-ac", "1", "-ar", "16000", "-b:a", "192k", dest} 158 | cmd := exec.Command(storage.FfmpegPath, cmdArgs...) 159 | output, err := cmd.CombinedOutput() 160 | if err != nil { 161 | log.GetLogger().Error("处理音频失败", zap.Error(err), zap.String("audio file", filePath), zap.String("output", string(output))) 162 | return "", err 163 | } 164 | return dest, nil 165 | } 166 | 167 | // 连接WebSocket服务 168 | func connectWebSocket(apiKey string) (*websocket.Conn, error) { 169 | header := make(http.Header) 170 | header.Add("X-DashScope-DataInspection", "enable") 171 | header.Add("Authorization", fmt.Sprintf("bearer %s", apiKey)) 172 | conn, _, err := dialer.Dial(wsURL, header) 173 | return conn, err 174 | } 175 | 176 | // 启动一个goroutine异步接收WebSocket消息 177 | func startResultReceiver(conn *websocket.Conn, words *[]types.Word, text *string, taskStarted chan<- bool, taskDone chan<- bool) { 178 | go func() { 179 | for { 180 | _, message, err := conn.ReadMessage() 181 | if err != nil { 182 | log.GetLogger().Error("解析服务器消息失败:", zap.Error(err)) 183 | continue 184 | } 185 | currentEvent := Event{} 186 | err = json.Unmarshal(message, &currentEvent) 187 | if err != nil { 188 | log.GetLogger().Error("解析服务器消息失败:", zap.Error(err)) 189 | continue 190 | } 191 | if currentEvent.Payload.Output.Sentence.EndTime != nil { 192 | // 本句结束,添加当前的words和text 193 | *text += currentEvent.Payload.Output.Sentence.Text 194 | currentNum := 0 195 | if len(*words) > 0 { 196 | currentNum = (*words)[len(*words)-1].Num + 1 197 | } 198 | for _, word := range currentEvent.Payload.Output.Sentence.Words { 199 | *words = append(*words, types.Word{ 200 | Num: currentNum, 201 | Text: strings.TrimSpace(word.Text), // 阿里云这边的word后面会有空格 202 | Start: float64(word.BeginTime) / 1000, 203 | End: float64(*word.EndTime) / 1000, 204 | }) 205 | currentNum++ 206 | } 207 | } 208 | if handleEvent(conn, &currentEvent, taskStarted, taskDone) { 209 |
return 210 | } 211 | } 212 | }() 213 | } 214 | 215 | // 发送run-task指令 216 | func sendRunTaskCmd(conn *websocket.Conn, language string) (string, error) { 217 | runTaskCmd, taskID, err := generateRunTaskCmd(language) 218 | if err != nil { 219 | return "", err 220 | } 221 | err = conn.WriteMessage(websocket.TextMessage, []byte(runTaskCmd)) 222 | return taskID, err 223 | } 224 | 225 | // 生成run-task指令 226 | func generateRunTaskCmd(language string) (string, string, error) { 227 | taskID := uuid.New().String() 228 | runTaskCmd := Event{ 229 | Header: AsrHeader{ 230 | Action: "run-task", 231 | TaskID: taskID, 232 | Streaming: "duplex", 233 | }, 234 | Payload: Payload{ 235 | TaskGroup: "audio", 236 | Task: "asr", 237 | Function: "recognition", 238 | Model: "paraformer-realtime-v2", 239 | Parameters: Params{ 240 | Format: "mp3", 241 | SampleRate: 16000, 242 | LanguageHints: []string{language}, 243 | }, 244 | Input: Input{}, 245 | }, 246 | } 247 | runTaskCmdJSON, err := json.Marshal(runTaskCmd) 248 | return string(runTaskCmdJSON), taskID, err 249 | } 250 | 251 | // 等待task-started事件 252 | func waitForTaskStarted(taskStarted chan bool) { 253 | select { 254 | case <-taskStarted: 255 | log.GetLogger().Info("阿里云语音识别任务开启成功") 256 | case <-time.After(10 * time.Second): 257 | log.GetLogger().Error("等待task-started超时,任务开启失败") 258 | } 259 | } 260 | 261 | // 发送音频数据 262 | func sendAudioData(conn *websocket.Conn, filePath string) error { 263 | file, err := os.Open(filePath) 264 | if err != nil { 265 | return err 266 | } 267 | defer file.Close() 268 | 269 | buf := make([]byte, 1024) // 100ms的音频大约1024字节 270 | for { 271 | n, err := file.Read(buf) 272 | if n == 0 { 273 | break 274 | } 275 | if err != nil && err != io.EOF { 276 | return err 277 | } 278 | err = conn.WriteMessage(websocket.BinaryMessage, buf[:n]) 279 | if err != nil { 280 | return err 281 | } 282 | time.Sleep(100 * time.Millisecond) 283 | } 284 | return nil 285 | } 286 | 287 | // 发送finish-task指令 288 | func sendFinishTaskCmd(conn *websocket.Conn, taskID string) error { 289 | finishTaskCmd, err := generateFinishTaskCmd(taskID) 290 | if err != nil { 291 | return err 292 | } 293 | err = conn.WriteMessage(websocket.TextMessage, []byte(finishTaskCmd)) 294 | return err 295 | } 296 | 297 | // 生成finish-task指令 298 | func generateFinishTaskCmd(taskID string) (string, error) { 299 | finishTaskCmd := Event{ 300 | Header: AsrHeader{ 301 | Action: "finish-task", 302 | TaskID: taskID, 303 | Streaming: "duplex", 304 | }, 305 | Payload: Payload{ 306 | Input: Input{}, 307 | }, 308 | } 309 | finishTaskCmdJSON, err := json.Marshal(finishTaskCmd) 310 | return string(finishTaskCmdJSON), err 311 | } 312 | 313 | // 处理事件 314 | func handleEvent(conn *websocket.Conn, event *Event, taskStarted chan<- bool, taskDone chan<- bool) bool { 315 | switch event.Header.Event { 316 | case "task-started": 317 | log.GetLogger().Info("收到task-started事件", zap.String("taskID", event.Header.TaskID)) 318 | taskStarted <- true 319 | case "result-generated": 320 | log.GetLogger().Info("收到result-generated事件", zap.String("当前text", event.Payload.Output.Sentence.Text)) 321 | case "task-finished": 322 | log.GetLogger().Info("收到task-finished事件,任务完成", zap.String("taskID", event.Header.TaskID)) 323 | taskDone <- true 324 | return true 325 | case "task-failed": 326 | log.GetLogger().Info("收到task-failed事件", zap.String("taskID", event.Header.TaskID)) 327 | handleTaskFailed(event, conn) 328 | taskDone <- true 329 | return true 330 | default: 331 | log.GetLogger().Info("未知事件:", zap.String("event", 
event.Header.Event)) 332 | } 333 | return false 334 | } 335 | 336 | // 处理任务失败事件 337 | func handleTaskFailed(event *Event, conn *websocket.Conn) { 338 | log.GetLogger().Error("任务失败:", zap.String("error", event.Header.ErrorMessage)) 339 | } 340 | 341 | // 关闭连接 342 | func closeConnection(conn *websocket.Conn) { 343 | if conn != nil { 344 | conn.Close() 345 | } 346 | } 347 | -------------------------------------------------------------------------------- /pkg/aliyun/base.go: -------------------------------------------------------------------------------- 1 | package aliyun 2 | 3 | import ( 4 | "encoding/json" 5 | "github.com/aliyun/alibaba-cloud-sdk-go/sdk" 6 | "github.com/aliyun/alibaba-cloud-sdk-go/sdk/requests" 7 | "go.uber.org/zap" 8 | "krillin-ai/config" 9 | "krillin-ai/log" 10 | ) 11 | 12 | type TokenResult struct { 13 | ErrMsg string 14 | Token struct { 15 | UserId string 16 | Id string 17 | ExpireTime int64 18 | } 19 | } 20 | 21 | func CreateToken(ak, sk string) (string, error) { 22 | client, err := sdk.NewClientWithAccessKey("cn-shanghai", ak, sk) 23 | if err != nil { 24 | return "", err 25 | } 26 | if config.Conf.App.Proxy != "" { 27 | client.SetHttpProxy(config.Conf.App.Proxy) 28 | } 29 | request := requests.NewCommonRequest() 30 | request.Method = "POST" 31 | request.Domain = "nls-meta.cn-shanghai.aliyuncs.com" 32 | request.ApiName = "CreateToken" 33 | request.Version = "2019-02-28" 34 | response, err := client.ProcessCommonRequest(request) 35 | if err != nil { 36 | log.GetLogger().Error("aliyun sdk create token request error:", zap.Error(err)) 37 | return "", err 38 | } 39 | 40 | var tr TokenResult 41 | err = json.Unmarshal([]byte(response.GetHttpContentString()), &tr) 42 | if err != nil { 43 | log.GetLogger().Error("aliyun sdk json unmarshal error:", zap.Error(err)) 44 | return "", err 45 | } 46 | return tr.Token.Id, nil 47 | } 48 | -------------------------------------------------------------------------------- /pkg/aliyun/chat.go: -------------------------------------------------------------------------------- 1 | package aliyun 2 | 3 | import ( 4 | "context" 5 | goopenai "github.com/sashabaranov/go-openai" 6 | "go.uber.org/zap" 7 | "krillin-ai/log" 8 | ) 9 | 10 | type ChatClient struct { 11 | *goopenai.Client 12 | } 13 | 14 | func NewChatClient(apiKey string) *ChatClient { 15 | cfg := goopenai.DefaultConfig(apiKey) 16 | cfg.BaseURL = "https://dashscope.aliyuncs.com/compatible-mode/v1" // 使用阿里云的openai兼容模式调用 17 | return &ChatClient{ 18 | Client: goopenai.NewClientWithConfig(cfg), 19 | } 20 | } 21 | 22 | func (c ChatClient) ChatCompletion(query string) (string, error) { 23 | req := goopenai.ChatCompletionRequest{ 24 | Model: "qwen-plus", 25 | Messages: []goopenai.ChatCompletionMessage{ 26 | { 27 | Role: goopenai.ChatMessageRoleSystem, 28 | Content: "You are an assistant that helps with subtitle translation.", 29 | }, 30 | { 31 | Role: goopenai.ChatMessageRoleUser, 32 | Content: query, 33 | }, 34 | }, 35 | } 36 | 37 | resp, err := c.CreateChatCompletion(context.Background(), req) 38 | if err != nil { 39 | log.GetLogger().Error("aliyun openai create chat completion failed", zap.Error(err)) 40 | return "", err 41 | } 42 | 43 | resContent := resp.Choices[0].Message.Content 44 | 45 | return resContent, nil 46 | } 47 | -------------------------------------------------------------------------------- /pkg/aliyun/oss.go: -------------------------------------------------------------------------------- 1 | package aliyun 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | 
"github.com/aliyun/alibabacloud-oss-go-sdk-v2/oss" 7 | "github.com/aliyun/alibabacloud-oss-go-sdk-v2/oss/credentials" 8 | "os" 9 | ) 10 | 11 | type OssClient struct { 12 | *oss.Client 13 | Bucket string 14 | } 15 | 16 | func NewOssClient(accessKeyID, accessKeySecret, bucket string) *OssClient { 17 | credProvider := credentials.NewStaticCredentialsProvider(accessKeyID, accessKeySecret) 18 | 19 | cfg := oss.LoadDefaultConfig(). 20 | WithCredentialsProvider(credProvider). 21 | WithRegion("cn-shanghai") 22 | 23 | client := oss.NewClient(cfg) 24 | 25 | return &OssClient{client, bucket} 26 | } 27 | 28 | func (o *OssClient) UploadFile(ctx context.Context, objectKey, filePath, bucket string) error { 29 | file, err := os.Open(filePath) 30 | if err != nil { 31 | return fmt.Errorf("failed to open file: %v", err) 32 | } 33 | defer file.Close() 34 | 35 | _, err = o.PutObject(ctx, &oss.PutObjectRequest{ 36 | Bucket: &bucket, 37 | Key: &objectKey, 38 | Body: file, 39 | }) 40 | if err != nil { 41 | return fmt.Errorf("failed to upload file to OSS: %v", err) 42 | } 43 | 44 | fmt.Printf("File %s uploaded successfully to bucket %s as %s\n", filePath, bucket, objectKey) 45 | return nil 46 | } 47 | -------------------------------------------------------------------------------- /pkg/aliyun/tts.go: -------------------------------------------------------------------------------- 1 | package aliyun 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "github.com/gorilla/websocket" 7 | "go.uber.org/zap" 8 | "krillin-ai/config" 9 | "krillin-ai/log" 10 | "krillin-ai/pkg/util" 11 | "net/http" 12 | "os" 13 | "time" 14 | ) 15 | 16 | type TtsClient struct { 17 | AccessKeyID string 18 | AccessKeySecret string 19 | Appkey string 20 | } 21 | 22 | type TtsHeader struct { 23 | Appkey string `json:"appkey"` 24 | MessageID string `json:"message_id"` 25 | TaskID string `json:"task_id"` 26 | Namespace string `json:"namespace"` 27 | Name string `json:"name"` 28 | } 29 | 30 | type StartSynthesisPayload struct { 31 | Voice string `json:"voice,omitempty"` 32 | Format string `json:"format,omitempty"` 33 | SampleRate int `json:"sample_rate,omitempty"` 34 | Volume int `json:"volume,omitempty"` 35 | SpeechRate int `json:"speech_rate,omitempty"` 36 | PitchRate int `json:"pitch_rate,omitempty"` 37 | EnableSubtitle bool `json:"enable_subtitle,omitempty"` 38 | EnablePhonemeTimestamp bool `json:"enable_phoneme_timestamp,omitempty"` 39 | } 40 | 41 | type RunSynthesisPayload struct { 42 | Text string `json:"text"` 43 | } 44 | 45 | type Message struct { 46 | Header TtsHeader `json:"header"` 47 | Payload interface{} `json:"payload,omitempty"` 48 | } 49 | 50 | func NewTtsClient(accessKeyId, accessKeySecret, appkey string) *TtsClient { 51 | return &TtsClient{ 52 | AccessKeyID: accessKeyId, 53 | AccessKeySecret: accessKeySecret, 54 | Appkey: appkey, 55 | } 56 | } 57 | 58 | func (c *TtsClient) Text2Speech(text, voice, outputFile string) error { 59 | file, err := os.OpenFile(outputFile, os.O_CREATE|os.O_WRONLY, 0666) 60 | if err != nil { 61 | return fmt.Errorf("failed to create file: %w", err) 62 | } 63 | defer file.Close() 64 | 65 | var conn *websocket.Conn 66 | token, _ := CreateToken(c.AccessKeyID, c.AccessKeySecret) 67 | fullURL := "wss://nls-gateway-cn-beijing.aliyuncs.com/ws/v1?token=" + token 68 | dialer := websocket.DefaultDialer 69 | if config.Conf.App.Proxy != "" { 70 | dialer.Proxy = http.ProxyURL(config.Conf.App.ParsedProxy) 71 | } 72 | dialer.HandshakeTimeout = 10 * time.Second 73 | conn, _, err = dialer.Dial(fullURL, nil) 74 | if err != 
nil { 75 | return err 76 | } 77 | _ = conn.SetReadDeadline(time.Now().Add(time.Second * 60)) 78 | defer c.Close(conn) 79 | 80 | onTextMessage := func(message string) { 81 | log.GetLogger().Info("Received text message", zap.String("Message", message)) 82 | } 83 | 84 | onBinaryMessage := func(data []byte) { 85 | if _, err := file.Write(data); err != nil { 86 | log.GetLogger().Error("Failed to write data to file", zap.Error(err)) 87 | } 88 | } 89 | 90 | var ( 91 | synthesisStarted = make(chan struct{}) 92 | synthesisComplete = make(chan struct{}) 93 | ) 94 | 95 | startPayload := StartSynthesisPayload{ 96 | Voice: voice, 97 | Format: "wav", 98 | SampleRate: 44100, 99 | Volume: 50, 100 | SpeechRate: 0, 101 | PitchRate: 0, 102 | } 103 | 104 | go c.receiveMessages(conn, onTextMessage, onBinaryMessage, synthesisStarted, synthesisComplete) 105 | 106 | taskId := util.GenerateID() 107 | log.GetLogger().Info("SpeechClient StartSynthesis", zap.String("taskId", taskId), zap.Any("payload", startPayload)) 108 | if err := c.StartSynthesis(conn, taskId, startPayload, synthesisStarted); err != nil { 109 | return fmt.Errorf("failed to start synthesis: %w", err) 110 | } 111 | 112 | if err := c.RunSynthesis(conn, taskId, text); err != nil { 113 | return fmt.Errorf("failed to run synthesis: %w", err) 114 | } 115 | 116 | if err := c.StopSynthesis(conn, taskId, synthesisComplete); err != nil { 117 | return fmt.Errorf("failed to stop synthesis: %w", err) 118 | } 119 | 120 | return nil 121 | } 122 | 123 | func (c *TtsClient) sendMessage(conn *websocket.Conn, taskId, name string, payload interface{}) error { 124 | message := Message{ 125 | Header: TtsHeader{ 126 | Appkey: c.Appkey, 127 | MessageID: util.GenerateID(), 128 | TaskID: taskId, 129 | Namespace: "FlowingSpeechSynthesizer", 130 | Name: name, 131 | }, 132 | Payload: payload, 133 | } 134 | jsonData, _ := json.Marshal(message) 135 | log.GetLogger().Debug("SpeechClient sendMessage", zap.String("message", string(jsonData))) 136 | return conn.WriteJSON(message) 137 | } 138 | 139 | func (c *TtsClient) StartSynthesis(conn *websocket.Conn, taskId string, payload StartSynthesisPayload, synthesisStarted chan struct{}) error { 140 | err := c.sendMessage(conn, taskId, "StartSynthesis", payload) 141 | if err != nil { 142 | return err 143 | } 144 | 145 | // Block until the SynthesisStarted event arrives 146 | <-synthesisStarted 147 | 148 | return nil 149 | } 150 | 151 | func (c *TtsClient) RunSynthesis(conn *websocket.Conn, taskId, text string) error { 152 | return c.sendMessage(conn, taskId, "RunSynthesis", RunSynthesisPayload{Text: text}) 153 | } 154 | 155 | func (c *TtsClient) StopSynthesis(conn *websocket.Conn, taskId string, synthesisComplete chan struct{}) error { 156 | err := c.sendMessage(conn, taskId, "StopSynthesis", nil) 157 | if err != nil { 158 | return err 159 | } 160 | 161 | // Block until the SynthesisCompleted event arrives 162 | <-synthesisComplete 163 | 164 | return nil 165 | } 166 | 167 | func (c *TtsClient) Close(conn *websocket.Conn) error { 168 | err := conn.WriteMessage(websocket.CloseMessage, websocket.FormatCloseMessage(websocket.CloseNormalClosure, "")) 169 | if err != nil { 170 | return err 171 | } 172 | return conn.Close() 173 | } 174 | 175 | func (c *TtsClient) receiveMessages(conn *websocket.Conn, onTextMessage func(string), onBinaryMessage func([]byte), synthesisStarted, synthesisComplete chan struct{}) { 176 | defer close(synthesisComplete) 177 | for { 178 | messageType, message, err := conn.ReadMessage() 179 | if err != nil { 180 | if !websocket.IsCloseError(err,
websocket.CloseNormalClosure) { 181 | log.GetLogger().Error("SpeechClient receiveMessages websocket非正常关闭", zap.Error(err)) 182 | return 183 | } 184 | return 185 | } 186 | if messageType == websocket.TextMessage { 187 | var msg Message 188 | if err := json.Unmarshal(message, &msg); err != nil { 189 | log.GetLogger().Error("SpeechClient receiveMessages json解析失败", zap.Error(err)) 190 | return 191 | } 192 | if msg.Header.Name == "SynthesisCompleted" { 193 | log.GetLogger().Info("SynthesisCompleted event received") 194 | // 收到结束消息退出 195 | break 196 | } else if msg.Header.Name == "SynthesisStarted" { 197 | log.GetLogger().Info("SynthesisStarted event received") 198 | close(synthesisStarted) 199 | } else { 200 | onTextMessage(string(message)) 201 | } 202 | } else if messageType == websocket.BinaryMessage { 203 | onBinaryMessage(message) 204 | } 205 | } 206 | } 207 | -------------------------------------------------------------------------------- /pkg/aliyun/voice_clone.go: -------------------------------------------------------------------------------- 1 | package aliyun 2 | 3 | import ( 4 | "crypto/hmac" 5 | "crypto/sha1" 6 | "encoding/base64" 7 | "fmt" 8 | "go.uber.org/zap" 9 | "krillin-ai/log" 10 | "net/url" 11 | "sort" 12 | "strings" 13 | "time" 14 | 15 | "github.com/go-resty/resty/v2" 16 | "github.com/google/uuid" 17 | ) 18 | 19 | // _encodeText URL-编码文本,保证符合规范 20 | func _encodeText(text string) string { 21 | encoded := url.QueryEscape(text) 22 | // 根据规范替换特殊字符 23 | return strings.ReplaceAll(strings.ReplaceAll(strings.ReplaceAll(encoded, "+", "%20"), "*", "%2A"), "%7E", "~") 24 | } 25 | 26 | // _encodeDict URL-编码字典(map)为查询字符串 27 | func _encodeDict(dic map[string]string) string { 28 | var keys []string 29 | for key := range dic { 30 | keys = append(keys, key) 31 | } 32 | sort.Strings(keys) 33 | values := url.Values{} 34 | 35 | for _, k := range keys { 36 | values.Add(k, dic[k]) 37 | } 38 | encodedText := values.Encode() 39 | // 对整个查询字符串进行编码 40 | return strings.ReplaceAll(strings.ReplaceAll(strings.ReplaceAll(encodedText, "+", "%20"), "*", "%2A"), "%7E", "~") 41 | } 42 | 43 | // 生成签名 44 | func GenerateSignature(secret, stringToSign string) string { 45 | key := []byte(secret + "&") 46 | data := []byte(stringToSign) 47 | hash := hmac.New(sha1.New, key) 48 | hash.Write(data) 49 | signature := base64.StdEncoding.EncodeToString(hash.Sum(nil)) 50 | // 对签名进行URL编码 51 | return _encodeText(signature) 52 | } 53 | 54 | type VoiceCloneResp struct { 55 | RequestId string `json:"RequestId"` 56 | Message string `json:"Message"` 57 | Code int `json:"Code"` 58 | VoiceName string `json:"VoiceName"` 59 | } 60 | 61 | type VoiceCloneClient struct { 62 | restyClient *resty.Client 63 | accessKeyID string 64 | accessKeySecret string 65 | appkey string 66 | } 67 | 68 | func NewVoiceCloneClient(accessKeyID, accessKeySecret, appkey string) *VoiceCloneClient { 69 | return &VoiceCloneClient{ 70 | restyClient: resty.New(), 71 | accessKeyID: accessKeyID, 72 | accessKeySecret: accessKeySecret, 73 | appkey: appkey, 74 | } 75 | } 76 | 77 | func (c *VoiceCloneClient) CosyVoiceClone(voicePrefix, audioURL string) (string, error) { 78 | log.GetLogger().Info("CosyVoiceClone请求开始", zap.String("voicePrefix", voicePrefix), zap.String("audioURL", audioURL)) 79 | parameters := map[string]string{ 80 | "AccessKeyId": c.accessKeyID, 81 | "Action": "CosyVoiceClone", 82 | "Format": "JSON", 83 | "RegionId": "cn-shanghai", 84 | "SignatureMethod": "HMAC-SHA1", 85 | "SignatureNonce": uuid.New().String(), 86 | "SignatureVersion": "1.0", 87 | 
"Timestamp": time.Now().UTC().Format("2006-01-02T15:04:05Z"), 88 | "Version": "2019-08-19", 89 | "VoicePrefix": voicePrefix, 90 | "Url": audioURL, 91 | } 92 | 93 | queryString := _encodeDict(parameters) 94 | stringToSign := "POST" + "&" + _encodeText("/") + "&" + _encodeText(queryString) 95 | signature := GenerateSignature(c.accessKeySecret, stringToSign) 96 | fullURL := fmt.Sprintf("https://nls-slp.cn-shanghai.aliyuncs.com/?Signature=%s&%s", signature, queryString) 97 | 98 | values := url.Values{} 99 | for key, value := range parameters { 100 | values.Add(key, value) 101 | } 102 | var res VoiceCloneResp 103 | resp, err := c.restyClient.R().SetResult(&res).Post(fullURL) 104 | if err != nil { 105 | log.GetLogger().Error("CosyVoiceClone post error", zap.Error(err)) 106 | return "", fmt.Errorf("CosyVoiceClone post error: %w: ", err) 107 | } 108 | log.GetLogger().Info("CosyVoiceClone请求完毕", zap.String("Response", resp.String())) 109 | if res.Message != "SUCCESS" { 110 | log.GetLogger().Error("CosyVoiceClone res message is not success", zap.String("Request Id", res.RequestId), zap.Int("Code", res.Code), zap.String("Message", res.Message)) 111 | return "", fmt.Errorf("CosyVoiceClone res message is not success, message: %s", res.Message) 112 | } 113 | return res.VoiceName, nil 114 | } 115 | 116 | func (c *VoiceCloneClient) CosyCloneList(voicePrefix string, pageIndex, pageSize int) { 117 | parameters := map[string]string{ 118 | "AccessKeyId": c.accessKeyID, 119 | "Action": "ListCosyVoice", 120 | "Format": "JSON", 121 | "RegionId": "cn-shanghai", 122 | "SignatureMethod": "HMAC-SHA1", 123 | "SignatureNonce": uuid.New().String(), 124 | "SignatureVersion": "1.0", 125 | "Timestamp": time.Now().UTC().Format("2006-01-02T15:04:05Z"), 126 | "Version": "2019-08-19", 127 | "VoicePrefix": voicePrefix, 128 | "PageIndex": fmt.Sprintf("%d", pageIndex), 129 | "PageSize": fmt.Sprintf("%d", pageSize), 130 | } 131 | 132 | queryString := _encodeDict(parameters) 133 | stringToSign := "POST" + "&" + _encodeText("/") + "&" + _encodeText(queryString) 134 | signature := GenerateSignature(c.accessKeySecret, stringToSign) 135 | fullURL := fmt.Sprintf("https://nls-slp.cn-shanghai.aliyuncs.com/?Signature=%s&%s", signature, queryString) 136 | 137 | values := url.Values{} 138 | for key, value := range parameters { 139 | values.Add(key, value) 140 | } 141 | resp, err := c.restyClient.R().Post(fullURL) 142 | if err != nil { 143 | log.GetLogger().Error("CosyCloneList请求失败", zap.Error(err)) 144 | return 145 | } 146 | log.GetLogger().Info("CosyCloneList请求成功", zap.String("Response", resp.String())) 147 | } 148 | -------------------------------------------------------------------------------- /pkg/fasterwhisper/init.go: -------------------------------------------------------------------------------- 1 | package fasterwhisper 2 | 3 | type FastwhisperProcessor struct { 4 | WorkDir string // 生成中间文件的目录 5 | Model string 6 | } 7 | 8 | func NewFastwhisperProcessor(model string) *FastwhisperProcessor { 9 | return &FastwhisperProcessor{ 10 | Model: model, 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /pkg/fasterwhisper/transcription.go: -------------------------------------------------------------------------------- 1 | package fasterwhisper 2 | 3 | import ( 4 | "encoding/json" 5 | "go.uber.org/zap" 6 | "krillin-ai/internal/storage" 7 | "krillin-ai/internal/types" 8 | "krillin-ai/log" 9 | "krillin-ai/pkg/util" 10 | "os" 11 | "os/exec" 12 | "strings" 13 | ) 14 | 15 | func (c 
*FastwhisperProcessor) Transcription(audioFile, language, workDir string) (*types.TranscriptionData, error) { 16 | cmdArgs := []string{ 17 | "--model_dir", "./models/", 18 | "--model", c.Model, 19 | "--one_word", "2", 20 | "--output_format", "json", 21 | "--language", language, 22 | "--output_dir", workDir, 23 | audioFile, 24 | } 25 | cmd := exec.Command(storage.FasterwhisperPath, cmdArgs...) 26 | log.GetLogger().Info("FastwhisperProcessor转录开始", zap.String("cmd", cmd.String())) 27 | output, err := cmd.CombinedOutput() 28 | if err != nil && !strings.Contains(string(output), "Subtitles are written to") { 29 | log.GetLogger().Error("FastwhisperProcessor cmd 执行失败", zap.String("output", string(output)), zap.Error(err)) 30 | return nil, err 31 | } 32 | log.GetLogger().Info("FastwhisperProcessor转录json生成完毕", zap.String("audio file", audioFile)) 33 | 34 | var result types.FasterWhisperOutput 35 | fileData, err := os.Open(util.ChangeFileExtension(audioFile, ".json")) 36 | if err != nil { 37 | log.GetLogger().Error("FastwhisperProcessor 打开json文件失败", zap.Error(err)) 38 | return nil, err 39 | } 40 | defer fileData.Close() 41 | decoder := json.NewDecoder(fileData) 42 | if err = decoder.Decode(&result); err != nil { 43 | log.GetLogger().Error("FastwhisperProcessor 解析json文件失败", zap.Error(err)) 44 | return nil, err 45 | } 46 | 47 | var ( 48 | transcriptionData types.TranscriptionData 49 | num int 50 | ) 51 | for _, segment := range result.Segments { 52 | transcriptionData.Text += strings.ReplaceAll(segment.Text, "—", " ") // 连字符处理,因为模型存在很多错误添加到连字符 53 | for _, word := range segment.Words { 54 | if strings.Contains(word.Word, "—") { 55 | // 对称切分 56 | mid := (word.Start + word.End) / 2 57 | seperatedWords := strings.Split(word.Word, "—") 58 | transcriptionData.Words = append(transcriptionData.Words, []types.Word{ 59 | { 60 | Num: num, 61 | Text: util.CleanPunction(strings.TrimSpace(seperatedWords[0])), 62 | Start: word.Start, 63 | End: mid, 64 | }, 65 | { 66 | Num: num + 1, 67 | Text: util.CleanPunction(strings.TrimSpace(seperatedWords[1])), 68 | Start: mid, 69 | End: word.End, 70 | }, 71 | }...) 
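// Worked example of the symmetric split above (illustrative values only): a merged token
// "of—the" timed 1.20s–1.60s becomes "of" (1.20–1.40) and "the" (1.40–1.60); each half
// takes one side of the midpoint of the original word's timestamps.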
72 | num += 2 73 | } else { 74 | transcriptionData.Words = append(transcriptionData.Words, types.Word{ 75 | Num: num, 76 | Text: util.CleanPunction(strings.TrimSpace(word.Word)), 77 | Start: word.Start, 78 | End: word.End, 79 | }) 80 | num++ 81 | } 82 | } 83 | } 84 | log.GetLogger().Info("FastwhisperProcessor转录成功") 85 | return &transcriptionData, nil 86 | } 87 | -------------------------------------------------------------------------------- /pkg/openai/init.go: -------------------------------------------------------------------------------- 1 | package openai 2 | 3 | import ( 4 | "github.com/sashabaranov/go-openai" 5 | "krillin-ai/config" 6 | "net/http" 7 | ) 8 | 9 | type Client struct { 10 | client *openai.Client 11 | } 12 | 13 | func NewClient(baseUrl, apiKey, proxyAddr string) *Client { 14 | cfg := openai.DefaultConfig(apiKey) 15 | if baseUrl != "" { 16 | cfg.BaseURL = baseUrl 17 | } 18 | 19 | if proxyAddr != "" { 20 | transport := &http.Transport{ 21 | Proxy: http.ProxyURL(config.Conf.App.ParsedProxy), 22 | } 23 | cfg.HTTPClient = &http.Client{ 24 | Transport: transport, 25 | } 26 | } 27 | 28 | client := openai.NewClientWithConfig(cfg) 29 | return &Client{client: client} 30 | } 31 | -------------------------------------------------------------------------------- /pkg/openai/openai.go: -------------------------------------------------------------------------------- 1 | package openai 2 | 3 | import ( 4 | "context" 5 | openai "github.com/sashabaranov/go-openai" 6 | "go.uber.org/zap" 7 | "io" 8 | "krillin-ai/config" 9 | "krillin-ai/log" 10 | ) 11 | 12 | func (c *Client) ChatCompletion(query string) (string, error) { 13 | req := openai.ChatCompletionRequest{ 14 | Model: openai.GPT4oMini20240718, 15 | Messages: []openai.ChatCompletionMessage{ 16 | { 17 | Role: openai.ChatMessageRoleSystem, 18 | Content: "You are an assistant that helps with subtitle translation.", 19 | }, 20 | { 21 | Role: openai.ChatMessageRoleUser, 22 | Content: query, 23 | }, 24 | }, 25 | Stream: true, 26 | MaxTokens: 8192, 27 | } 28 | if config.Conf.Openai.Model != "" { 29 | req.Model = config.Conf.Openai.Model 30 | } 31 | 32 | stream, err := c.client.CreateChatCompletionStream(context.Background(), req) 33 | if err != nil { 34 | log.GetLogger().Error("openai create chat completion stream failed", zap.Error(err)) 35 | return "", err 36 | } 37 | defer stream.Close() 38 | 39 | var resContent string 40 | for { 41 | response, err := stream.Recv() 42 | if err == io.EOF { 43 | break 44 | } 45 | if err != nil { 46 | log.GetLogger().Error("openai stream receive failed", zap.Error(err)) 47 | return "", err 48 | } 49 | if len(response.Choices) == 0 { 50 | log.GetLogger().Info("openai stream receive no choices", zap.Any("response", response)) 51 | continue 52 | } 53 | 54 | resContent += response.Choices[0].Delta.Content 55 | } 56 | 57 | return resContent, nil 58 | } 59 | -------------------------------------------------------------------------------- /pkg/util/base.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "archive/zip" 5 | "fmt" 6 | "github.com/google/uuid" 7 | "io" 8 | "math" 9 | "math/rand" 10 | "net/url" 11 | "os" 12 | "path/filepath" 13 | "regexp" 14 | "strconv" 15 | "strings" 16 | "unicode" 17 | ) 18 | 19 | var strWithUpperLowerNum = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ123456789") 20 | 21 | func GenerateRandStringWithUpperLowerNum(n int) string { 22 | b := make([]rune, n) 23 | for i := range b { 24 | b[i] = 
strWithUpperLowerNum[rand.Intn(len(strWithUpperLowerNum))] 25 | } 26 | return string(b) 27 | } 28 | 29 | func GetYouTubeID(youtubeURL string) (string, error) { 30 | parsedURL, err := url.Parse(youtubeURL) 31 | if err != nil { 32 | return "", err 33 | } 34 | 35 | if strings.Contains(parsedURL.Path, "watch") { 36 | queryParams := parsedURL.Query() 37 | if id, exists := queryParams["v"]; exists { 38 | return id[0], nil 39 | } 40 | } else { 41 | pathSegments := strings.Split(parsedURL.Path, "/") 42 | return pathSegments[len(pathSegments)-1], nil 43 | } 44 | 45 | return "", fmt.Errorf("no video ID found") 46 | } 47 | 48 | func GetBilibiliVideoId(url string) string { 49 | re := regexp.MustCompile(`https://(?:www\.)?bilibili\.com/(?:video/|video/av\d+/)(BV[a-zA-Z0-9]+)`) 50 | matches := re.FindStringSubmatch(url) 51 | if len(matches) > 1 { 52 | // 返回匹配到的BV号 53 | return matches[1] 54 | } 55 | return "" 56 | } 57 | 58 | // 将浮点数秒数转换为HH:MM:SS,SSS格式的字符串 59 | func FormatTime(seconds float32) string { 60 | totalSeconds := int(math.Floor(float64(seconds))) // 获取总秒数 61 | milliseconds := int((seconds - float32(totalSeconds)) * 1000) // 获取毫秒部分 62 | 63 | hours := totalSeconds / 3600 64 | minutes := (totalSeconds % 3600) / 60 65 | secs := totalSeconds % 60 66 | return fmt.Sprintf("%02d:%02d:%02d,%03d", hours, minutes, secs, milliseconds) 67 | } 68 | 69 | // 判断字符串是否是纯数字(字幕编号) 70 | func IsNumber(s string) bool { 71 | _, err := strconv.Atoi(s) 72 | return err == nil 73 | } 74 | 75 | func Unzip(zipFile, destDir string) error { 76 | zipReader, err := zip.OpenReader(zipFile) 77 | if err != nil { 78 | return fmt.Errorf("打开zip文件失败: %v", err) 79 | } 80 | defer zipReader.Close() 81 | 82 | err = os.MkdirAll(destDir, 0755) 83 | if err != nil { 84 | return fmt.Errorf("创建目标目录失败: %v", err) 85 | } 86 | 87 | for _, file := range zipReader.File { 88 | filePath := filepath.Join(destDir, file.Name) 89 | 90 | if file.FileInfo().IsDir() { 91 | err := os.MkdirAll(filePath, file.Mode()) 92 | if err != nil { 93 | return fmt.Errorf("创建目录失败: %v", err) 94 | } 95 | continue 96 | } 97 | 98 | destFile, err := os.Create(filePath) 99 | if err != nil { 100 | return fmt.Errorf("创建文件失败: %v", err) 101 | } 102 | defer destFile.Close() 103 | 104 | zipFileReader, err := file.Open() 105 | if err != nil { 106 | return fmt.Errorf("打开zip文件内容失败: %v", err) 107 | } 108 | defer zipFileReader.Close() 109 | 110 | _, err = io.Copy(destFile, zipFileReader) 111 | if err != nil { 112 | return fmt.Errorf("复制文件内容失败: %v", err) 113 | } 114 | } 115 | 116 | return nil 117 | } 118 | 119 | func GenerateID() string { 120 | return strings.ReplaceAll(uuid.New().String(), "-", "") 121 | } 122 | 123 | // ChangeFileExtension 修改文件后缀 124 | func ChangeFileExtension(path string, newExt string) string { 125 | ext := filepath.Ext(path) 126 | return path[:len(path)-len(ext)] + newExt 127 | } 128 | 129 | func CleanPunction(word string) string { 130 | return strings.TrimFunc(word, func(r rune) bool { 131 | return unicode.IsPunct(r) 132 | }) 133 | } 134 | 135 | func IsAlphabetic(r rune) bool { 136 | if unicode.IsLetter(r) { // 中文在IsLetter中会返回true 137 | switch { 138 | // 英语及其他拉丁字母的范围 139 | case r >= 'A' && r <= 'Z', r >= 'a' && r <= 'z': 140 | return true 141 | // 扩展拉丁字母(法语、西班牙语等使用的附加字符) 142 | case r >= '\u00C0' && r <= '\u024F': 143 | return true 144 | // 希腊字母 145 | case r >= '\u0370' && r <= '\u03FF': 146 | return true 147 | // 西里尔字母(俄语等) 148 | case r >= '\u0400' && r <= '\u04FF': 149 | return true 150 | default: 151 | return false 152 | } 153 | } 154 | return false 155 | } 156 | 157 
| func ContainsAlphabetic(text string) bool { 158 | for _, r := range text { 159 | if IsAlphabetic(r) { 160 | return true 161 | } 162 | } 163 | return false 164 | } 165 | 166 | // CopyFile 复制文件 167 | func CopyFile(src, dst string) error { 168 | sourceFile, err := os.Open(src) 169 | if err != nil { 170 | return err 171 | } 172 | defer sourceFile.Close() 173 | 174 | destinationFile, err := os.Create(dst) 175 | if err != nil { 176 | return err 177 | } 178 | defer destinationFile.Close() 179 | 180 | _, err = io.Copy(destinationFile, sourceFile) 181 | if err != nil { 182 | return err 183 | } 184 | 185 | return destinationFile.Sync() 186 | } 187 | -------------------------------------------------------------------------------- /pkg/util/download.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "fmt" 5 | "go.uber.org/zap" 6 | "io" 7 | "krillin-ai/config" 8 | "krillin-ai/log" 9 | "net/http" 10 | "os" 11 | "time" 12 | ) 13 | 14 | // 用于显示下载进度,实现io.Writer 15 | type progressWriter struct { 16 | Total uint64 17 | Downloaded uint64 18 | StartTime time.Time 19 | } 20 | 21 | func (pw *progressWriter) Write(p []byte) (int, error) { 22 | n := len(p) 23 | pw.Downloaded += uint64(n) 24 | 25 | // 初始化开始时间 26 | if pw.StartTime.IsZero() { 27 | pw.StartTime = time.Now() 28 | } 29 | 30 | percent := float64(pw.Downloaded) / float64(pw.Total) * 100 31 | elapsed := time.Since(pw.StartTime).Seconds() 32 | speed := float64(pw.Downloaded) / 1024 / 1024 / elapsed 33 | 34 | fmt.Printf("\r下载进度: %.2f%% (%.2f MB / %.2f MB) | 速度: %.2f MB/s", 35 | percent, 36 | float64(pw.Downloaded)/1024/1024, 37 | float64(pw.Total)/1024/1024, 38 | speed) 39 | 40 | return n, nil 41 | } 42 | 43 | // DownloadFile 下载文件并保存到指定路径,支持代理 44 | func DownloadFile(urlStr, filepath, proxyAddr string) error { 45 | log.GetLogger().Info("开始下载文件", zap.String("url", urlStr)) 46 | client := &http.Client{} 47 | if proxyAddr != "" { 48 | client.Transport = &http.Transport{ 49 | Proxy: http.ProxyURL(config.Conf.App.ParsedProxy), 50 | } 51 | } 52 | 53 | resp, err := client.Get(urlStr) 54 | if err != nil { 55 | return err 56 | } 57 | defer resp.Body.Close() 58 | 59 | size := resp.ContentLength 60 | fmt.Printf("文件大小: %.2f MB\n", float64(size)/1024/1024) 61 | 62 | out, err := os.Create(filepath) 63 | if err != nil { 64 | return err 65 | } 66 | defer out.Close() 67 | 68 | // 带有进度的 Reader 69 | progress := &progressWriter{ 70 | Total: uint64(size), 71 | } 72 | reader := io.TeeReader(resp.Body, progress) 73 | 74 | _, err = io.Copy(out, reader) 75 | if err != nil { 76 | return err 77 | } 78 | fmt.Printf("\n") // 进度信息结束,换新行 79 | 80 | log.GetLogger().Info("文件下载完成", zap.String("路径", filepath)) 81 | return nil 82 | } 83 | -------------------------------------------------------------------------------- /pkg/util/language.go: -------------------------------------------------------------------------------- 1 | package util 2 | -------------------------------------------------------------------------------- /pkg/util/subtitle.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 | "krillin-ai/internal/storage" 7 | "os" 8 | "os/exec" 9 | "path/filepath" 10 | "regexp" 11 | "strconv" 12 | "strings" 13 | "unicode" 14 | ) 15 | 16 | // 处理每一个字幕块 17 | func ProcessBlock(block []string, targetLanguageFile, targetLanguageTextFile, originLanguageFile, originLanguageTextFile *os.File, isTargetOnTop bool) { 18 | var targetLines, 
originLines []string 19 | // 匹配时间戳的正则表达式 20 | timePattern := regexp.MustCompile(`\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}`) 21 | for _, line := range block { 22 | if timePattern.MatchString(line) || IsNumber(line) { 23 | // 时间戳和编号行保留在两个文件中 24 | targetLines = append(targetLines, line) 25 | originLines = append(originLines, line) 26 | continue 27 | } 28 | if len(targetLines) == 2 && len(originLines) == 2 { // 刚写完编号和时间戳,到了上方的文字行 29 | if isTargetOnTop { 30 | targetLines = append(targetLines, line) 31 | targetLanguageTextFile.WriteString(line) // 文稿文件 32 | } else { 33 | originLines = append(originLines, line) 34 | originLanguageTextFile.WriteString(line) 35 | } 36 | continue 37 | } 38 | // 到了下方的文字行 39 | if isTargetOnTop { 40 | originLines = append(originLines, line) 41 | originLanguageTextFile.WriteString(line) 42 | } else { 43 | targetLines = append(targetLines, line) 44 | targetLanguageTextFile.WriteString(line) 45 | } 46 | } 47 | 48 | if len(targetLines) > 2 { 49 | // 写入目标语言文件 50 | for _, line := range targetLines { 51 | targetLanguageFile.WriteString(line + "\n") 52 | } 53 | targetLanguageFile.WriteString("\n") 54 | } 55 | 56 | if len(originLines) > 2 { 57 | // 写入源语言文件 58 | for _, line := range originLines { 59 | originLanguageFile.WriteString(line + "\n") 60 | } 61 | originLanguageFile.WriteString("\n") 62 | } 63 | } 64 | 65 | // IsSubtitleText 是否是字幕文件中的字幕文字行 66 | func IsSubtitleText(line string) bool { 67 | if line == "" { 68 | return false 69 | } 70 | if IsNumber(line) { 71 | return false 72 | } 73 | timelinePattern := regexp.MustCompile(`\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}`) 74 | return !timelinePattern.MatchString(line) 75 | } 76 | 77 | type Format struct { 78 | Duration string `json:"duration"` 79 | } 80 | 81 | type ProbeData struct { 82 | Format Format `json:"format"` 83 | } 84 | 85 | type SrtBlock struct { 86 | Index int 87 | Timestamp string 88 | TargetLanguageSentence string 89 | OriginLanguageSentence string 90 | } 91 | 92 | func TrimString(s string) string { 93 | s = strings.Replace(s, "[中文翻译]", "", -1) 94 | s = strings.Replace(s, "[英文句子]", "", -1) 95 | // 去除开头的空格和 '[' 96 | s = strings.TrimLeft(s, " [") 97 | 98 | // 去除结尾的空格和 ']' 99 | s = strings.TrimRight(s, " ]") 100 | 101 | //替换中文单引号 102 | s = strings.ReplaceAll(s, "’", "'") 103 | 104 | return s 105 | } 106 | 107 | func ParseSrtNoTsToSrtBlock(srtNoTsFile string) ([]*SrtBlock, error) { 108 | file, err := os.Open(srtNoTsFile) 109 | if err != nil { 110 | return nil, err 111 | } 112 | defer file.Close() 113 | 114 | var blocks []*SrtBlock 115 | var currentBlock SrtBlock 116 | scanner := bufio.NewScanner(file) 117 | start := true 118 | 119 | for scanner.Scan() { 120 | line := TrimString(scanner.Text()) 121 | // 去掉最开始的描述 122 | if start && !IsNumber(line) { 123 | continue 124 | } else { 125 | start = false 126 | } 127 | if line == "" { // 空行表示一个块的结束 128 | if currentBlock.Index != 0 { 129 | cur := currentBlock 130 | blocks = append(blocks, &cur) 131 | currentBlock = SrtBlock{} // 重置 132 | } 133 | continue 134 | } 135 | 136 | if currentBlock.Index == 0 { // 按文件内容依次赋值 137 | var index int 138 | _, err = fmt.Sscanf(line, "%d", &index) 139 | if err != nil { 140 | return blocks, nil 141 | } // 可能是空语音等,直接忽略 142 | currentBlock.Index = index 143 | } else if currentBlock.TargetLanguageSentence == "" { 144 | currentBlock.TargetLanguageSentence = line 145 | } else if currentBlock.OriginLanguageSentence == "" { 146 | currentBlock.OriginLanguageSentence = line 147 | } 148 | } 149 | // 最后的块 150 | if currentBlock.Index != 0 
{ 151 | cur := currentBlock 152 | blocks = append(blocks, &cur) 153 | } 154 | 155 | if err = scanner.Err(); err != nil { 156 | return nil, err 157 | } 158 | return blocks, nil 159 | } 160 | 161 | func SplitSentence(sentence string) []string { 162 | // 使用正则表达式移除标点符号和特殊字符(保留各语言字母、数字和空格) 163 | re := regexp.MustCompile(`[^\p{L}\p{N}\s']+`) 164 | cleanedSentence := re.ReplaceAllString(sentence, " ") 165 | 166 | // 使用 strings.Fields 按空格拆分成单词 167 | words := strings.Fields(cleanedSentence) 168 | 169 | return words 170 | } 171 | 172 | func MergeFile(finalFile string, files ...string) error { 173 | // 创建最终文件 174 | final, err := os.Create(finalFile) 175 | if err != nil { 176 | return err 177 | } 178 | 179 | // 逐个读取文件并写入最终文件 180 | for _, file := range files { 181 | f, err := os.Open(file) 182 | if err != nil { 183 | return err 184 | } 185 | defer f.Close() 186 | 187 | scanner := bufio.NewScanner(f) 188 | for scanner.Scan() { 189 | line := scanner.Text() 190 | final.WriteString(line + "\n") 191 | } 192 | } 193 | 194 | return nil 195 | } 196 | 197 | func MergeSrtFiles(finalFile string, files ...string) error { 198 | output, err := os.Create(finalFile) 199 | if err != nil { 200 | return err 201 | } 202 | defer output.Close() 203 | writer := bufio.NewWriter(output) 204 | lineNumber := 0 205 | for _, file := range files { 206 | // 不存在某一个file就跳过 207 | if _, err = os.Stat(file); os.IsNotExist(err) { 208 | continue 209 | } 210 | // 打开当前字幕文件 211 | f, err := os.Open(file) 212 | if err != nil { 213 | return err 214 | } 215 | defer f.Close() 216 | // 处理当前字幕文件 217 | scanner := bufio.NewScanner(f) 218 | for scanner.Scan() { 219 | line := scanner.Text() 220 | 221 | if strings.Contains(line, "```") { 222 | continue 223 | } 224 | 225 | if IsNumber(line) { 226 | lineNumber++ 227 | line = strconv.Itoa(lineNumber) 228 | } 229 | 230 | writer.WriteString(line + "\n") 231 | } 232 | } 233 | writer.Flush() 234 | 235 | return nil 236 | } 237 | 238 | // 给定文件和替换map,将文件中所有的key替换成value 239 | func ReplaceFileContent(srcFile, dstFile string, replacements map[string]string) error { 240 | file, err := os.Open(srcFile) 241 | if err != nil { 242 | return err 243 | } 244 | defer file.Close() 245 | 246 | outFile, err := os.Create(dstFile) 247 | if err != nil { 248 | return err 249 | } 250 | defer outFile.Close() 251 | 252 | scanner := bufio.NewScanner(file) 253 | writer := bufio.NewWriter(outFile) // 提高性能 254 | defer writer.Flush() 255 | 256 | for scanner.Scan() { 257 | line := scanner.Text() 258 | for before, after := range replacements { 259 | line = strings.ReplaceAll(line, before, after) 260 | } 261 | _, _ = writer.WriteString(line + "\n") 262 | } 263 | 264 | if err = scanner.Err(); err != nil { 265 | return err 266 | } 267 | 268 | return nil 269 | } 270 | 271 | // 获得文件名后加上后缀的新文件名,不改变扩展名,例如:/home/ubuntu/abc.srt变成/home/ubuntu/abc_tmp.srt 272 | func AddSuffixToFileName(filePath, suffix string) string { 273 | dir := filepath.Dir(filePath) 274 | ext := filepath.Ext(filePath) 275 | name := strings.TrimSuffix(filepath.Base(filePath), ext) 276 | newName := fmt.Sprintf("%s%s%s", name, suffix, ext) 277 | return filepath.Join(dir, newName) 278 | } 279 | 280 | // 去除字符串中的标点符号等字符,确保字符中的内容都是whisper模型可以识别出来的,便于时间戳对齐 281 | func GetRecognizableString(s string) string { 282 | var result []rune 283 | for _, v := range s { 284 | // 英文字母和数字 285 | if unicode.Is(unicode.Latin, v) || unicode.Is(unicode.Number, v) { 286 | result = append(result, v) 287 | } 288 | // 中文 289 | if unicode.Is(unicode.Han, v) { 290 | result = append(result, v) 291 | } 292 | // 韩文 
293 | if unicode.Is(unicode.Hangul, v) { 294 | result = append(result, v) 295 | } 296 | // 日文平假片假 297 | if unicode.Is(unicode.Hiragana, v) || unicode.Is(unicode.Katakana, v) { 298 | result = append(result, v) 299 | } 300 | } 301 | return string(result) 302 | } 303 | 304 | func GetAudioDuration(inputFile string) (float64, error) { 305 | // 使用 ffprobe 获取精确时长 306 | cmd := exec.Command(storage.FfprobePath, "-i", inputFile, "-show_entries", "format=duration", "-v", "quiet", "-of", "csv=p=0") 307 | cmdOutput, err := cmd.Output() 308 | if err != nil { 309 | return 0, fmt.Errorf("GetAudioDuration failed to get audio duration: %w", err) 310 | } 311 | 312 | // 解析时长 313 | duration, err := strconv.ParseFloat(strings.TrimSpace(string(cmdOutput)), 64) 314 | if err != nil { 315 | return 0, fmt.Errorf("GetAudioDuration failed to parse audio duration: %w", err) 316 | } 317 | 318 | return duration, nil 319 | } 320 | -------------------------------------------------------------------------------- /pkg/whisper/init.go: -------------------------------------------------------------------------------- 1 | package whisper 2 | 3 | import ( 4 | "github.com/sashabaranov/go-openai" 5 | "krillin-ai/config" 6 | "net/http" 7 | ) 8 | 9 | type Client struct { 10 | client *openai.Client 11 | } 12 | 13 | func NewClient(baseUrl, apiKey, proxyAddr string) *Client { 14 | cfg := openai.DefaultConfig(apiKey) 15 | if baseUrl != "" { 16 | cfg.BaseURL = baseUrl 17 | } 18 | 19 | if proxyAddr != "" { 20 | transport := &http.Transport{ 21 | Proxy: http.ProxyURL(config.Conf.App.ParsedProxy), 22 | } 23 | cfg.HTTPClient = &http.Client{ 24 | Transport: transport, 25 | } 26 | } 27 | 28 | client := openai.NewClientWithConfig(cfg) 29 | return &Client{client: client} 30 | } 31 | -------------------------------------------------------------------------------- /pkg/whisper/whisper.go: -------------------------------------------------------------------------------- 1 | package whisper 2 | 3 | import ( 4 | "context" 5 | "github.com/sashabaranov/go-openai" 6 | "go.uber.org/zap" 7 | "krillin-ai/internal/types" 8 | "krillin-ai/log" 9 | "strings" 10 | ) 11 | 12 | func (c *Client) Transcription(audioFile, language, workDir string) (*types.TranscriptionData, error) { 13 | resp, err := c.client.CreateTranscription( 14 | context.Background(), 15 | openai.AudioRequest{ 16 | Model: openai.Whisper1, 17 | FilePath: audioFile, 18 | Format: openai.AudioResponseFormatVerboseJSON, 19 | TimestampGranularities: []openai.TranscriptionTimestampGranularity{ 20 | openai.TranscriptionTimestampGranularityWord, 21 | }, 22 | Language: language, 23 | }, 24 | ) 25 | if err != nil { 26 | log.GetLogger().Error("openai create transcription failed", zap.Error(err)) 27 | return nil, err 28 | } 29 | 30 | transcriptionData := &types.TranscriptionData{ 31 | Language: resp.Language, 32 | Text: strings.ReplaceAll(resp.Text, "-", " "), // 连字符处理,因为模型存在很多错误添加到连字符 33 | Words: make([]types.Word, 0), 34 | } 35 | num := 0 36 | for _, word := range resp.Words { 37 | if strings.Contains(word.Word, "—") { 38 | // 对称切分 39 | mid := (word.Start + word.End) / 2 40 | seperatedWords := strings.Split(word.Word, "—") 41 | transcriptionData.Words = append(transcriptionData.Words, []types.Word{ 42 | { 43 | Num: num, 44 | Text: seperatedWords[0], 45 | Start: word.Start, 46 | End: mid, 47 | }, 48 | { 49 | Num: num + 1, 50 | Text: seperatedWords[1], 51 | Start: mid, 52 | End: word.End, 53 | }, 54 | }...) 
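// Same midpoint split as in pkg/fasterwhisper; note that this OpenAI-API path keeps the
// split halves' original spacing and punctuation, while the local engines pass them
// through util.CleanPunction and strings.TrimSpace.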
55 | num += 2 56 | } else { 57 | transcriptionData.Words = append(transcriptionData.Words, types.Word{ 58 | Num: num, 59 | Text: word.Word, 60 | Start: word.Start, 61 | End: word.End, 62 | }) 63 | num++ 64 | } 65 | } 66 | 67 | return transcriptionData, nil 68 | } 69 | -------------------------------------------------------------------------------- /pkg/whispercpp/init.go: -------------------------------------------------------------------------------- 1 | package whispercpp 2 | 3 | type WhispercppProcessor struct { 4 | WorkDir string // 生成中间文件的目录 5 | Model string 6 | } 7 | 8 | func NewWhispercppProcessor(model string) *WhispercppProcessor { 9 | return &WhispercppProcessor{ 10 | Model: model, 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /pkg/whispercpp/transcription.go: -------------------------------------------------------------------------------- 1 | package whispercpp 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "krillin-ai/internal/storage" 7 | "krillin-ai/internal/types" 8 | "krillin-ai/log" 9 | "krillin-ai/pkg/util" 10 | "os" 11 | "os/exec" 12 | "regexp" 13 | "strconv" 14 | "strings" 15 | 16 | "go.uber.org/zap" 17 | ) 18 | 19 | func (c *WhispercppProcessor) Transcription(audioFile, language, workDir string) (*types.TranscriptionData, error) { 20 | name := util.ChangeFileExtension(audioFile, "") 21 | cmdArgs := []string{ 22 | "-m", fmt.Sprintf("./models/whispercpp/ggml-%s.bin", c.Model), 23 | "--output-json-full", 24 | "--flash-attn", 25 | "--split-on-word", 26 | "--language", language, 27 | "--output-file", name, 28 | "--file", audioFile, 29 | } 30 | cmd := exec.Command(storage.WhispercppPath, cmdArgs...) 31 | log.GetLogger().Info("WhispercppProcessor转录开始", zap.String("cmd", cmd.String())) 32 | output, err := cmd.CombinedOutput() 33 | if err != nil && !strings.Contains(string(output), "output_json: saving output to") { 34 | log.GetLogger().Error("WhispercppProcessor cmd 执行失败", zap.String("output", string(output)), zap.Error(err)) 35 | return nil, err 36 | } 37 | log.GetLogger().Info("WhispercppProcessor转录json生成完毕", zap.String("audio file", audioFile)) 38 | 39 | var result types.WhispercppOutput 40 | fileData, err := os.Open(util.ChangeFileExtension(audioFile, ".json")) 41 | if err != nil { 42 | log.GetLogger().Error("WhispercppProcessor 打开json文件失败", zap.Error(err)) 43 | return nil, err 44 | } 45 | defer fileData.Close() 46 | decoder := json.NewDecoder(fileData) 47 | if err = decoder.Decode(&result); err != nil { 48 | log.GetLogger().Error("WhispercppProcessor 解析json文件失败", zap.Error(err)) 49 | return nil, err 50 | } 51 | 52 | var ( 53 | transcriptionData types.TranscriptionData 54 | num int 55 | ) 56 | for _, segment := range result.Transcription { 57 | transcriptionData.Text += strings.ReplaceAll(segment.Text, "—", " ") // 连字符处理,因为模型存在很多错误添加到连字符 58 | for _, word := range segment.Tokens { 59 | fromSec, err := parseTimestampToSeconds(word.Timestamps.From) 60 | if err != nil { 61 | log.GetLogger().Error("解析开始时间失败", zap.Error(err)) 62 | return nil, err 63 | } 64 | 65 | toSec, err := parseTimestampToSeconds(word.Timestamps.To) 66 | if err != nil { 67 | log.GetLogger().Error("解析结束时间失败", zap.Error(err)) 68 | return nil, err 69 | } 70 | regex := regexp.MustCompile(`^\[.*\]$`) 71 | if regex.MatchString(word.Text) { 72 | continue 73 | } else if strings.Contains(word.Text, "—") { 74 | // 对称切分 75 | mid := (fromSec + toSec) / 2 76 | seperatedWords := strings.Split(word.Text, "—") 77 | transcriptionData.Words = 
append(transcriptionData.Words, []types.Word{ 78 | { 79 | Num: num, 80 | Text: util.CleanPunction(strings.TrimSpace(seperatedWords[0])), 81 | Start: fromSec, 82 | End: mid, 83 | }, 84 | { 85 | Num: num + 1, 86 | Text: util.CleanPunction(strings.TrimSpace(seperatedWords[1])), 87 | Start: mid, 88 | End: toSec, 89 | }, 90 | }...) 91 | num += 2 92 | } else { 93 | transcriptionData.Words = append(transcriptionData.Words, types.Word{ 94 | Num: num, 95 | Text: util.CleanPunction(strings.TrimSpace(word.Text)), 96 | Start: fromSec, 97 | End: toSec, 98 | }) 99 | num++ 100 | } 101 | } 102 | } 103 | log.GetLogger().Info("WhispercppProcessor转录成功") 104 | return &transcriptionData, nil 105 | } 106 | 107 | // 新增时间戳转换函数 108 | func parseTimestampToSeconds(timeStr string) (float64, error) { 109 | parts := strings.Split(timeStr, ",") 110 | if len(parts) != 2 { 111 | return 0, fmt.Errorf("invalid timestamp format: %s", timeStr) 112 | } 113 | 114 | timePart := strings.Split(parts[0], ":") 115 | if len(timePart) != 3 { 116 | return 0, fmt.Errorf("invalid time format: %s", parts[0]) 117 | } 118 | 119 | hours, _ := strconv.Atoi(timePart[0]) 120 | minutes, _ := strconv.Atoi(timePart[1]) 121 | seconds, _ := strconv.Atoi(timePart[2]) 122 | milliseconds, _ := strconv.Atoi(parts[1]) 123 | 124 | return float64(hours*3600+minutes*60+seconds) + float64(milliseconds)/1000, nil 125 | } 126 | -------------------------------------------------------------------------------- /pkg/whisperkit/init.go: -------------------------------------------------------------------------------- 1 | package whisperkit 2 | 3 | type WhisperKitProcessor struct { 4 | WorkDir string // 生成中间文件的目录 5 | Model string 6 | } 7 | 8 | func NewWhisperKitProcessor(model string) *WhisperKitProcessor { 9 | return &WhisperKitProcessor{ 10 | Model: model, 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /pkg/whisperkit/transcription.go: -------------------------------------------------------------------------------- 1 | package whisperkit 2 | 3 | import ( 4 | "encoding/json" 5 | "krillin-ai/internal/storage" 6 | "krillin-ai/internal/types" 7 | "krillin-ai/log" 8 | "krillin-ai/pkg/util" 9 | "os" 10 | "os/exec" 11 | "strings" 12 | 13 | "go.uber.org/zap" 14 | ) 15 | 16 | func (c *WhisperKitProcessor) Transcription(audioFile, language, workDir string) (*types.TranscriptionData, error) { 17 | cmdArgs := []string{ 18 | "transcribe", 19 | "--model-path", "./models/whisperkit/openai_whisper-large-v2", 20 | "--audio-encoder-compute-units", "all", 21 | "--text-decoder-compute-units", "all", 22 | "--language", language, 23 | "--report", 24 | "--report-path", workDir, 25 | "--word-timestamps", 26 | "--skip-special-tokens", 27 | "--audio-path", audioFile, 28 | } 29 | cmd := exec.Command(storage.WhisperKitPath, cmdArgs...) 
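// The command assembled above (run via storage.WhisperKitPath) requests word-level
// timestamps and a JSON report; the code below expects that report next to the audio
// file (audioFile with a .json extension) and flattens it into types.TranscriptionData,
// mirroring the other engines in pkg/.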
30 | log.GetLogger().Info("WhisperKitProcessor转录开始", zap.String("cmd", cmd.String())) 31 | output, err := cmd.CombinedOutput() 32 | if err != nil { 33 | log.GetLogger().Error("WhisperKitProcessor cmd 执行失败", zap.String("output", string(output)), zap.Error(err)) 34 | return nil, err 35 | } 36 | log.GetLogger().Info("WhisperKitProcessor转录json生成完毕", zap.String("audio file", audioFile)) 37 | 38 | var result types.WhisperKitOutput 39 | fileData, err := os.Open(util.ChangeFileExtension(audioFile, ".json")) 40 | if err != nil { 41 | log.GetLogger().Error("WhisperKitProcessor 打开json文件失败", zap.Error(err)) 42 | return nil, err 43 | } 44 | defer fileData.Close() 45 | decoder := json.NewDecoder(fileData) 46 | if err = decoder.Decode(&result); err != nil { 47 | log.GetLogger().Error("WhisperKitProcessor 解析json文件失败", zap.Error(err)) 48 | return nil, err 49 | } 50 | 51 | var ( 52 | transcriptionData types.TranscriptionData 53 | num int 54 | ) 55 | for _, segment := range result.Segments { 56 | transcriptionData.Text += strings.ReplaceAll(segment.Text, "—", " ") // 连字符处理,因为模型存在很多错误添加到连字符 57 | for _, word := range segment.Words { 58 | if strings.Contains(word.Word, "—") { 59 | // 对称切分 60 | mid := (word.Start + word.End) / 2 61 | seperatedWords := strings.Split(word.Word, "—") 62 | transcriptionData.Words = append(transcriptionData.Words, []types.Word{ 63 | { 64 | Num: num, 65 | Text: util.CleanPunction(strings.TrimSpace(seperatedWords[0])), 66 | Start: word.Start, 67 | End: mid, 68 | }, 69 | { 70 | Num: num + 1, 71 | Text: util.CleanPunction(strings.TrimSpace(seperatedWords[1])), 72 | Start: mid, 73 | End: word.End, 74 | }, 75 | }...) 76 | num += 2 77 | } else { 78 | transcriptionData.Words = append(transcriptionData.Words, types.Word{ 79 | Num: num, 80 | Text: util.CleanPunction(strings.TrimSpace(word.Word)), 81 | Start: word.Start, 82 | End: word.End, 83 | }) 84 | num++ 85 | } 86 | } 87 | } 88 | log.GetLogger().Info("WhisperKitProcessor转录成功") 89 | return &transcriptionData, nil 90 | } 91 | -------------------------------------------------------------------------------- /static/background.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krillinai/KrillinAI/d32920bc6b13724e0f39287bf6952cfc323b943e/static/background.jpg -------------------------------------------------------------------------------- /static/embed.go: -------------------------------------------------------------------------------- 1 | package static 2 | 3 | import "embed" 4 | 5 | //go:embed index.html background.jpg 6 | var EmbeddedFiles embed.FS 7 | --------------------------------------------------------------------------------
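Usage sketch (not part of the repository): the transcription backends above (pkg/whisper, pkg/fasterwhisper, pkg/whispercpp, pkg/whisperkit) all expose the same Transcription(audioFile, language, workDir) method, so callers can swap engines without changing the calling code. The snippet below is a minimal illustration of that shape; the Transcriber interface, the model name and the paths are assumptions for the example (the project's real contract lives in internal/types/interface.go and may differ), and the local engines additionally require their binaries and model files at the paths used in the code above.

package main

import (
	"fmt"

	"krillin-ai/internal/types"
	"krillin-ai/pkg/fasterwhisper"
	"krillin-ai/pkg/whispercpp"
)

// Transcriber is assumed here for illustration; it matches the method implemented by the
// processors in pkg/whisper, pkg/fasterwhisper, pkg/whispercpp and pkg/whisperkit above.
type Transcriber interface {
	Transcription(audioFile, language, workDir string) (*types.TranscriptionData, error)
}

func main() {
	// "medium" and the file paths below are placeholders.
	useWhisperCpp := false // flip to switch engines without changing the calling code
	var t Transcriber = fasterwhisper.NewFastwhisperProcessor("medium")
	if useWhisperCpp {
		t = whispercpp.NewWhispercppProcessor("medium")
	}

	data, err := t.Transcription("./uploads/sample_mono_16K.mp3", "en", "./tasks/demo")
	if err != nil {
		panic(err)
	}
	fmt.Println("recognized words:", len(data.Words))
	fmt.Println("text:", data.Text)
}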