├── .github └── workflows │ ├── gpt-translate.yml │ └── release.yml ├── .gitignore ├── .goreleaser.yaml ├── Dockerfile ├── LICENSE ├── README.md ├── aliyun.md ├── cmd ├── desktop │ └── main.go └── server │ └── main.go ├── config ├── config-example.toml └── config.go ├── docker.md ├── docs ├── ar │ ├── README.md │ ├── aliyun.md │ ├── docker.md │ └── faq.md ├── de │ ├── README.md │ ├── aliyun.md │ ├── docker.md │ └── faq.md ├── es │ ├── README.md │ ├── aliyun.md │ ├── docker.md │ └── faq.md ├── fr │ ├── README.md │ ├── aliyun.md │ ├── docker.md │ └── faq.md ├── images │ ├── alignment.png │ ├── aliyun_accesskey_1.png │ ├── aliyun_oss_1.png │ ├── aliyun_oss_2.png │ ├── aliyun_oss_3.png │ ├── aliyun_oss_4.png │ ├── aliyun_oss_5.png │ ├── aliyun_speech_1.png │ ├── aliyun_speech_2.png │ ├── aliyun_speech_3.png │ ├── aliyun_speech_4.png │ ├── bailian_1.png │ ├── export_cookies.png │ ├── logo.jpg │ ├── ui.jpg │ └── ui_desktop.png ├── jp │ ├── README.md │ ├── aliyun.md │ ├── docker.md │ └── faq.md ├── kr │ ├── README.md │ ├── aliyun.md │ ├── docker.md │ └── faq.md ├── pt │ ├── README.md │ ├── aliyun.md │ ├── docker.md │ └── faq.md ├── rus │ ├── README.md │ ├── aliyun.md │ ├── docker.md │ └── faq.md ├── vi │ ├── README.md │ ├── aliyun.md │ ├── docker.md │ └── faq.md └── zh │ ├── README.md │ ├── aliyun.md │ ├── docker.md │ └── faq.md ├── faq.md ├── go.mod ├── go.sum ├── internal ├── api │ └── subtitle.go ├── deps │ └── checker.go ├── desktop │ ├── components.go │ ├── desktop.go │ ├── file.go │ ├── subtitle.go │ ├── theme.go │ └── ui.go ├── dto │ └── subtitle_task.go ├── handler │ ├── init.go │ ├── middleware.go │ └── subtitle_task.go ├── response │ └── response.go ├── router │ └── router.go ├── server │ └── server.go ├── service │ ├── audio2subtitle.go │ ├── audio2subtitle_test.go │ ├── get_video_info.go │ ├── init.go │ ├── link2file.go │ ├── srt2speech.go │ ├── srt_embed.go │ ├── subtitle_service.go │ └── upload_subtitle.go ├── storage │ ├── bin.go │ └── subtitle_task.go └── types │ ├── embed_subtitle.go │ ├── fasterwhisper.go │ ├── interface.go │ ├── language.go │ ├── subtitle_task.go │ ├── whispercpp.go │ └── whisperkit.go ├── log └── zap.go ├── pkg ├── aliyun │ ├── asr.go │ ├── base.go │ ├── chat.go │ ├── oss.go │ ├── tts.go │ └── voice_clone.go ├── fasterwhisper │ ├── init.go │ └── transcription.go ├── openai │ ├── init.go │ └── openai.go ├── util │ ├── audio.go │ ├── base.go │ ├── download.go │ ├── language.go │ ├── subtitle.go │ └── video.go ├── whisper │ ├── init.go │ └── whisper.go ├── whispercpp │ ├── init.go │ └── transcription.go └── whisperkit │ ├── init.go │ └── transcription.go └── static ├── background.jpg ├── embed.go └── index.html /.github/workflows/gpt-translate.yml: -------------------------------------------------------------------------------- 1 | name: GPT Translate 2 | 3 | on: 4 | issue_comment: 5 | types: [created] 6 | 7 | jobs: 8 | gpt_translate: 9 | if: contains(github.event.comment.body, '/gt') || contains(github.event.comment.body, '/gpt-translate') 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v4 13 | 14 | - name: Run GPT Translate 15 | uses: PairZhu/gpt-translate@master 16 | with: 17 | apikey: ${{ secrets.OPENAI_API_KEY }} 18 | model: ${{ secrets.OPENAI_MODEL }} 19 | basePath: ${{ secrets.OPENAI_BASE_URL }} 20 | prompt: '请将给定文本翻译为目标语言 {targetLanguage}。翻译带链接的内容时请勿修改任何链接地址,但可以在合适的情况修改链接文本。在合适的非代码位置可以修改标点符号的全半角。请确保翻译后的文本符合 {targetFileExt} 文件的语法和格式。请勿添加任何额外的解释或注释。文本内容如下:' 21 | 22 | 
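The workflow above only runs when an issue comment contains `/gt` or `/gpt-translate`, and its `apikey`, `model` and `basePath` inputs are read from repository secrets, so those secrets must be configured before the command has any effect. A hypothetical trigger comment, assuming the PairZhu fork keeps the upstream gpt-translate argument order of input path, output path and target language (check the action's README for the exact syntax):

```
/gpt-translate README.md docs/jp/README.md Japanese
```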
-------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: release 2 | 3 | on: 4 | push: 5 | tags: 6 | - "v*" 7 | - "v*-*" 8 | 9 | permissions: 10 | contents: write 11 | 12 | jobs: 13 | build-desktop: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: Checkout code 17 | uses: actions/checkout@v4 18 | 19 | - name: Set up Go 20 | uses: actions/setup-go@v5 21 | with: 22 | go-version-file: "go.mod" 23 | 24 | - name: Install XGO 25 | run: | 26 | go install src.techknowlogick.com/xgo@latest 27 | 28 | - name: Pull Docker Image 29 | run: | 30 | docker pull ghcr.io/techknowlogick/xgo:latest 31 | 32 | - name: Get Version 33 | id: version 34 | uses: actions/github-script@v7 35 | with: 36 | script: | 37 | const rawTag = '${{ github.ref_name }}'; 38 | const version = rawTag.replace(/^v/, ''); // Remove the leading 'v' if present 39 | console.log(`Version: ${version}`); 40 | core.setOutput('version', version); 41 | 42 | - name: Build Binary 43 | run: | 44 | targets=( 45 | # macOS (amd64) 46 | "darwin amd64 _amd64 macOS" 47 | # macOS (arm64) 48 | "darwin arm64 _arm64 macOS" 49 | # Windows (amd64) 50 | "windows amd64 .exe Windows" 51 | # Windows (386) 52 | "windows 386 _i386.exe Windows" 53 | ) 54 | mkdir -p build 55 | 56 | # 遍历所有平台 57 | for entry in "${targets[@]}"; do 58 | ( 59 | # 拆分字符串 60 | IFS=' ' read -r -a parts <<< "$entry" 61 | os="${parts[0]}" 62 | arch="${parts[1]}" 63 | suffix="${parts[2]}" 64 | display_os="${parts[3]}" 65 | log_prefix="[${os}-${arch}]" 66 | # 构建目标目录 67 | target_dir="dist/${os}_${arch}" 68 | mkdir -p "$target_dir" 69 | # 使用 xgo 构建 70 | echo "${log_prefix} 🚀 Building for $os/$arch..." 71 | xgo \ 72 | --targets="$os/$arch" \ 73 | --out "krillinai_desktop" \ 74 | --dest "$target_dir" \ 75 | ./cmd/desktop 2>&1 | sed "s/^/${log_prefix} /" 76 | # 生成最终二进制文件名日志输 77 | binary_name="KrillinAI_${{ steps.version.outputs.version }}_Desktop_${display_os}${suffix}" 78 | # 移动并重命名文件 79 | mv "$target_dir"/krillinai_desktop* "build/$binary_name" 80 | echo "${log_prefix} ✅ Built: build/$binary_name" 81 | ) & 82 | done 83 | 84 | wait 85 | echo "✨ All concurrent tasks completed!" 
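      # Note on the "Build Binary" step above: xgo cross-compiles ./cmd/desktop for the four
      # OS/arch targets in parallel, and each binary is renamed to
      # KrillinAI_<version>_Desktop_<OS><suffix> under build/. For example, a (hypothetical)
      # tag v1.2.3 would produce build/KrillinAI_1.2.3_Desktop_macOS_arm64 and
      # build/KrillinAI_1.2.3_Desktop_Windows.exe. The build/ directory is then published by
      # the "Upload artifacts" step below for the goreleaser job.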
86 | 87 | - name: Upload artifacts 88 | uses: actions/upload-artifact@v4 89 | with: 90 | path: build/* 91 | retention-days: 1 92 | 93 | goreleaser: 94 | needs: build-desktop 95 | if: always() 96 | runs-on: ubuntu-latest 97 | steps: 98 | - name: Set up QEMU 99 | uses: docker/setup-qemu-action@v3 100 | 101 | - name: Set up Docker Buildx 102 | uses: docker/setup-buildx-action@v3 103 | 104 | - name: Checkout code 105 | uses: actions/checkout@v4 106 | with: 107 | fetch-depth: 0 108 | 109 | - name: Download artifacts 110 | uses: actions/download-artifact@v4 111 | with: 112 | path: build 113 | 114 | - name: Set up Go 115 | uses: actions/setup-go@v5 116 | with: 117 | go-version-file: "go.mod" 118 | 119 | - name: Login to Docker Hub 120 | uses: docker/login-action@v3 121 | with: 122 | username: ${{ secrets.DOCKER_USERNAME }} 123 | password: ${{ secrets.DOCKERHUB_TOKEN }} 124 | 125 | - name: Run GoReleaser 126 | uses: goreleaser/goreleaser-action@v6 127 | with: 128 | distribution: goreleaser 129 | version: latest 130 | args: release --clean 131 | env: 132 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 133 | DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }} 134 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | tasks/ 3 | cookies.txt 4 | .vscode/ 5 | config/config.toml 6 | bin/ 7 | models/ 8 | uploads/ 9 | app.log 10 | build/ 11 | dist/ 12 | 13 | # MACOS 14 | .DS_Store 15 | ._* 16 | -------------------------------------------------------------------------------- /.goreleaser.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | builds: 4 | - env: 5 | - CGO_ENABLED=0 6 | goos: 7 | - darwin 8 | - linux 9 | - windows 10 | main: ./cmd/server/main.go 11 | 12 | # Docker 构建配置 13 | dockers: 14 | - image_templates: 15 | - "{{ tolower .Env.DOCKER_USERNAME }}/{{ tolower .ProjectName }}:{{.Version}}-amd64" 16 | - "{{ tolower .Env.DOCKER_USERNAME }}/{{ tolower .ProjectName }}:latest-amd64" 17 | dockerfile: Dockerfile 18 | use: buildx 19 | build_flag_templates: 20 | - "--platform=linux/amd64" 21 | - "--label=org.opencontainers.image.created={{.Date}}" 22 | - "--label=org.opencontainers.image.title={{.ProjectName}}" 23 | - "--label=org.opencontainers.image.revision={{.FullCommit}}" 24 | - "--label=org.opencontainers.image.version={{.Version}}" 25 | - "--label=org.opencontainers.image.source=https://github.com/{{ .Env.GITHUB_REPOSITORY_OWNER }}/{{.ProjectName}}" 26 | - image_templates: 27 | - "{{ tolower .Env.DOCKER_USERNAME }}/{{ tolower .ProjectName }}:{{.Version}}-arm64" 28 | - "{{ tolower .Env.DOCKER_USERNAME }}/{{ tolower .ProjectName }}:latest-arm64" 29 | dockerfile: Dockerfile 30 | use: buildx 31 | build_flag_templates: 32 | - "--platform=linux/arm64" 33 | - "--label=org.opencontainers.image.created={{.Date}}" 34 | - "--label=org.opencontainers.image.title={{.ProjectName}}" 35 | - "--label=org.opencontainers.image.revision={{.FullCommit}}" 36 | - "--label=org.opencontainers.image.version={{.Version}}" 37 | - "--label=org.opencontainers.image.source=https://github.com/{{ .Env.GITHUB_REPOSITORY_OWNER }}/{{.ProjectName}}" 38 | 39 | docker_manifests: 40 | - name_template: "{{ tolower .Env.DOCKER_USERNAME }}/{{ tolower .ProjectName }}:{{.Version}}" 41 | image_templates: 42 | - "{{ tolower .Env.DOCKER_USERNAME }}/{{ tolower .ProjectName }}:{{.Version}}-amd64" 43 | - "{{ tolower .Env.DOCKER_USERNAME }}/{{ tolower .ProjectName 
}}:{{.Version}}-arm64" 44 | - name_template: "{{ tolower .Env.DOCKER_USERNAME }}/{{ tolower .ProjectName }}:latest" 45 | image_templates: 46 | - "{{ tolower .Env.DOCKER_USERNAME }}/{{ tolower .ProjectName }}:latest-amd64" 47 | - "{{ tolower .Env.DOCKER_USERNAME }}/{{ tolower .ProjectName }}:latest-arm64" 48 | 49 | archives: 50 | - formats: ["binary"] 51 | name_template: >- 52 | {{ .ProjectName }}_ 53 | {{- .Version }}_ 54 | {{- if eq .Os "darwin" }}macOS_{{ .Arch }} 55 | {{- else if and (eq .Os "windows") (eq .Arch "amd64") }}{{ title .Os }} 56 | {{- else }}{{ title .Os }}_ 57 | {{- if eq .Arch "amd64" }}x86_64 58 | {{- else if eq .Arch "386" }}i386 59 | {{- else }}{{ .Arch }}{{ end }} 60 | {{- if .Arm }}v{{ .Arm }}{{ end }} 61 | {{- end }} 62 | 63 | release: 64 | extra_files: 65 | - glob: "build/*" 66 | 67 | changelog: 68 | sort: asc 69 | filters: 70 | exclude: 71 | - "^docs:" 72 | - "^test:" 73 | - "^chore:" 74 | - "^ci:" 75 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:latest 2 | 3 | WORKDIR /app 4 | 5 | RUN apt-get update && \ 6 | apt-get install -y --no-install-recommends wget ca-certificates ffmpeg && \ 7 | rm -rf /var/lib/apt/lists/* 8 | 9 | RUN mkdir -p bin && \ 10 | ARCH=$(uname -m) && \ 11 | case "$ARCH" in \ 12 | x86_64) \ 13 | URL="https://github.com/yt-dlp/yt-dlp/releases/download/2025.01.15/yt-dlp_linux"; \ 14 | ;; \ 15 | armv7l) \ 16 | URL="https://github.com/yt-dlp/yt-dlp/releases/download/2025.01.15/yt-dlp_linux_armv7l"; \ 17 | ;; \ 18 | aarch64) \ 19 | URL="https://github.com/yt-dlp/yt-dlp/releases/download/2025.01.15/yt-dlp_linux_aarch64"; \ 20 | ;; \ 21 | *) \ 22 | echo "Unsupported architecture: $ARCH" && exit 1; \ 23 | ;; \ 24 | esac && \ 25 | wget -O bin/yt-dlp "$URL" && \ 26 | chmod +x bin/yt-dlp 27 | 28 | COPY KrillinAI ./ 29 | 30 | RUN mkdir -p /app/models && \ 31 | chmod +x ./KrillinAI 32 | 33 | VOLUME ["/app/bin", "/app/models"] 34 | 35 | ENV PATH="/app/bin:${PATH}" 36 | 37 | EXPOSE 8888/tcp 38 | 39 | ENTRYPOINT ["./KrillinAI"] 40 | -------------------------------------------------------------------------------- /aliyun.md: -------------------------------------------------------------------------------- 1 | ## Prerequisites 2 | You need to have an [Alibaba Cloud](https://www.aliyun.com) account and complete real-name verification. Most services have free quotas. 3 | 4 | ## Obtaining Alibaba Cloud `access_key_id` and `access_key_secret` 5 | 1. Go to the [Alibaba Cloud AccessKey management page](https://ram.console.aliyun.com/profile/access-keys). 6 | 2. Click on "Create AccessKey." If needed, select the usage method as "Used in local development environment." 7 | ![Alibaba Cloud access key](/docs/images/aliyun_accesskey_1.png) 8 | 3. Keep it safe; it's best to copy it to a local file for storage. 9 | 10 | ## Activating Alibaba Cloud Voice Service 11 | 1. Go to the [Alibaba Cloud Voice Service management page](https://nls-portal.console.aliyun.com/applist). You need to activate the service on your first visit. 12 | 2. Click on "Create Project." 13 | ![Alibaba Cloud speech](/docs/images/aliyun_speech_1.png) 14 | 3. Select features and activate them. 15 | ![Alibaba Cloud speech](/docs/images/aliyun_speech_2.png) 16 | 4. The "Streaming Text-to-Speech (CosyVoice Large Model)" needs to be upgraded to the commercial version; other services can use the free trial version. 
17 | ![Alibaba Cloud speech](/docs/images/aliyun_speech_3.png) 18 | 5. Simply copy the app key. 19 | ![Alibaba Cloud speech](/docs/images/aliyun_speech_4.png) 20 | 21 | ## Activating Alibaba Cloud OSS Service 22 | 1. Go to the [Alibaba Cloud Object Storage Service Console](https://oss.console.aliyun.com/overview). You need to activate the service on your first visit. 23 | 2. Select the Bucket list on the left, then click "Create." 24 | ![Alibaba Cloud OSS](/docs/images/aliyun_oss_1.png) 25 | 3. Choose "Quick Create," fill in a compliant Bucket name, and select the **Shanghai** region to complete the creation (the name you enter here will be the value for the configuration item `aliyun.oss.bucket`). 26 | ![Alibaba Cloud OSS](/docs/images/aliyun_oss_2.png) 27 | 4. After creation, enter the Bucket. 28 | ![Alibaba Cloud OSS](/docs/images/aliyun_oss_3.png) 29 | 5. Turn off the "Block Public Access" switch and set the read and write permissions to "Public Read." 30 | ![Alibaba Cloud OSS](/docs/images/aliyun_oss_4.png) 31 | ![Alibaba Cloud OSS](/docs/images/aliyun_oss_5.png) -------------------------------------------------------------------------------- /cmd/desktop/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "go.uber.org/zap" 5 | "krillin-ai/config" 6 | "krillin-ai/internal/desktop" 7 | "krillin-ai/internal/server" 8 | "krillin-ai/log" 9 | "os" 10 | ) 11 | 12 | func main() { 13 | log.InitLogger() 14 | defer log.GetLogger().Sync() 15 | 16 | if !config.LoadConfig() { 17 | // 确保有最基础的配置 18 | err := config.SaveConfig() 19 | if err != nil { 20 | log.GetLogger().Error("保存配置失败", zap.Error(err)) 21 | os.Exit(1) 22 | } 23 | } 24 | go func() { 25 | if err := server.StartBackend(); err != nil { 26 | log.GetLogger().Error("后端服务启动失败", zap.Error(err)) 27 | os.Exit(1) 28 | } 29 | }() 30 | config.ConfigBackup = config.Conf 31 | desktop.Show() 32 | } 33 | -------------------------------------------------------------------------------- /cmd/server/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "go.uber.org/zap" 5 | "krillin-ai/config" 6 | "krillin-ai/internal/deps" 7 | "krillin-ai/internal/server" 8 | "krillin-ai/log" 9 | "os" 10 | ) 11 | 12 | func main() { 13 | log.InitLogger() 14 | defer log.GetLogger().Sync() 15 | 16 | var err error 17 | if !config.LoadConfig() { 18 | return 19 | } 20 | 21 | if err = config.CheckConfig(); err != nil { 22 | log.GetLogger().Error("加载配置失败", zap.Error(err)) 23 | return 24 | } 25 | 26 | if err = deps.CheckDependency(); err != nil { 27 | log.GetLogger().Error("依赖环境准备失败", zap.Error(err)) 28 | return 29 | } 30 | if err = server.StartBackend(); err != nil { 31 | log.GetLogger().Error("后端服务启动失败", zap.Error(err)) 32 | os.Exit(1) 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /config/config-example.toml: -------------------------------------------------------------------------------- 1 | [app] 2 | segment_duration = 5 # 音频切分处理间隔,单位:分钟,建议值:5-10,如果视频中话语较少可以适当提高 3 | transcribe_parallel_num = 1 # 并发进行转录的数量上限,建议值:1-3,如果使用了本地模型,最好调成1 4 | translate_parallel_num = 3 # 并发进行翻译的数量上限,建议值:3,倍于转录的并发量,如果使用TPM限制严格的API,可以适当调低 5 | transcribe_max_attempts = 3 # 转录最大尝试次数,建议值:3 6 | translate_max_attempts = 5 # 翻译最大尝试次数,建议值:5,如果模型参数量较少或翻译失败率较高可以适当调高 7 | proxy = "" # 网络代理地址,格式如http://127.0.0.1:7890,可不填 8 | 9 | [server] 10 | host = "127.0.0.1" 11 | port = 8888 12 | 13 | # 下方的配置不是都要填,请结合文档说明进行配置 14 
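# A minimal sketch (an illustration, not an authoritative list of required fields): assuming
# transcription and translation both go through OpenAI-compatible services and dubbing/TTS is
# not used, it is typically enough to fill in llm.api_key and transcribe.openai.api_key below
# and keep the remaining values at their defaults.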
| 15 | [llm] #支持openai,deepseek,通义千问等所有兼容openai请求格式的模型服务 16 | base_url = "" # 自定义base url,可配合转发站密钥使用,留空为openai官方api 17 | api_key = "" # API密钥 18 | model = "" # 指定模型名,可通过此字段结合base_url使用外部任何与OpenAI API兼容的大模型服务,留空默认为gpt-4o-mini 19 | 20 | [transcribe] # 视频转文本支持多种方案,配置时先填provider,再填对应的配置 21 | provider = "openai" #语音识别,当前可选值:openai,fasterwhisper,whisperkit,whisper.cpp,aliyun。(fasterwhisper不支持macOS,whisperkit只支持M芯片) 22 | [transcribe.openai] 23 | base_url = "" 24 | api_key = "" 25 | model = "whisper-1" 26 | [transcribe.fasterwhisper] 27 | model = "medium" # fasterwhisper的本地模型可选值:tiny,medium,large-v2。建议medium及以上 28 | [transcribe.whisperkit] 29 | model = "large-v2" # whisperkit的本地模型可选值:large-v2 30 | [transcribe.whispercpp] 31 | model = "large-v2" # whispercpp的本地模型可选值:large-v2 32 | [transcribe.aliyun] # provider选aliyun这块就都要填 33 | [transcribe.aliyun.oss] 34 | access_key_id = "" 35 | access_key_secret = "" 36 | bucket = "" 37 | [transcribe.aliyun.speech] 38 | access_key_id = "" 39 | access_key_secret = "" 40 | app_key= "" 41 | 42 | [tts] 43 | provider = "aliyun" # 可选值:openai,aliyun 44 | [tts.openai] 45 | base_url = "" 46 | api_key = "" 47 | model = "" # gpt-4o-mini-tts, tts-1, tts-1-hd 48 | [tts.aliyun] # provider选aliyun这块就都要填 49 | [tts.aliyun.oss] 50 | access_key_id = "" 51 | access_key_secret = "" 52 | bucket = "" 53 | [tts.aliyun.speech] 54 | access_key_id = "" 55 | access_key_secret = "" 56 | app_key= "" -------------------------------------------------------------------------------- /config/config.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "krillin-ai/log" 7 | "net/url" 8 | "os" 9 | "path/filepath" 10 | "runtime" 11 | 12 | "github.com/BurntSushi/toml" 13 | "go.uber.org/zap" 14 | ) 15 | 16 | var ConfigBackup Config // 用于在开始任务之前,检测配置是否更新,更新后要重启服务端 17 | 18 | type App struct { 19 | SegmentDuration int `toml:"segment_duration"` 20 | TranscribeParallelNum int `toml:"transcribe_parallel_num"` 21 | TranslateParallelNum int `toml:"translate_parallel_num"` 22 | TranscribeMaxAttempts int `toml:"transcribe_max_attempts"` 23 | TranslateMaxAttempts int `toml:"translate_max_attempts"` 24 | Proxy string `toml:"proxy"` 25 | ParsedProxy *url.URL `toml:"-"` 26 | } 27 | 28 | type Server struct { 29 | Host string `toml:"host"` 30 | Port int `toml:"port"` 31 | } 32 | 33 | type OpenaiCompatibleConfig struct { 34 | BaseUrl string `toml:"base_url"` 35 | ApiKey string `toml:"api_key"` 36 | Model string `toml:"model"` 37 | } 38 | 39 | type LocalModelConfig struct { 40 | Model string `toml:"model"` 41 | } 42 | 43 | type AliyunSpeechConfig struct { 44 | AccessKeyId string `toml:"access_key_id"` 45 | AccessKeySecret string `toml:"access_key_secret"` 46 | AppKey string `toml:"app_key"` 47 | } 48 | 49 | type AliyunOssConfig struct { 50 | AccessKeyId string `toml:"access_key_id"` 51 | AccessKeySecret string `toml:"access_key_secret"` 52 | Bucket string `toml:"bucket"` 53 | } 54 | 55 | type AliyunTranscribeConfig struct { 56 | Oss AliyunOssConfig `toml:"oss"` 57 | Speech AliyunSpeechConfig `toml:"speech"` 58 | } 59 | 60 | type Transcribe struct { 61 | Provider string `toml:"provider"` 62 | Openai OpenaiCompatibleConfig `toml:"openai"` 63 | Fasterwhisper LocalModelConfig `toml:"fasterwhisper"` 64 | Whisperkit LocalModelConfig `toml:"whisperkit"` 65 | Whispercpp LocalModelConfig `toml:"whispercpp"` 66 | Aliyun AliyunTranscribeConfig `toml:"aliyun"` 67 | } 68 | 69 | type AliyunTtsConfig struct { 70 | Oss AliyunOssConfig `toml:"oss"` 71 | 
Speech AliyunSpeechConfig `toml:"speech"` 72 | } 73 | 74 | type Tts struct { 75 | Provider string `toml:"provider"` 76 | Openai OpenaiCompatibleConfig `toml:"openai"` 77 | Aliyun AliyunTtsConfig `toml:"aliyun"` 78 | } 79 | 80 | type OpenAiWhisper struct { 81 | BaseUrl string `toml:"base_url"` 82 | ApiKey string `toml:"api_key"` 83 | } 84 | 85 | type Config struct { 86 | App App `toml:"app"` 87 | Server Server `toml:"server"` 88 | Llm OpenaiCompatibleConfig `toml:"llm"` 89 | Transcribe Transcribe `toml:"transcribe"` 90 | Tts Tts `toml:"tts"` 91 | } 92 | 93 | var Conf = Config{ 94 | App: App{ 95 | SegmentDuration: 5, 96 | TranslateParallelNum: 3, 97 | TranscribeParallelNum: 1, 98 | TranscribeMaxAttempts: 3, 99 | TranslateMaxAttempts: 3, 100 | }, 101 | Server: Server{ 102 | Host: "127.0.0.1", 103 | Port: 8888, 104 | }, 105 | Llm: OpenaiCompatibleConfig{ 106 | Model: "gpt-4o-mini", 107 | }, 108 | Transcribe: Transcribe{ 109 | Provider: "openai", 110 | Openai: OpenaiCompatibleConfig{ 111 | Model: "whisper-1", 112 | }, 113 | Fasterwhisper: LocalModelConfig{ 114 | Model: "large-v2", 115 | }, 116 | Whisperkit: LocalModelConfig{ 117 | Model: "large-v2", 118 | }, 119 | Whispercpp: LocalModelConfig{ 120 | Model: "large-v2", 121 | }, 122 | }, 123 | Tts: Tts{ 124 | Provider: "openai", 125 | Openai: OpenaiCompatibleConfig{ 126 | Model: "gpt-4o-mini-tts", 127 | }, 128 | }, 129 | } 130 | 131 | // 检查必要的配置是否完整 132 | func validateConfig() error { 133 | // 检查转写服务提供商配置 134 | switch Conf.Transcribe.Provider { 135 | case "openai": 136 | if Conf.Transcribe.Openai.ApiKey == "" { 137 | return errors.New("使用OpenAI转录服务需要配置 OpenAI API Key") 138 | } 139 | case "fasterwhisper": 140 | if Conf.Transcribe.Fasterwhisper.Model != "tiny" && Conf.Transcribe.Fasterwhisper.Model != "medium" && Conf.Transcribe.Fasterwhisper.Model != "large-v2" { 141 | return errors.New("检测到开启了fasterwhisper,但模型选型配置不正确,请检查配置") 142 | } 143 | case "whisperkit": 144 | if runtime.GOOS != "darwin" { 145 | log.GetLogger().Error("whisperkit只支持macos", zap.String("当前系统", runtime.GOOS)) 146 | return fmt.Errorf("whisperkit只支持macos") 147 | } 148 | if Conf.Transcribe.Whisperkit.Model != "large-v2" { 149 | return errors.New("检测到开启了whisperkit,但模型选型配置不正确,请检查配置") 150 | } 151 | case "whispercpp": 152 | if runtime.GOOS != "windows" { // 当前先仅支持win,模型仅支持large-v2,最小化产品 153 | log.GetLogger().Error("whispercpp only support windows", zap.String("current os", runtime.GOOS)) 154 | return fmt.Errorf("whispercpp only support windows") 155 | } 156 | if Conf.Transcribe.Whispercpp.Model != "large-v2" { 157 | return errors.New("检测到开启了whisper.cpp,但模型选型配置不正确,请检查配置") 158 | } 159 | case "aliyun": 160 | if Conf.Transcribe.Aliyun.Speech.AccessKeyId == "" || Conf.Transcribe.Aliyun.Speech.AccessKeySecret == "" || Conf.Transcribe.Aliyun.Speech.AppKey == "" { 161 | return errors.New("使用阿里云语音服务需要配置相关密钥") 162 | } 163 | default: 164 | return errors.New("不支持的转录提供商") 165 | } 166 | 167 | return nil 168 | } 169 | 170 | func LoadConfig() bool { 171 | var err error 172 | configPath := "./config/config.toml" 173 | if _, err = os.Stat(configPath); os.IsNotExist(err) { 174 | log.GetLogger().Info("未找到配置文件") 175 | return false 176 | } else { 177 | log.GetLogger().Info("已找到配置文件,从配置文件中加载配置") 178 | if _, err = toml.DecodeFile(configPath, &Conf); err != nil { 179 | log.GetLogger().Error("加载配置文件失败", zap.Error(err)) 180 | return false 181 | } 182 | return true 183 | } 184 | } 185 | 186 | // 验证配置 187 | func CheckConfig() error { 188 | var err error 189 | // 解析代理地址 190 | Conf.App.ParsedProxy, err = 
url.Parse(Conf.App.Proxy) 191 | if err != nil { 192 | return err 193 | } 194 | return validateConfig() 195 | } 196 | 197 | // SaveConfig 保存配置到文件 198 | func SaveConfig() error { 199 | configPath := filepath.Join("config", "config.toml") 200 | 201 | if _, err := os.Stat(configPath); os.IsNotExist(err) { 202 | err = os.MkdirAll(filepath.Dir(configPath), os.ModePerm) 203 | if err != nil { 204 | return err 205 | } 206 | } 207 | 208 | data, err := toml.Marshal(Conf) 209 | if err != nil { 210 | return err 211 | } 212 | 213 | err = os.WriteFile(configPath, data, 0644) 214 | if err != nil { 215 | return err 216 | } 217 | 218 | return nil 219 | } 220 | -------------------------------------------------------------------------------- /docker.md: -------------------------------------------------------------------------------- 1 | # Docker Deployment Guide 2 | 3 | ## Quick Start 4 | First, prepare the configuration file, setting the server listening port to `8888` and the server listening address to `0.0.0.0`. 5 | 6 | ### Starting with docker run 7 | ```bash 8 | docker run -d \ 9 | -p 8888:8888 \ 10 | -v /path/to/config.toml:/app/config/config.toml \ 11 | -v /path/to/tasks:/app/tasks \ 12 | asteria798/krillinai 13 | ``` 14 | 15 | ### Starting with docker-compose 16 | ```yaml 17 | version: '3' 18 | services: 19 | krillin: 20 | image: asteria798/krillinai 21 | ports: 22 | - "8888:8888" 23 | volumes: 24 | - /path/to/config.toml:/app/config/config.toml # Configuration file 25 | - /path/to/tasks:/app/tasks # Output directory 26 | ``` 27 | 28 | ## Persisting Models 29 | If using the fasterwhisper model, KrillinAI will automatically download the necessary files to the `/app/models` and `/app/bin` directories. These files will be lost when the container is deleted. To persist the models, you can map these two directories to a directory on the host. 30 | 31 | ### Starting with docker run 32 | ```bash 33 | docker run -d \ 34 | -p 8888:8888 \ 35 | -v /path/to/config.toml:/app/config/config.toml \ 36 | -v /path/to/tasks:/app/tasks \ 37 | -v /path/to/models:/app/models \ 38 | -v /path/to/bin:/app/bin \ 39 | asteria798/krillinai 40 | ``` 41 | 42 | ### Starting with docker-compose 43 | ```yaml 44 | version: '3' 45 | services: 46 | krillin: 47 | image: asteria798/krillinai 48 | ports: 49 | - "8888:8888" 50 | volumes: 51 | - /path/to/config.toml:/app/config/config.toml 52 | - /path/to/tasks:/app/tasks 53 | - /path/to/models:/app/models 54 | - /path/to/bin:/app/bin 55 | ``` 56 | 57 | ## Notes 58 | 1. If the network mode of the Docker container is not set to host, it is recommended to set the server listening address in the configuration file to `0.0.0.0`, otherwise the service may not be accessible. 59 | 2. If the container needs to access the host's network proxy, please set the proxy address configuration item `proxy`'s `127.0.0.1` to `host.docker.internal`, for example, `http://host.docker.internal:7890`. -------------------------------------------------------------------------------- /docs/ar/aliyun.md: -------------------------------------------------------------------------------- 1 | ## المتطلبات الأساسية 2 | يجب أن يكون لديك حساب على [علي بابا كلاود](https://www.aliyun.com) وتم التحقق من هويتك، حيث أن معظم الخدمات تحتوي على حصة مجانية. 3 | 4 | ## الحصول على `access_key_id` و `access_key_secret` من علي بابا كلاود 5 | 1. انتقل إلى [صفحة إدارة AccessKey في علي بابا كلاود](https://ram.console.aliyun.com/profile/access-keys). 6 | 2. 
انقر على إنشاء AccessKey، وإذا لزم الأمر، اختر طريقة الاستخدام، واختر "استخدام في بيئة التطوير المحلية". 7 | ![علي بابا كلاود access key](/docs/images/aliyun_accesskey_1.png) 8 | 3. احفظه بشكل آمن، من الأفضل نسخه إلى ملف محلي. 9 | 10 | ## تفعيل خدمة الصوت من علي بابا كلاود 11 | 1. انتقل إلى [صفحة إدارة خدمة الصوت من علي بابا كلاود](https://nls-portal.console.aliyun.com/applist)، وعند الدخول لأول مرة، يجب تفعيل الخدمة. 12 | 2. انقر على إنشاء مشروع. 13 | ![علي بابا كلاود speech](/docs/images/aliyun_speech_1.png) 14 | 3. اختر الوظائف وقم بالتفعيل. 15 | ![علي بابا كلاود speech](/docs/images/aliyun_speech_2.png) 16 | 4. "توليد الصوت النصي المتدفق (نموذج CosyVoice الكبير)" يحتاج إلى الترقية إلى النسخة التجارية، بينما يمكن استخدام الخدمات الأخرى بنسخة التجربة المجانية. 17 | ![علي بابا كلاود speech](/docs/images/aliyun_speech_3.png) 18 | 5. انسخ مفتاح التطبيق فقط. 19 | ![علي بابا كلاود speech](/docs/images/aliyun_speech_4.png) 20 | 21 | ## تفعيل خدمة OSS من علي بابا كلاود 22 | 1. انتقل إلى [وحدة التحكم في خدمة التخزين الكائني من علي بابا كلاود](https://oss.console.aliyun.com/overview)، وعند الدخول لأول مرة، يجب تفعيل الخدمة. 23 | 2. اختر قائمة Buckets من الجانب الأيسر، ثم انقر على إنشاء. 24 | ![علي بابا كلاود OSS](/docs/images/aliyun_oss_1.png) 25 | 3. اختر الإنشاء السريع، املأ اسم Bucket الذي يتوافق مع المتطلبات واختر منطقة **شنغهاي**، ثم أكمل الإنشاء (الاسم المدخل هنا هو قيمة الإعداد `aliyun.oss.bucket`). 26 | ![علي بابا كلاود OSS](/docs/images/aliyun_oss_2.png) 27 | 4. بعد الانتهاء من الإنشاء، انتقل إلى Bucket. 28 | ![علي بابا كلاود OSS](/docs/images/aliyun_oss_3.png) 29 | 5. قم بإيقاف تشغيل مفتاح "منع الوصول العام"، واضبط أذونات القراءة والكتابة على "قراءة عامة". 30 | ![علي بابا كلاود OSS](/docs/images/aliyun_oss_4.png) 31 | ![علي بابا كلاود OSS](/docs/images/aliyun_oss_5.png) -------------------------------------------------------------------------------- /docs/ar/docker.md: -------------------------------------------------------------------------------- 1 | # دليل نشر Docker 2 | 3 | ## البدء السريع 4 | قم أولاً بإعداد ملف التكوين، واضبط منفذ الاستماع للخادم على `8888`، وعنوان الاستماع للخادم على `0.0.0.0`. 5 | 6 | ### بدء تشغيل docker run 7 | ```bash 8 | docker run -d \ 9 | -p 8888:8888 \ 10 | -v /path/to/config.toml:/app/config/config.toml \ 11 | -v /path/to/tasks:/app/tasks \ 12 | asteria798/krillinai 13 | ``` 14 | 15 | ### بدء تشغيل docker-compose 16 | ```yaml 17 | version: '3' 18 | services: 19 | krillin: 20 | image: asteria798/krillinai 21 | ports: 22 | - "8888:8888" 23 | volumes: 24 | - /path/to/config.toml:/app/config/config.toml # ملف التكوين 25 | - /path/to/tasks:/app/tasks # دليل الإخراج 26 | ``` 27 | 28 | ## نماذج الاستمرارية 29 | إذا كنت تستخدم نموذج fasterwhisper، سيقوم KrillinAI بتنزيل الملفات المطلوبة للنموذج تلقائيًا إلى دليل `/app/models` ودليل `/app/bin`. ستفقد هذه الملفات بعد حذف الحاوية. إذا كنت بحاجة إلى استمرارية النموذج، يمكنك ربط هذين الدليلين بدليل المضيف. 
30 | 31 | ### بدء تشغيل docker run 32 | ```bash 33 | docker run -d \ 34 | -p 8888:8888 \ 35 | -v /path/to/config.toml:/app/config/config.toml \ 36 | -v /path/to/tasks:/app/tasks \ 37 | -v /path/to/models:/app/models \ 38 | -v /path/to/bin:/app/bin \ 39 | asteria798/krillinai 40 | ``` 41 | 42 | ### بدء تشغيل docker-compose 43 | ```yaml 44 | version: '3' 45 | services: 46 | krillin: 47 | image: asteria798/krillinai 48 | ports: 49 | - "8888:8888" 50 | volumes: 51 | - /path/to/config.toml:/app/config/config.toml 52 | - /path/to/tasks:/app/tasks 53 | - /path/to/models:/app/models 54 | - /path/to/bin:/app/bin 55 | ``` 56 | 57 | ## ملاحظات 58 | 1. إذا لم يكن وضع الشبكة لحاوية docker هو host، يُنصح بتعيين عنوان الاستماع لخادم ملف التكوين على `0.0.0.0`، وإلا قد لا تتمكن من الوصول إلى الخدمة. 59 | 2. إذا كانت الحاوية بحاجة إلى الوصول إلى وكيل الشبكة للمضيف، يرجى تعيين عنوان الوكيل في خيار التكوين `proxy` من `127.0.0.1` إلى `host.docker.internal`، على سبيل المثال `http://host.docker.internal:7890`. -------------------------------------------------------------------------------- /docs/ar/faq.md: -------------------------------------------------------------------------------- 1 | ### 1. لا يمكن رؤية ملف إعدادات `app.log`، ولا يمكن معرفة محتوى الخطأ 2 | يرجى من مستخدمي Windows وضع دليل العمل لهذا البرنامج في مجلد غير موجود على القرص C. 3 | 4 | ### 2. تم إنشاء ملف الإعدادات في النسخة غير المكتبية، ولكن لا يزال يظهر الخطأ "لا يمكن العثور على ملف الإعدادات" 5 | تأكد من أن اسم ملف الإعدادات هو `config.toml`، وليس `config.toml.txt` أو أي شيء آخر. بعد الانتهاء من الإعداد، يجب أن تكون بنية مجلد العمل لهذا البرنامج كما يلي: 6 | ``` 7 | /── config/ 8 | │ └── config.toml 9 | ├── cookies.txt (<- ملف cookies.txt اختياري) 10 | └── krillinai.exe 11 | ``` 12 | 13 | ### 3. تم ملء إعدادات النموذج الكبير، ولكن يظهر الخطأ "xxxxx يحتاج إلى إعداد xxxxx API Key" 14 | على الرغم من أن خدمات النموذج وخدمات الصوت يمكن أن تستخدم كلاهما خدمات openai، إلا أن هناك أيضًا سيناريوهات حيث يستخدم النموذج الكبير خدمات غير openai بشكل منفصل، لذا فإن هذين الإعدادين منفصلان. بالإضافة إلى إعدادات النموذج الكبير، يرجى البحث عن إعدادات whisper أدناه لملء المفتاح والمعلومات المقابلة. 15 | 16 | ### 4. يظهر الخطأ "yt-dlp error" 17 | مشكلة في برنامج تنزيل الفيديو، والتي تبدو حاليًا مجرد مشكلة في الشبكة أو إصدار برنامج التنزيل. تحقق مما إذا كان وكيل الشبكة مفتوحًا ومكونًا في إعدادات ملف الإعدادات، كما يُنصح باختيار نقطة اتصال في هونغ كونغ. يتم تثبيت برنامج التنزيل تلقائيًا بواسطة هذا البرنامج، وسأقوم بتحديث مصدر التثبيت، ولكن نظرًا لأنه ليس مصدرًا رسميًا، فقد يكون هناك تأخير. إذا واجهت مشكلة، حاول تحديثه يدويًا، وطريقة التحديث هي: 18 | 19 | افتح الطرفية في موقع دليل bin للبرنامج، ثم نفذ 20 | ``` 21 | ./yt-dlp.exe -U 22 | ``` 23 | استبدل هنا `yt-dlp.exe` باسم برنامج ytdlp الفعلي في نظامك. 24 | 25 | ### 5. بعد النشر، يتم إنشاء الترجمة بشكل طبيعي، ولكن الترجمة المدمجة في الفيديو تحتوي على الكثير من الرموز غير المفهومة 26 | معظمها بسبب نقص خطوط اللغة الصينية في Linux. يرجى تنزيل خط [微软雅黑](https://modelscope.cn/models/Maranello/KrillinAI_dependency_cn/resolve/master/%E5%AD%97%E4%BD%93/msyh.ttc) و[微软雅黑-bold](https://modelscope.cn/models/Maranello/KrillinAI_dependency_cn/resolve/master/%E5%AD%97%E4%BD%93/msyhbd.ttc) (أو اختيار خط يلبي متطلباتك)، ثم اتبع الخطوات التالية: 27 | 1. أنشئ مجلد msyh في /usr/share/fonts/ وانسخ الخطوط التي تم تنزيلها إلى هذا الدليل 28 | 2. 29 | ``` 30 | cd /usr/share/fonts/msyh 31 | sudo mkfontscale 32 | sudo mkfontdir 33 | fc-cache 34 | ``` 35 | 36 | ### 6. 
كيف يمكن ملء رمز الصوت في تحويل النص إلى كلام؟ 37 | يرجى الرجوع إلى وثائق مزود خدمة الصوت، وفيما يلي ما يتعلق بهذا المشروع: 38 | [وثائق OpenAI TTS](https://platform.openai.com/docs/guides/text-to-speech/api-reference)، الموجودة في خيارات الصوت 39 | [وثائق تفاعل الصوت الذكي من علي بابا](https://help.aliyun.com/zh/isi/developer-reference/overview-of-speech-synthesis)، الموجودة في قائمة الصوت - قيمة معلمة voice -------------------------------------------------------------------------------- /docs/de/aliyun.md: -------------------------------------------------------------------------------- 1 | ## Voraussetzungen 2 | Sie benötigen ein [Alibaba Cloud](https://www.aliyun.com) Konto, das durch eine echte Identitätsprüfung verifiziert wurde. Die meisten Dienste bieten ein kostenloses Kontingent. 3 | 4 | ## Abrufen von `access_key_id` und `access_key_secret` für Alibaba Cloud 5 | 1. Gehen Sie zur [Alibaba Cloud AccessKey-Verwaltungsseite](https://ram.console.aliyun.com/profile/access-keys). 6 | 2. Klicken Sie auf "AccessKey erstellen". Wählen Sie bei Bedarf die Verwendungsmethode "In der lokalen Entwicklungsumgebung verwenden". 7 | ![Alibaba Cloud access key](/docs/images/aliyun_accesskey_1.png) 8 | 3. Bewahren Sie diese sicher auf, am besten kopieren Sie sie in eine lokale Datei. 9 | 10 | ## Aktivierung des Alibaba Cloud Sprachdienstes 11 | 1. Gehen Sie zur [Alibaba Cloud Sprachdienstverwaltungsseite](https://nls-portal.console.aliyun.com/applist). Bei der ersten Anmeldung müssen Sie den Dienst aktivieren. 12 | 2. Klicken Sie auf "Projekt erstellen". 13 | ![Alibaba Cloud speech](/docs/images/aliyun_speech_1.png) 14 | 3. Wählen Sie die Funktionen aus und aktivieren Sie sie. 15 | ![Alibaba Cloud speech](/docs/images/aliyun_speech_2.png) 16 | 4. "Stream Text-to-Speech (CosyVoice großes Modell)" muss auf die kommerzielle Version aktualisiert werden, andere Dienste können mit der kostenlosen Testversion verwendet werden. 17 | ![Alibaba Cloud speech](/docs/images/aliyun_speech_3.png) 18 | 5. Kopieren Sie einfach den App-Key. 19 | ![Alibaba Cloud speech](/docs/images/aliyun_speech_4.png) 20 | 21 | ## Aktivierung des Alibaba Cloud OSS-Dienstes 22 | 1. Gehen Sie zur [Alibaba Cloud Object Storage Service-Konsole](https://oss.console.aliyun.com/overview). Bei der ersten Anmeldung müssen Sie den Dienst aktivieren. 23 | 2. Wählen Sie in der linken Spalte die Bucket-Liste aus und klicken Sie auf "Erstellen". 24 | ![Alibaba Cloud OSS](/docs/images/aliyun_oss_1.png) 25 | 3. Wählen Sie "Schneller erstellen", geben Sie einen Bucket-Namen ein, der den Anforderungen entspricht, und wählen Sie die Region **Shanghai** aus, um die Erstellung abzuschließen (der hier eingegebene Name ist der Wert für die Konfiguration `aliyun.oss.bucket`). 26 | ![Alibaba Cloud OSS](/docs/images/aliyun_oss_2.png) 27 | 4. Nach der Erstellung gehen Sie in den Bucket. 28 | ![Alibaba Cloud OSS](/docs/images/aliyun_oss_3.png) 29 | 5. Deaktivieren Sie den Schalter "Öffentlichen Zugriff blockieren" und setzen Sie die Lese- und Schreibberechtigungen auf "Öffentlich lesbar". 
30 | ![Alibaba Cloud OSS](/docs/images/aliyun_oss_4.png) 31 | ![Alibaba Cloud OSS](/docs/images/aliyun_oss_5.png) -------------------------------------------------------------------------------- /docs/de/docker.md: -------------------------------------------------------------------------------- 1 | # Docker Bereitstellungsanleitung 2 | 3 | ## Schnellstart 4 | Bereiten Sie zunächst die Konfigurationsdatei vor und setzen Sie den Server-Listener-Port auf `8888` und die Server-Listener-Adresse auf `0.0.0.0`. 5 | 6 | ### docker run starten 7 | ```bash 8 | docker run -d \ 9 | -p 8888:8888 \ 10 | -v /path/to/config.toml:/app/config/config.toml \ 11 | -v /path/to/tasks:/app/tasks \ 12 | asteria798/krillinai 13 | ``` 14 | 15 | ### docker-compose starten 16 | ```yaml 17 | version: '3' 18 | services: 19 | krillin: 20 | image: asteria798/krillinai 21 | ports: 22 | - "8888:8888" 23 | volumes: 24 | - /path/to/config.toml:/app/config/config.toml # Konfigurationsdatei 25 | - /path/to/tasks:/app/tasks # Ausgabeverzeichnis 26 | ``` 27 | 28 | ## Modellpersistenz 29 | Wenn das fasterwhisper-Modell verwendet wird, lädt KrillinAI automatisch die benötigten Dateien in das Verzeichnis `/app/models` und das Verzeichnis `/app/bin`. Diese Dateien gehen verloren, wenn der Container gelöscht wird. Um das Modell zu persistieren, können Sie diese beiden Verzeichnisse auf ein Verzeichnis des Hosts abbilden. 30 | 31 | ### docker run starten 32 | ```bash 33 | docker run -d \ 34 | -p 8888:8888 \ 35 | -v /path/to/config.toml:/app/config/config.toml \ 36 | -v /path/to/tasks:/app/tasks \ 37 | -v /path/to/models:/app/models \ 38 | -v /path/to/bin:/app/bin \ 39 | asteria798/krillinai 40 | ``` 41 | 42 | ### docker-compose starten 43 | ```yaml 44 | version: '3' 45 | services: 46 | krillin: 47 | image: asteria798/krillinai 48 | ports: 49 | - "8888:8888" 50 | volumes: 51 | - /path/to/config.toml:/app/config/config.toml 52 | - /path/to/tasks:/app/tasks 53 | - /path/to/models:/app/models 54 | - /path/to/bin:/app/bin 55 | ``` 56 | 57 | ## Hinweise 58 | 1. Wenn der Netzwerkmodus des Docker-Containers nicht `host` ist, wird empfohlen, die Server-Listener-Adresse der Konfigurationsdatei auf `0.0.0.0` zu setzen, da sonst der Zugriff auf den Dienst möglicherweise nicht möglich ist. 59 | 2. Wenn der Container auf den Netzwerkproxy des Hosts zugreifen muss, setzen Sie die Proxy-Adresse in der Konfiguration `proxy` von `127.0.0.1` auf `host.docker.internal`, z. B. `http://host.docker.internal:7890`. -------------------------------------------------------------------------------- /docs/de/faq.md: -------------------------------------------------------------------------------- 1 | ### 1. `app.log` Konfigurationsdatei nicht sichtbar, Fehlerinhalt kann nicht ermittelt werden 2 | Windows-Benutzer sollten das Arbeitsverzeichnis dieser Software in einen Ordner außerhalb des C-Laufwerks legen. 3 | 4 | ### 2. Die Konfigurationsdatei wurde zwar erstellt, aber es erscheint der Fehler „Konfigurationsdatei nicht gefunden“ 5 | Stellen Sie sicher, dass der Dateiname der Konfigurationsdatei `config.toml` ist und nicht `config.toml.txt` oder etwas anderes. 6 | Nach der Konfiguration sollte die Struktur des Arbeitsordners dieser Software wie folgt aussehen: 7 | ``` 8 | /── config/ 9 | │ └── config.toml 10 | ├── cookies.txt (<- optionaler cookies.txt Datei) 11 | └── krillinai.exe 12 | ``` 13 | 14 | ### 3. 
Große Modellkonfiguration ausgefüllt, aber der Fehler „xxxxx benötigt die Konfiguration des xxxxx API-Schlüssels“ erscheint 15 | Obwohl sowohl der Modellservice als auch der Sprachdienst die Dienste von OpenAI nutzen können, gibt es auch Szenarien, in denen große Modelle unabhängig von OpenAI verwendet werden. Daher sind diese beiden Konfigurationen getrennt. Neben der großen Modellkonfiguration suchen Sie bitte im unteren Bereich der Konfiguration nach den Whisper-Konfigurationen, um die entsprechenden Schlüssel und Informationen auszufüllen. 16 | 17 | ### 4. Fehler enthält „yt-dlp error“ 18 | Das Problem mit dem Video-Downloader scheint derzeit nur ein Netzwerkproblem oder ein Versionsproblem des Downloaders zu sein. Überprüfen Sie, ob der Netzwerkproxy aktiviert ist und ob er in den Proxy-Konfigurationseinstellungen der Konfigurationsdatei korrekt konfiguriert ist. Es wird empfohlen, einen Hongkong-Knoten auszuwählen. Der Downloader wird automatisch von dieser Software installiert. Ich werde die Installationsquelle aktualisieren, aber da es sich nicht um eine offizielle Quelle handelt, kann es zu Verzögerungen kommen. Bei Problemen versuchen Sie bitte, manuell zu aktualisieren. Die Aktualisierungsmethode: 19 | 20 | Öffnen Sie ein Terminal im bin-Verzeichnis der Software und führen Sie aus: 21 | ``` 22 | ./yt-dlp.exe -U 23 | ``` 24 | Ersetzen Sie hier `yt-dlp.exe` durch den tatsächlichen Namen der ytdlp-Software in Ihrem System. 25 | 26 | ### 5. Nach der Bereitstellung werden die Untertitel normal generiert, aber die eingebetteten Untertitel im Video enthalten viele Zeichenfehler 27 | In den meisten Fällen liegt dies daran, dass auf Linux die chinesischen Schriftarten fehlen. Bitte laden Sie die Schriftarten [微软雅黑](https://modelscope.cn/models/Maranello/KrillinAI_dependency_cn/resolve/master/%E5%AD%97%E4%BD%93/msyh.ttc) und [微软雅黑-bold](https://modelscope.cn/models/Maranello/KrillinAI_dependency_cn/resolve/master/%E5%AD%97%E4%BD%93/msyhbd.ttc) herunter (oder wählen Sie selbst Schriftarten aus, die Ihren Anforderungen entsprechen), und befolgen Sie dann die folgenden Schritte: 28 | 1. Erstellen Sie einen neuen Ordner namens msyh unter /usr/share/fonts/ und kopieren Sie die heruntergeladenen Schriftarten in dieses Verzeichnis. 29 | 2. 30 | ``` 31 | cd /usr/share/fonts/msyh 32 | sudo mkfontscale 33 | sudo mkfontdir 34 | fc-cache 35 | ``` 36 | 37 | ### 6. Wie fülle ich den Klangcode für die Sprachsynthese aus? 38 | Bitte beziehen Sie sich auf die Dokumentation des Sprachdienstanbieters. Hier sind die relevanten Informationen für dieses Projekt: 39 | [OpenAI TTS-Dokumentation](https://platform.openai.com/docs/guides/text-to-speech/api-reference), zu finden unter Voice options 40 | [Alibaba Cloud Intelligent Speech Interaction Dokumentation](https://help.aliyun.com/zh/isi/developer-reference/overview-of-speech-synthesis), zu finden unter Klangliste - voice Parameterwerte -------------------------------------------------------------------------------- /docs/es/aliyun.md: -------------------------------------------------------------------------------- 1 | ## Requisitos previos 2 | Necesitas tener una cuenta de [Alibaba Cloud](https://www.aliyun.com) y haber completado la verificación de identidad. La mayoría de los servicios tienen un límite gratuito. 3 | 4 | ## Obtención de `access_key_id` y `access_key_secret` de Alibaba Cloud 5 | 1. Accede a la [página de gestión de AccessKey de Alibaba Cloud](https://ram.console.aliyun.com/profile/access-keys). 6 | 2. Haz clic en crear AccessKey. 
Si es necesario, selecciona el método de uso y elige "Uso en entorno de desarrollo local". 7 | ![Clave de acceso de Alibaba Cloud](/docs/images/aliyun_accesskey_1.png) 8 | 3. Guarda de manera segura, es mejor copiarlo en un archivo local. 9 | 10 | ## Activación del servicio de voz de Alibaba Cloud 11 | 1. Accede a la [página de gestión del servicio de voz de Alibaba Cloud](https://nls-portal.console.aliyun.com/applist). La primera vez que ingreses, necesitarás activar el servicio. 12 | 2. Haz clic en crear proyecto. 13 | ![Voz de Alibaba Cloud](/docs/images/aliyun_speech_1.png) 14 | 3. Selecciona las funciones y actívalas. 15 | ![Voz de Alibaba Cloud](/docs/images/aliyun_speech_2.png) 16 | 4. La "síntesis de voz de texto en streaming (modelo grande CosyVoice)" necesita ser actualizada a la versión comercial; otros servicios pueden utilizar la versión de prueba gratuita. 17 | ![Voz de Alibaba Cloud](/docs/images/aliyun_speech_3.png) 18 | 5. Copia la clave de la aplicación. 19 | ![Voz de Alibaba Cloud](/docs/images/aliyun_speech_4.png) 20 | 21 | ## Activación del servicio OSS de Alibaba Cloud 22 | 1. Accede a la [consola del servicio de almacenamiento de objetos de Alibaba Cloud](https://oss.console.aliyun.com/overview). La primera vez que ingreses, necesitarás activar el servicio. 23 | 2. Selecciona la lista de Buckets en el lado izquierdo y luego haz clic en crear. 24 | ![OSS de Alibaba Cloud](/docs/images/aliyun_oss_1.png) 25 | 3. Selecciona creación rápida, completa un nombre de Bucket que cumpla con los requisitos y elige la región **Shanghái**, y finaliza la creación (el nombre que ingreses aquí será el valor de la configuración `aliyun.oss.bucket`). 26 | ![OSS de Alibaba Cloud](/docs/images/aliyun_oss_2.png) 27 | 4. Una vez creado, accede al Bucket. 28 | ![OSS de Alibaba Cloud](/docs/images/aliyun_oss_3.png) 29 | 5. Desactiva el interruptor de "Bloquear acceso público" y establece los permisos de lectura y escritura en "Lectura pública". 30 | ![OSS de Alibaba Cloud](/docs/images/aliyun_oss_4.png) 31 | ![OSS de Alibaba Cloud](/docs/images/aliyun_oss_5.png) -------------------------------------------------------------------------------- /docs/es/docker.md: -------------------------------------------------------------------------------- 1 | # Guía de implementación de Docker 2 | 3 | ## Comenzar rápidamente 4 | Primero, prepara el archivo de configuración, configurando el puerto de escucha del servidor en `8888` y la dirección de escucha del servidor en `0.0.0.0`. 5 | 6 | ### Inicio con docker run 7 | ```bash 8 | docker run -d \ 9 | -p 8888:8888 \ 10 | -v /path/to/config.toml:/app/config/config.toml \ 11 | -v /path/to/tasks:/app/tasks \ 12 | asteria798/krillinai 13 | ``` 14 | 15 | ### Inicio con docker-compose 16 | ```yaml 17 | version: '3' 18 | services: 19 | krillin: 20 | image: asteria798/krillinai 21 | ports: 22 | - "8888:8888" 23 | volumes: 24 | - /path/to/config.toml:/app/config/config.toml # Archivo de configuración 25 | - /path/to/tasks:/app/tasks # Directorio de salida 26 | ``` 27 | 28 | ## Persistencia del modelo 29 | Si utilizas el modelo fasterwhisper, KrillinAI descargará automáticamente los archivos necesarios para el modelo en el directorio `/app/models` y el directorio `/app/bin`. Estos archivos se perderán al eliminar el contenedor. Si necesitas persistir el modelo, puedes mapear estos dos directorios a un directorio en el host. 
30 | 31 | ### Inicio con docker run 32 | ```bash 33 | docker run -d \ 34 | -p 8888:8888 \ 35 | -v /path/to/config.toml:/app/config/config.toml \ 36 | -v /path/to/tasks:/app/tasks \ 37 | -v /path/to/models:/app/models \ 38 | -v /path/to/bin:/app/bin \ 39 | asteria798/krillinai 40 | ``` 41 | 42 | ### Inicio con docker-compose 43 | ```yaml 44 | version: '3' 45 | services: 46 | krillin: 47 | image: asteria798/krillinai 48 | ports: 49 | - "8888:8888" 50 | volumes: 51 | - /path/to/config.toml:/app/config/config.toml 52 | - /path/to/tasks:/app/tasks 53 | - /path/to/models:/app/models 54 | - /path/to/bin:/app/bin 55 | ``` 56 | 57 | ## Consideraciones 58 | 1. Si el modo de red del contenedor de Docker no es host, se recomienda configurar la dirección de escucha del servidor en el archivo de configuración como `0.0.0.0`, de lo contrario, es posible que no se pueda acceder al servicio. 59 | 2. Si el contenedor necesita acceder al proxy de red del host, configura la opción de dirección del proxy `proxy` de `127.0.0.1` a `host.docker.internal`, por ejemplo, `http://host.docker.internal:7890`. -------------------------------------------------------------------------------- /docs/es/faq.md: -------------------------------------------------------------------------------- 1 | ### 1. No se puede ver el archivo de configuración `app.log`, no se puede saber el contenido del error 2 | Los usuarios de Windows deben colocar el directorio de trabajo de este software en una carpeta que no esté en la unidad C. 3 | 4 | ### 2. La versión no de escritorio ha creado el archivo de configuración, pero sigue mostrando el error "No se puede encontrar el archivo de configuración" 5 | Asegúrate de que el nombre del archivo de configuración sea `config.toml`, y no `config.toml.txt` u otro. 6 | Una vez completada la configuración, la estructura de la carpeta de trabajo de este software debería ser la siguiente: 7 | ``` 8 | /── config/ 9 | │ └── config.toml 10 | ├── cookies.txt (<- archivo cookies.txt opcional) 11 | └── krillinai.exe 12 | ``` 13 | 14 | ### 3. Se completó la configuración del modelo grande, pero aparece el error "xxxxx necesita configurar la clave API de xxxxx" 15 | Aunque los servicios de modelo y de voz pueden utilizar ambos los servicios de OpenAI, también hay escenarios en los que el modelo grande utiliza servicios que no son de OpenAI, por lo que estas dos configuraciones son independientes. Además de la configuración del modelo grande, busca la configuración de whisper más abajo para completar la clave y otra información correspondiente. 16 | 17 | ### 4. El error contiene "yt-dlp error" 18 | El problema del descargador de videos, por lo que parece, se reduce a problemas de red o de versión del descargador. Verifica si el proxy de red está habilitado y configurado en la sección de proxy del archivo de configuración, y se recomienda elegir un nodo de Hong Kong. El descargador se instala automáticamente con este software; actualizaré la fuente de instalación, pero no es oficial, por lo que puede haber desactualizaciones. Si encuentras problemas, intenta actualizar manualmente con el siguiente método: 19 | 20 | Abre una terminal en la ubicación del directorio bin del software y ejecuta 21 | ``` 22 | ./yt-dlp.exe -U 23 | ``` 24 | Aquí, reemplaza `yt-dlp.exe` con el nombre real del software ytdlp en tu sistema. 25 | 26 | ### 5. 
Después de la implementación, la generación de subtítulos es normal, pero los subtítulos incrustados en el video tienen muchos caracteres extraños 27 | La mayoría de las veces esto se debe a la falta de fuentes chinas en Linux. Descarga las fuentes [Microsoft YaHei](https://modelscope.cn/models/Maranello/KrillinAI_dependency_cn/resolve/master/%E5%AD%97%E4%BD%93/msyh.ttc) y [Microsoft YaHei-bold](https://modelscope.cn/models/Maranello/KrillinAI_dependency_cn/resolve/master/%E5%AD%97%E4%BD%93/msyhbd.ttc) (o elige fuentes que satisfagan tus requisitos) y luego sigue estos pasos: 28 | 1. Crea una carpeta msyh en /usr/share/fonts/ y copia las fuentes descargadas en ese directorio. 29 | 2. 30 | ``` 31 | cd /usr/share/fonts/msyh 32 | sudo mkfontscale 33 | sudo mkfontdir 34 | fc-cache 35 | ``` 36 | 37 | ### 6. ¿Cómo se completa el código de tono para la síntesis de voz? 38 | Consulta la documentación del proveedor del servicio de voz; a continuación se presentan los documentos relacionados con este proyecto: 39 | [Documentación de OpenAI TTS](https://platform.openai.com/docs/guides/text-to-speech/api-reference), ubicada en Opciones de voz 40 | [Documentación de interacción de voz inteligente de Alibaba Cloud](https://help.aliyun.com/zh/isi/developer-reference/overview-of-speech-synthesis), ubicada en la lista de tonos - valor del parámetro voice -------------------------------------------------------------------------------- /docs/fr/aliyun.md: -------------------------------------------------------------------------------- 1 | ## Prérequis 2 | Vous devez d'abord avoir un compte [Alibaba Cloud](https://www.aliyun.com) et avoir vérifié votre identité. La plupart des services offrent un quota gratuit. 3 | 4 | ## Obtention de `access_key_id` et `access_key_secret` d'Alibaba Cloud 5 | 1. Accédez à la [page de gestion des AccessKey d'Alibaba Cloud](https://ram.console.aliyun.com/profile/access-keys). 6 | 2. Cliquez sur "Créer AccessKey". Si nécessaire, choisissez le mode d'utilisation, sélectionnez "Utilisation dans un environnement de développement local". 7 | ![Clé d'accès Alibaba Cloud](/docs/images/aliyun_accesskey_1.png) 8 | 3. Conservez-les en toute sécurité, il est préférable de les copier dans un fichier local. 9 | 10 | ## Activation du service de voix d'Alibaba Cloud 11 | 1. Accédez à la [page de gestion du service de voix d'Alibaba Cloud](https://nls-portal.console.aliyun.com/applist). La première fois, vous devez activer le service. 12 | 2. Cliquez sur "Créer un projet". 13 | ![Voix Alibaba Cloud](/docs/images/aliyun_speech_1.png) 14 | 3. Sélectionnez les fonctionnalités et activez-les. 15 | ![Voix Alibaba Cloud](/docs/images/aliyun_speech_2.png) 16 | 4. La "synthèse vocale de texte en continu (modèle CosyVoice)" doit être mise à niveau vers la version commerciale, les autres services peuvent utiliser la version d'essai gratuite. 17 | ![Voix Alibaba Cloud](/docs/images/aliyun_speech_3.png) 18 | 5. Copiez simplement la clé de l'application. 19 | ![Voix Alibaba Cloud](/docs/images/aliyun_speech_4.png) 20 | 21 | ## Activation du service OSS d'Alibaba Cloud 22 | 1. Accédez à la [console de service de stockage d'objets d'Alibaba Cloud](https://oss.console.aliyun.com/overview). La première fois, vous devez activer le service. 23 | 2. Sélectionnez la liste des Buckets à gauche, puis cliquez sur "Créer". 24 | ![OSS Alibaba Cloud](/docs/images/aliyun_oss_1.png) 25 | 3. 
Choisissez "Création rapide", remplissez un nom de Bucket conforme aux exigences et sélectionnez la région **Shanghai**, puis terminez la création (le nom que vous saisissez ici est la valeur de la configuration `aliyun.oss.bucket`). 26 | ![OSS Alibaba Cloud](/docs/images/aliyun_oss_2.png) 27 | 4. Une fois la création terminée, accédez au Bucket. 28 | ![OSS Alibaba Cloud](/docs/images/aliyun_oss_3.png) 29 | 5. Désactivez l'option "Bloquer l'accès public" et définissez les autorisations de lecture et d'écriture sur "Lecture publique". 30 | ![OSS Alibaba Cloud](/docs/images/aliyun_oss_4.png) 31 | ![OSS Alibaba Cloud](/docs/images/aliyun_oss_5.png) -------------------------------------------------------------------------------- /docs/fr/docker.md: -------------------------------------------------------------------------------- 1 | # Guide de déploiement Docker 2 | 3 | ## Démarrage rapide 4 | Préparez d'abord le fichier de configuration, en définissant le port d'écoute du serveur sur `8888` et l'adresse d'écoute du serveur sur `0.0.0.0`. 5 | 6 | ### Démarrage avec docker run 7 | ```bash 8 | docker run -d \ 9 | -p 8888:8888 \ 10 | -v /path/to/config.toml:/app/config/config.toml \ 11 | -v /path/to/tasks:/app/tasks \ 12 | asteria798/krillinai 13 | ``` 14 | 15 | ### Démarrage avec docker-compose 16 | ```yaml 17 | version: '3' 18 | services: 19 | krillin: 20 | image: asteria798/krillinai 21 | ports: 22 | - "8888:8888" 23 | volumes: 24 | - /path/to/config.toml:/app/config/config.toml # Fichier de configuration 25 | - /path/to/tasks:/app/tasks # Répertoire de sortie 26 | ``` 27 | 28 | ## Modèle de persistance 29 | Si vous utilisez le modèle fasterwhisper, KrillinAI téléchargera automatiquement les fichiers nécessaires au modèle dans le répertoire `/app/models` et le répertoire `/app/bin`. Ces fichiers seront perdus après la suppression du conteneur. Si vous avez besoin de persister le modèle, vous pouvez mapper ces deux répertoires à un répertoire de l'hôte. 30 | 31 | ### Démarrage avec docker run 32 | ```bash 33 | docker run -d \ 34 | -p 8888:8888 \ 35 | -v /path/to/config.toml:/app/config/config.toml \ 36 | -v /path/to/tasks:/app/tasks \ 37 | -v /path/to/models:/app/models \ 38 | -v /path/to/bin:/app/bin \ 39 | asteria798/krillinai 40 | ``` 41 | 42 | ### Démarrage avec docker-compose 43 | ```yaml 44 | version: '3' 45 | services: 46 | krillin: 47 | image: asteria798/krillinai 48 | ports: 49 | - "8888:8888" 50 | volumes: 51 | - /path/to/config.toml:/app/config/config.toml 52 | - /path/to/tasks:/app/tasks 53 | - /path/to/models:/app/models 54 | - /path/to/bin:/app/bin 55 | ``` 56 | 57 | ## Remarques 58 | 1. Si le mode réseau du conteneur Docker n'est pas `host`, il est recommandé de définir l'adresse d'écoute du serveur dans le fichier de configuration sur `0.0.0.0`, sinon le service pourrait ne pas être accessible. 59 | 2. Si le conteneur a besoin d'accéder au proxy réseau de l'hôte, veuillez configurer l'option d'adresse du proxy `proxy` de `127.0.0.1` à `host.docker.internal`, par exemple `http://host.docker.internal:7890`. -------------------------------------------------------------------------------- /docs/fr/faq.md: -------------------------------------------------------------------------------- 1 | ### 1. Impossible de voir le fichier de configuration `app.log`, impossible de connaître le contenu de l'erreur 2 | Les utilisateurs de Windows doivent placer le répertoire de travail de ce logiciel dans un dossier qui n'est pas sur le disque C. 3 | 4 | ### 2. 
Le fichier de configuration a bien été créé, mais l'erreur "fichier de configuration introuvable" persiste 5 | Assurez-vous que le nom du fichier de configuration est `config.toml`, et non `config.toml.txt` ou autre. Une fois la configuration terminée, la structure du dossier de travail de ce logiciel devrait être la suivante : 6 | ``` 7 | /── config/ 8 | │ └── config.toml 9 | ├── cookies.txt (<- fichier cookies.txt optionnel) 10 | └── krillinai.exe 11 | ``` 12 | 13 | ### 3. La configuration du grand modèle a été remplie, mais l'erreur "xxxxx nécessite la configuration de la clé API xxxxx" apparaît 14 | Bien que les services de modèle et de voix puissent tous deux utiliser les services d'OpenAI, il existe également des scénarios où le grand modèle utilise des services non-OpenAI, c'est pourquoi ces deux configurations sont séparées. En plus de la configuration du grand modèle, veuillez chercher la configuration de whisper en bas pour remplir les clés et autres informations correspondantes. 15 | 16 | ### 4. L'erreur contient "yt-dlp error" 17 | Le problème du téléchargeur vidéo semble être lié à des problèmes de réseau ou de version du téléchargeur. Vérifiez si le proxy réseau est activé et configuré dans les options de proxy du fichier de configuration, et il est conseillé de choisir un nœud à Hong Kong. Le téléchargeur est installé automatiquement par ce logiciel, et bien que je mettrai à jour la source d'installation, ce n'est pas une source officielle, donc il peut y avoir des retards. En cas de problème, essayez de mettre à jour manuellement avec la méthode suivante : 18 | 19 | Ouvrez un terminal dans le répertoire bin du logiciel et exécutez 20 | ``` 21 | ./yt-dlp.exe -U 22 | ``` 23 | Remplacez `yt-dlp.exe` par le nom réel du logiciel ytdlp sur votre système. 24 | 25 | ### 5. Après le déploiement, la génération de sous-titres fonctionne normalement, mais les sous-titres intégrés dans la vidéo contiennent beaucoup de caractères illisibles 26 | Cela est principalement dû à l'absence de polices chinoises sur Linux. Veuillez télécharger les polices [Microsoft YaHei](https://modelscope.cn/models/Maranello/KrillinAI_dependency_cn/resolve/master/%E5%AD%97%E4%BD%93/msyh.ttc) et [Microsoft YaHei Bold](https://modelscope.cn/models/Maranello/KrillinAI_dependency_cn/resolve/master/%E5%AD%97%E4%BD%93/msyhbd.ttc) (ou choisir des polices qui répondent à vos exigences), puis suivez les étapes ci-dessous : 27 | 1. Créez un dossier msyh sous /usr/share/fonts/ et copiez les polices téléchargées dans ce répertoire. 28 | 2. 29 | ``` 30 | cd /usr/share/fonts/msyh 31 | sudo mkfontscale 32 | sudo mkfontdir 33 | fc-cache 34 | ``` 35 | 36 | ### 6. Comment remplir le code de voix pour la synthèse vocale ? 
37 | Veuillez vous référer à la documentation du fournisseur de services vocaux, voici les informations pertinentes pour ce projet : 38 | [Documentation OpenAI TTS](https://platform.openai.com/docs/guides/text-to-speech/api-reference), située dans les options de voix 39 | [Documentation d'interaction vocale intelligente d'Alibaba Cloud](https://help.aliyun.com/zh/isi/developer-reference/overview-of-speech-synthesis), située dans la liste des voix - valeur du paramètre voice -------------------------------------------------------------------------------- /docs/images/alignment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krillinai/KrillinAI/6b1b22bb98aa14b327a257cf24c521cc827aebd1/docs/images/alignment.png -------------------------------------------------------------------------------- /docs/images/aliyun_accesskey_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krillinai/KrillinAI/6b1b22bb98aa14b327a257cf24c521cc827aebd1/docs/images/aliyun_accesskey_1.png -------------------------------------------------------------------------------- /docs/images/aliyun_oss_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krillinai/KrillinAI/6b1b22bb98aa14b327a257cf24c521cc827aebd1/docs/images/aliyun_oss_1.png -------------------------------------------------------------------------------- /docs/images/aliyun_oss_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krillinai/KrillinAI/6b1b22bb98aa14b327a257cf24c521cc827aebd1/docs/images/aliyun_oss_2.png -------------------------------------------------------------------------------- /docs/images/aliyun_oss_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krillinai/KrillinAI/6b1b22bb98aa14b327a257cf24c521cc827aebd1/docs/images/aliyun_oss_3.png -------------------------------------------------------------------------------- /docs/images/aliyun_oss_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krillinai/KrillinAI/6b1b22bb98aa14b327a257cf24c521cc827aebd1/docs/images/aliyun_oss_4.png -------------------------------------------------------------------------------- /docs/images/aliyun_oss_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krillinai/KrillinAI/6b1b22bb98aa14b327a257cf24c521cc827aebd1/docs/images/aliyun_oss_5.png -------------------------------------------------------------------------------- /docs/images/aliyun_speech_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krillinai/KrillinAI/6b1b22bb98aa14b327a257cf24c521cc827aebd1/docs/images/aliyun_speech_1.png -------------------------------------------------------------------------------- /docs/images/aliyun_speech_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krillinai/KrillinAI/6b1b22bb98aa14b327a257cf24c521cc827aebd1/docs/images/aliyun_speech_2.png -------------------------------------------------------------------------------- /docs/images/aliyun_speech_3.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/krillinai/KrillinAI/6b1b22bb98aa14b327a257cf24c521cc827aebd1/docs/images/aliyun_speech_3.png -------------------------------------------------------------------------------- /docs/images/aliyun_speech_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krillinai/KrillinAI/6b1b22bb98aa14b327a257cf24c521cc827aebd1/docs/images/aliyun_speech_4.png -------------------------------------------------------------------------------- /docs/images/bailian_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krillinai/KrillinAI/6b1b22bb98aa14b327a257cf24c521cc827aebd1/docs/images/bailian_1.png -------------------------------------------------------------------------------- /docs/images/export_cookies.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krillinai/KrillinAI/6b1b22bb98aa14b327a257cf24c521cc827aebd1/docs/images/export_cookies.png -------------------------------------------------------------------------------- /docs/images/logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krillinai/KrillinAI/6b1b22bb98aa14b327a257cf24c521cc827aebd1/docs/images/logo.jpg -------------------------------------------------------------------------------- /docs/images/ui.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krillinai/KrillinAI/6b1b22bb98aa14b327a257cf24c521cc827aebd1/docs/images/ui.jpg -------------------------------------------------------------------------------- /docs/images/ui_desktop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krillinai/KrillinAI/6b1b22bb98aa14b327a257cf24c521cc827aebd1/docs/images/ui_desktop.png -------------------------------------------------------------------------------- /docs/jp/README.md: -------------------------------------------------------------------------------- 1 |
2 | KlicStudio 3 | 4 | # 極簡デプロイAI動画翻訳音声ツール 5 | 6 | KrillinAI%2FKlicStudio | Trendshift 7 | 8 | **[English](/README.md)|[简体中文](/docs/zh/README.md)|[日本語](/docs/jp/README.md)|[한국어](/docs/kr/README.md)|[Tiếng Việt](/docs/vi/README.md)|[Français](/docs/fr/README.md)|[Deutsch](/docs/de/README.md)|[Español](/docs/es/README.md)|[Português](/docs/pt/README.md)|[Русский](/docs/rus/README.md)|[اللغة العربية](/docs/ar/README.md)** 9 | 10 | [![Twitter](https://img.shields.io/badge/Twitter-KrillinAI-orange?logo=twitter)](https://x.com/KrillinAI) 11 | [![QQ 群](https://img.shields.io/badge/QQ%20群-754069680-green?logo=tencent-qq)](https://jq.qq.com/?_wv=1027&k=754069680) 12 | [![Bilibili](https://img.shields.io/badge/dynamic/json?label=Bilibili&query=%24.data.follower&suffix=フォロワー&url=https%3A%2F%2Fapi.bilibili.com%2Fx%2Frelation%2Fstat%3Fvmid%3D242124650&logo=bilibili&color=00A1D6&labelColor=FE7398&logoColor=FFFFFF)](https://space.bilibili.com/242124650) 13 | 14 |
15 | 16 | ## プロジェクト概要 ([今すぐオンライン版を体験!](https://www.klic.studio/)) 17 | 18 | Klic StudioはKrillin AIが開発したオールインワンの音声・動画ローカライズおよび強化ソリューションです。このシンプルで強力なツールは、音声・動画翻訳、ナレーション、音声クローンを一体化し、横向き・縦向きのフォーマット出力をサポートし、すべての主要プラットフォーム(Bilibili、小紅書、Douyin、動画号、Kuaishou、YouTube、TikTokなど)で完璧に表示されることを保証します。エンドツーエンドのワークフローを通じて、数回のクリックで元の素材を美しいクロスプラットフォームコンテンツに変換できます。 19 | 20 | ## 主な特徴と機能: 21 | 🎯 **ワンクリック起動**:複雑な環境設定は不要、自動的に依存関係をインストールし、すぐに使用開始。デスクトップ版が新たに追加され、より便利に使用できます! 22 | 23 | 📥 **動画取得**:yt-dlpによるダウンロードまたはローカルファイルのアップロードをサポート 24 | 25 | 📜 **高精度認識**:Whisperに基づく高精度音声認識 26 | 27 | 🧠 **スマートセグメンテーション**:LLMを使用して字幕のセグメンテーションと整列を行います 28 | 29 | 🔄 **用語置換**:専門用語をワンクリックで置換 30 | 31 | 🌍 **専門翻訳**:文脈を考慮したLLM翻訳で自然な意味を保持 32 | 33 | 🎙️ **音声クローン**:CosyVoiceの厳選音色またはカスタム音色のクローンを提供 34 | 35 | 🎬 **動画合成**:横向き・縦向きの動画と字幕のレイアウトを自動処理 36 | 37 | 💻 **クロスプラットフォーム**:Windows、Linux、macOSをサポートし、デスクトップ版とサーバー版を提供 38 | 39 | 40 | ## 効果の展示 41 | 下の画像は46分のローカル動画をインポートし、ワンクリックで生成された字幕ファイルのトラック上の効果で、手動調整は一切ありません。欠落や重複はなく、文の切れ目も自然で、翻訳の質も非常に高いです。 42 | ![整列効果](/docs/images/alignment.png) 43 | 44 | 45 | 46 | 53 | 62 | 63 | 70 | 71 | 72 |
47 | 48 | ### 字幕翻訳 49 | --- 50 | https://github.com/user-attachments/assets/bba1ac0a-fe6b-4947-b58d-ba99306d0339 51 | 52 | 54 | 55 | 56 | 57 | ### ナレーション 58 | --- 59 | https://github.com/user-attachments/assets/0b32fad3-c3ad-4b6a-abf0-0865f0dd2385 60 | 61 | 64 | 65 | ### 縦向き 66 | --- 67 | https://github.com/user-attachments/assets/c2c7b528-0ef8-4ba9-b8ac-f9f92f6d4e71 68 | 69 |
73 | 74 | ## 🔍 音声認識サービスのサポート 75 | _**下表のローカルモデルはすべて自動インストール可能な実行ファイル+モデルファイルをサポートしています。選択するだけで、Klicがすべて準備します。**_ 76 | 77 | | サービス源 | サポートプラットフォーム | モデル選択肢 | ローカル/クラウド | 備考 | 78 | |--------------------|-----------------|----------------------------------------|-------|-------------| 79 | | **OpenAI Whisper** | 全プラットフォーム | - | クラウド | 速度が速く、効果が良い | 80 | | **FasterWhisper** | Windows/Linux | `tiny`/`medium`/`large-v2` (推奨medium+) | ローカル | 速度がさらに速く、クラウドサービスのコストがかからない | 81 | | **WhisperKit** | macOS (Mシリーズチップのみ) | `large-v2` | ローカル | Appleチップに最適化 | 82 | | **WhisperCpp** | 全プラットフォーム | `large-v2` | ローカル | 全プラットフォームをサポート | 83 | | **阿里云ASR** | 全プラットフォーム | - | クラウド | 中国本土のネットワーク問題を回避 | 84 | 85 | ## 🚀 大言語モデルのサポート 86 | 87 | ✅ すべての **OpenAI API仕様** に準拠したクラウド/ローカルの大言語モデルサービスに対応しており、以下を含みますが、これに限定されません: 88 | - OpenAI 89 | - Gemini 90 | - DeepSeek 91 | - 通義千問 92 | - ローカルにデプロイされたオープンソースモデル 93 | - その他OpenAI形式のAPIサービスに対応 94 | 95 | ## 🎤 TTSテキストから音声へのサポート 96 | - 阿里云音声サービス 97 | - OpenAI TTS 98 | 99 | ## 言語サポート 100 | 入力言語サポート:中文、英語、日本語、ドイツ語、トルコ語、韓国語、ロシア語、マレー語(継続的に増加中) 101 | 102 | 翻訳言語サポート:英語、中国語、ロシア語、スペイン語、フランス語など101言語 103 | 104 | ## インターフェースプレビュー 105 | ![インターフェースプレビュー](/docs/images/ui_desktop.png) 106 | 107 | 108 | ## 🚀 クイックスタート 109 | ### 基本ステップ 110 | まず、[Release](https://github.com/KrillinAI/KlicStudio/releases)からあなたのデバイスシステムに合った実行ファイルをダウンロードし、以下のチュートリアルに従ってデスクトップ版か非デスクトップ版を選択し、空のフォルダにソフトウェアをダウンロードしてください。実行後にいくつかのディレクトリが生成されるため、空のフォルダに置くと管理が楽になります。 111 | 112 | 【デスクトップ版の場合、releaseファイルにdesktopが含まれている場合はこちら】 113 | _デスクトップ版は新たにリリースされ、新規ユーザーが設定ファイルを正しく編集するのが難しい問題を解決するために、いくつかのバグがあり、継続的に更新中です。_ 114 | 1. ファイルをダブルクリックするだけで使用開始できます(デスクトップ版も設定が必要です。ソフトウェア内で設定します) 115 | 116 | 【非デスクトップ版の場合、releaseファイルにdesktopが含まれていない場合はこちら】 117 | _非デスクトップ版は最初のバージョンで、設定が比較的複雑ですが、機能は安定しており、サーバーへのデプロイに適しています。ウェブの形式でUIを提供します。_ 118 | 1. フォルダ内に`config`フォルダを作成し、その中に`config.toml`ファイルを作成します。ソースコードの`config`ディレクトリ内の`config-example.toml`ファイルの内容をコピーして`config.toml`に貼り付け、コメントに従って設定情報を記入します。 119 | 2. ダブルクリックするか、ターミナルで実行ファイルを実行してサービスを起動します 120 | 3. ブラウザを開き、`http://127.0.0.1:8888`を入力して使用開始します(8888は設定ファイルに記入したポートに置き換えてください) 121 | 122 | ### To: macOSユーザー 123 | 【デスクトップ版の場合、releaseファイルにdesktopが含まれている場合はこちら】 124 | デスクトップ版は現在、署名などの問題により、ダブルクリックで直接実行したりdmgインストールを行うことができず、手動でアプリを信頼する必要があります。方法は以下の通りです: 125 | 1. ターミナルで実行ファイル(ファイル名がKlicStudio_1.0.0_desktop_macOS_arm64だと仮定)のあるディレクトリを開きます 126 | 2. 次のコマンドを順に実行します: 127 | ``` 128 | sudo xattr -cr ./KlicStudio_1.0.0_desktop_macOS_arm64 129 | sudo chmod +x ./KlicStudio_1.0.0_desktop_macOS_arm64 130 | ./KlicStudio_1.0.0_desktop_macOS_arm64 131 | ``` 132 | 133 | 【非デスクトップ版の場合、releaseファイルにdesktopが含まれていない場合はこちら】 134 | 本ソフトウェアは署名を行っていないため、macOS上で実行する際には、「基本ステップ」でのファイル設定が完了した後、手動でアプリを信頼する必要があります。方法は以下の通りです: 135 | 1. ターミナルで実行ファイル(ファイル名がKlicStudio_1.0.0_macOS_arm64だと仮定)のあるディレクトリを開きます 136 | 2. 
次のコマンドを順に実行します: 137 | ``` 138 | sudo xattr -rd com.apple.quarantine ./KlicStudio_1.0.0_macOS_arm64 139 | sudo chmod +x ./KlicStudio_1.0.0_macOS_arm64 140 | ./KlicStudio_1.0.0_macOS_arm64 141 | ``` 142 | これでサービスが起動します 143 | 144 | ### Dockerデプロイ 145 | 本プロジェクトはDockerデプロイをサポートしています。詳細は[Dockerデプロイ説明](./docker.md)を参照してください。 146 | 147 | ### Cookie設定説明(必須ではありません) 148 | 149 | 動画ダウンロードに失敗した場合は、 150 | 151 | [Cookie設定説明](./get_cookies.md)を参照してCookie情報を設定してください。 152 | 153 | ### 設定ヘルプ(必見) 154 | 最も迅速で便利な設定方法: 155 | * `transcribe.provider.name`に`openai`を記入すると、`transcribe.openai`ブロックと`llm`ブロックの大モデル設定を記入するだけで字幕翻訳が可能になります。(`app.proxy`、`model`、`openai.base_url`は状況に応じて記入) 156 | 157 | ローカル音声認識モデルを使用する設定方法(コスト、速度、品質の選択を考慮) 158 | * `transcribe.provider.name`に`fasterwhisper`を記入し、`transcribe.fasterwhisper.model`に`large-v2`を記入、その後`llm`に大モデル設定を記入すれば、字幕翻訳が可能です。ローカルモデルは自動的にダウンロードされます。(`app.proxy`と`openai.base_url`は同様) 159 | 160 | テキストから音声への変換(TTS)はオプションで、設定ロジックは上記と同様に、`tts.provider.name`を記入し、`tts`の下の対応する設定ブロックを記入すればよいです。UI内の音声コードは選択したプロバイダーのドキュメントに従って記入してください(下方のよくある質問にドキュメントのアドレスがあります)。阿里云のakskなどの記入は重複する可能性がありますが、これは設定構造を明確にするためです。 161 | 注意:音声クローンを使用する場合、`tts`は`aliyun`の選択のみをサポートします。 162 | 163 | **阿里云のAccessKey、Bucket、AppKeyの取得方法は**:[阿里云設定説明](./aliyun.md)をお読みください。 164 | 165 | タスク=音声認識+大モデル翻訳+音声サービス(TTSなど、オプション)であることを理解してください。これは設定ファイルを理解するのに役立ちます。 166 | 167 | ## よくある質問 168 | 169 | [よくある質問](./faq.md)をご覧ください。 170 | 171 | ## 貢献規範 172 | 1. 無駄なファイル(.vscode、.ideaなど)を提出しないでください。`.gitignore`を使用してフィルタリングしてください。 173 | 2. config.tomlを提出せず、config-example.tomlを使用して提出してください。 174 | 175 | ## お問い合わせ 176 | 1. 私たちのQQグループに参加して質問を解決してください:754069680 177 | 2. 私たちのソーシャルメディアアカウントをフォローしてください。[Bilibili](https://space.bilibili.com/242124650)、毎日AI技術分野の質の高いコンテンツを共有しています。 178 | 179 | ## Star履歴 180 | 181 | [![Star履歴チャート](https://api.star-history.com/svg?repos=KrillinAI/KlicStudio&type=Date)](https://star-history.com/#KrillinAI/KlicStudio&Date) -------------------------------------------------------------------------------- /docs/jp/aliyun.md: -------------------------------------------------------------------------------- 1 | ## 前提条件 2 | [阿里云](https://www.aliyun.com)のアカウントを作成し、本人確認を行う必要があります。ほとんどのサービスには無料枠があります。 3 | 4 | ## 阿里云`access_key_id`と`access_key_secret`の取得 5 | 1. [阿里云AccessKey管理ページ](https://ram.console.aliyun.com/profile/access-keys)にアクセスします。 6 | 2. AccessKeyを作成するためにクリックし、必要に応じて使用方法を選択し、「ローカル開発環境で使用」を選択します。 7 | ![阿里云access key](/docs/images/aliyun_accesskey_1.png) 8 | 3. 大切に保管し、できればローカルファイルにコピーして保存します。 9 | 10 | ## 阿里云音声サービスの開通 11 | 1. [阿里云音声サービス管理ページ](https://nls-portal.console.aliyun.com/applist)にアクセスし、初めての場合はサービスを開通させる必要があります。 12 | 2. プロジェクトを作成するためにクリックします。 13 | ![阿里云speech](/docs/images/aliyun_speech_1.png) 14 | 3. 機能を選択して開通させます。 15 | ![阿里云speech](/docs/images/aliyun_speech_2.png) 16 | 4. 「ストリーミングテキスト音声合成(CosyVoice大モデル)」は商用版にアップグレードする必要があります。他のサービスは無料体験版を使用できます。 17 | ![阿里云speech](/docs/images/aliyun_speech_3.png) 18 | 5. app keyをコピーします。 19 | ![阿里云speech](/docs/images/aliyun_speech_4.png) 20 | 21 | ## 阿里云OSSサービスの開通 22 | 1. [阿里云オブジェクトストレージサービスコンソール](https://oss.console.aliyun.com/overview)にアクセスし、初めての場合はサービスを開通させる必要があります。 23 | 2. 左側のBucketリストを選択し、次に作成をクリックします。 24 | ![阿里云OSS](/docs/images/aliyun_oss_1.png) 25 | 3. クイック作成を選択し、要件に合ったBucket名を入力し、**上海**地域を選択して作成を完了します(ここに入力した名前が設定項目`aliyun.oss.bucket`の値になります)。 26 | ![阿里云OSS](/docs/images/aliyun_oss_2.png) 27 | 4. 作成が完了したらBucketに入ります。 28 | ![阿里云OSS](/docs/images/aliyun_oss_3.png) 29 | 5. 
「公共アクセスをブロック」スイッチをオフにし、読み書き権限を「公共読み取り」に設定します。 30 | ![阿里云OSS](/docs/images/aliyun_oss_4.png) 31 | ![阿里云OSS](/docs/images/aliyun_oss_5.png) -------------------------------------------------------------------------------- /docs/jp/docker.md: -------------------------------------------------------------------------------- 1 | # Docker デプロイガイド 2 | 3 | ## クイックスタート 4 | まず、設定ファイルを準備し、サーバーのリスニングポートを`8888`、サーバーのリスニングアドレスを`0.0.0.0`に設定します。 5 | 6 | ### docker runでの起動 7 | ```bash 8 | docker run -d \ 9 | -p 8888:8888 \ 10 | -v /path/to/config.toml:/app/config/config.toml \ 11 | -v /path/to/tasks:/app/tasks \ 12 | asteria798/krillinai 13 | ``` 14 | 15 | ### docker-composeでの起動 16 | ```yaml 17 | version: '3' 18 | services: 19 | krillin: 20 | image: asteria798/krillinai 21 | ports: 22 | - "8888:8888" 23 | volumes: 24 | - /path/to/config.toml:/app/config/config.toml # 設定ファイル 25 | - /path/to/tasks:/app/tasks # 出力ディレクトリ 26 | ``` 27 | 28 | ## モデルの永続化 29 | fasterwhisperモデルを使用する場合、KrillinAIは自動的にモデルに必要なファイルを`/app/models`ディレクトリと`/app/bin`ディレクトリにダウンロードします。コンテナが削除されると、これらのファイルは失われます。モデルを永続化する必要がある場合は、これらの2つのディレクトリをホストマシンのディレクトリにマッピングしてください。 30 | 31 | ### docker runでの起動 32 | ```bash 33 | docker run -d \ 34 | -p 8888:8888 \ 35 | -v /path/to/config.toml:/app/config/config.toml \ 36 | -v /path/to/tasks:/app/tasks \ 37 | -v /path/to/models:/app/models \ 38 | -v /path/to/bin:/app/bin \ 39 | asteria798/krillinai 40 | ``` 41 | 42 | ### docker-composeでの起動 43 | ```yaml 44 | version: '3' 45 | services: 46 | krillin: 47 | image: asteria798/krillinai 48 | ports: 49 | - "8888:8888" 50 | volumes: 51 | - /path/to/config.toml:/app/config/config.toml 52 | - /path/to/tasks:/app/tasks 53 | - /path/to/models:/app/models 54 | - /path/to/bin:/app/bin 55 | ``` 56 | 57 | ## 注意事項 58 | 1. dockerコンテナのネットワークモードがhostでない場合、設定ファイルのサーバーリスニングアドレスを`0.0.0.0`に設定することをお勧めします。そうしないと、サービスにアクセスできない可能性があります。 59 | 2. コンテナ内でホストマシンのネットワークプロキシにアクセスする必要がある場合、プロキシアドレス設定項目`proxy`の`127.0.0.1`を`host.docker.internal`に設定してください。例えば`http://host.docker.internal:7890`のように。 -------------------------------------------------------------------------------- /docs/jp/faq.md: -------------------------------------------------------------------------------- 1 | ### 1. `app.log`設定ファイルが見えず、エラー内容がわからない 2 | Windowsユーザーは、本ソフトウェアの作業ディレクトリをCドライブ以外のフォルダに配置してください。 3 | 4 | ### 2. デスクトップ版では明らかに設定ファイルが作成されたが、「設定ファイルが見つかりません」とエラーが出る 5 | 設定ファイル名が`config.toml`であることを確認してください。`config.toml.txt`やその他の名前ではありません。 6 | 設定が完了した後、本ソフトウェアの作業フォルダの構造は以下のようになります: 7 | ``` 8 | /── config/ 9 | │ └── config.toml 10 | ├── cookies.txt (<- オプションのcookies.txtファイル) 11 | └── krillinai.exe 12 | ``` 13 | 14 | ### 3. 大モデルの設定を記入したが、「xxxxxにはxxxxx API Keyの設定が必要です」とエラーが出る 15 | モデルサービスと音声サービスは両方ともopenaiのサービスを使用できますが、大モデルが非openaiのシーンで単独使用されることもあるため、これらの設定は分かれています。大モデルの設定に加えて、設定の下にあるwhisper設定で対応するキーなどの情報を記入してください。 16 | 17 | ### 4. エラーに「yt-dlp error」が含まれている 18 | 動画ダウンローダーの問題で、現在のところネットワークの問題かダウンローダーのバージョンの問題である可能性があります。ネットワークプロキシが開いていて、設定ファイルのプロキシ設定項目に正しく構成されているか確認してください。また、香港ノードを選択することをお勧めします。ダウンローダーは本ソフトウェアが自動的にインストールしたもので、インストール元は更新しますが、公式のものではないため、古くなる可能性があります。問題が発生した場合は手動で更新を試みてください。更新方法: 19 | 20 | ソフトウェアのbinディレクトリでターミナルを開き、次のコマンドを実行します。 21 | ``` 22 | ./yt-dlp.exe -U 23 | ``` 24 | ここで`yt-dlp.exe`は、あなたのシステムで実際のytdlpソフトウェア名に置き換えてください。 25 | 26 | ### 5. 
デプロイ後、字幕生成は正常だが、合成された字幕が動画に埋め込まれると多くの文字化けがある 27 | ほとんどはLinuxに中国語フォントが欠けているためです。[微软雅黑](https://modelscope.cn/models/Maranello/KrillinAI_dependency_cn/resolve/master/%E5%AD%97%E4%BD%93/msyh.ttc)と[微软雅黑-bold](https://modelscope.cn/models/Maranello/KrillinAI_dependency_cn/resolve/master/%E5%AD%97%E4%BD%93/msyhbd.ttc)フォント(または自分の要件を満たすフォントを選択)をダウンロードし、以下の手順に従って操作してください: 28 | 1. `/usr/share/fonts/`にmsyhフォルダを新規作成し、ダウンロードしたフォントをそのディレクトリにコピーします。 29 | 2. 30 | ``` 31 | cd /usr/share/fonts/msyh 32 | sudo mkfontscale 33 | sudo mkfontdir 34 | fc-cache 35 | ``` 36 | 37 | ### 6. 音声合成の音色コードはどう記入すればよいですか? 38 | 音声サービス提供者のドキュメントを参照してください。以下は本プロジェクトに関連するものです: 39 | [OpenAI TTSドキュメント](https://platform.openai.com/docs/guides/text-to-speech/api-reference)、Voice optionsにあります 40 | [アリババクラウドのインテリジェント音声インタラクションドキュメント](https://help.aliyun.com/zh/isi/developer-reference/overview-of-speech-synthesis)、音色リスト-voiceパラメータ値にあります -------------------------------------------------------------------------------- /docs/kr/aliyun.md: -------------------------------------------------------------------------------- 1 | ## 전제 조건 2 | 먼저 [알리바바 클라우드](https://www.aliyun.com) 계정이 필요하며, 실명 인증을 받아야 합니다. 대부분의 서비스는 무료 할당량이 있습니다. 3 | 4 | ## 알리바바 클라우드 `access_key_id` 및 `access_key_secret` 획득 5 | 1. [알리바바 클라우드 AccessKey 관리 페이지](https://ram.console.aliyun.com/profile/access-keys)에 접속합니다. 6 | 2. AccessKey 생성을 클릭하고, 필요시 사용 방식을 선택합니다. "로컬 개발 환경에서 사용"을 선택합니다. 7 | ![알리바바 클라우드 access key](/docs/images/aliyun_accesskey_1.png) 8 | 3. 안전하게 보관하며, 가능하면 로컬 파일에 복사하여 저장합니다. 9 | 10 | ## 알리바바 클라우드 음성 서비스 개통 11 | 1. [알리바바 클라우드 음성 서비스 관리 페이지](https://nls-portal.console.aliyun.com/applist)에 접속하여, 처음 들어가면 서비스를 개통해야 합니다. 12 | 2. 프로젝트 생성을 클릭합니다. 13 | ![알리바바 클라우드 speech](/docs/images/aliyun_speech_1.png) 14 | 3. 기능을 선택하고 개통합니다. 15 | ![알리바바 클라우드 speech](/docs/images/aliyun_speech_2.png) 16 | 4. "스트리밍 텍스트 음성 합성(CosyVoice 대모델)"은 상업용 버전으로 업그레이드해야 하며, 다른 서비스는 무료 체험판을 사용할 수 있습니다. 17 | ![알리바바 클라우드 speech](/docs/images/aliyun_speech_3.png) 18 | 5. app key를 복사합니다. 19 | ![알리바바 클라우드 speech](/docs/images/aliyun_speech_4.png) 20 | 21 | ## 알리바바 클라우드 OSS 서비스 개통 22 | 1. [알리바바 클라우드 객체 저장 서비스 콘솔](https://oss.console.aliyun.com/overview)에 접속하여, 처음 들어가면 서비스를 개통해야 합니다. 23 | 2. 왼쪽에서 Bucket 목록을 선택한 후, 생성을 클릭합니다. 24 | ![알리바바 클라우드 OSS](/docs/images/aliyun_oss_1.png) 25 | 3. 빠른 생성을 선택하고, 요구 사항에 맞는 Bucket 이름을 입력한 후 **상하이** 지역을 선택하여 생성을 완료합니다(여기서 입력한 이름이 구성 항목 `aliyun.oss.bucket`의 값이 됩니다). 26 | ![알리바바 클라우드 OSS](/docs/images/aliyun_oss_2.png) 27 | 4. 생성 완료 후 Bucket에 들어갑니다. 28 | ![알리바바 클라우드 OSS](/docs/images/aliyun_oss_3.png) 29 | 5. "공공 접근 차단" 스위치를 끄고, 읽기 및 쓰기 권한을 "공공 읽기"로 설정합니다. 30 | ![알리바바 클라우드 OSS](/docs/images/aliyun_oss_4.png) 31 | ![알리바바 클라우드 OSS](/docs/images/aliyun_oss_5.png) -------------------------------------------------------------------------------- /docs/kr/docker.md: -------------------------------------------------------------------------------- 1 | # Docker 배포 가이드 2 | 3 | ## 빠른 시작 4 | 먼저 구성 파일을 준비하고, 서버 리스닝 포트를 `8888`로, 서버 리스닝 주소를 `0.0.0.0`으로 설정합니다. 
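아래는 해당 설정을 담은 `config.toml`의 최소 예시(가정 기반 스케치)입니다. 아래 키 이름은 설명을 위한 가정일 뿐이므로, 실제 키 이름과 구조는 저장소의 `config-example.toml`을 기준으로 작성하세요.
```toml
# 가정한 스케치 — 실제 키 이름은 config-example.toml에서 확인하세요
[server]
host = "0.0.0.0"  # 모든 인터페이스에서 수신하여 컨테이너 외부에서도 접근 가능하게 함
port = 8888       # docker run / docker-compose의 포트 매핑과 일치해야 함
```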
5 | 6 | ### docker run 시작 7 | ```bash 8 | docker run -d \ 9 | -p 8888:8888 \ 10 | -v /path/to/config.toml:/app/config/config.toml \ 11 | -v /path/to/tasks:/app/tasks \ 12 | asteria798/krillinai 13 | ``` 14 | 15 | ### docker-compose 시작 16 | ```yaml 17 | version: '3' 18 | services: 19 | krillin: 20 | image: asteria798/krillinai 21 | ports: 22 | - "8888:8888" 23 | volumes: 24 | - /path/to/config.toml:/app/config/config.toml # 구성 파일 25 | - /path/to/tasks:/app/tasks # 출력 디렉토리 26 | ``` 27 | 28 | ## 모델 지속성 29 | fasterwhisper 모델을 사용하는 경우, KrillinAI는 모델에 필요한 파일을 `/app/models` 디렉토리와 `/app/bin` 디렉토리로 자동 다운로드합니다. 컨테이너가 삭제되면 이러한 파일은 사라집니다. 모델을 지속적으로 유지하려면 이 두 디렉토리를 호스트 머신의 디렉토리에 매핑할 수 있습니다. 30 | 31 | ### docker run 시작 32 | ```bash 33 | docker run -d \ 34 | -p 8888:8888 \ 35 | -v /path/to/config.toml:/app/config/config.toml \ 36 | -v /path/to/tasks:/app/tasks \ 37 | -v /path/to/models:/app/models \ 38 | -v /path/to/bin:/app/bin \ 39 | asteria798/krillinai 40 | ``` 41 | 42 | ### docker-compose 시작 43 | ```yaml 44 | version: '3' 45 | services: 46 | krillin: 47 | image: asteria798/krillinai 48 | ports: 49 | - "8888:8888" 50 | volumes: 51 | - /path/to/config.toml:/app/config/config.toml 52 | - /path/to/tasks:/app/tasks 53 | - /path/to/models:/app/models 54 | - /path/to/bin:/app/bin 55 | ``` 56 | 57 | ## 주의 사항 58 | 1. docker 컨테이너의 네트워크 모드가 host가 아닐 경우, 구성 파일의 서버 리스닝 주소를 `0.0.0.0`으로 설정하는 것이 좋습니다. 그렇지 않으면 서비스에 접근할 수 없을 수 있습니다. 59 | 2. 컨테이너 내에서 호스트 머신의 네트워크 프록시에 접근해야 하는 경우, 프록시 주소 구성 항목 `proxy`의 `127.0.0.1`을 `host.docker.internal`로 설정하십시오. 예: `http://host.docker.internal:7890` -------------------------------------------------------------------------------- /docs/kr/faq.md: -------------------------------------------------------------------------------- 1 | ### 1. `app.log` 구성 파일을 볼 수 없어 오류 내용을 알 수 없음 2 | Windows 사용자께서는 본 소프트웨어의 작업 디렉토리를 C 드라이브가 아닌 폴더에 두시기 바랍니다. 3 | 4 | ### 2. 비데스크톱 버전에서 구성 파일이 생성되었지만 여전히 "구성 파일을 찾을 수 없음" 오류 발생 5 | 구성 파일 이름이 `config.toml`인지 확인하세요. `config.toml.txt` 또는 다른 이름이 아니어야 합니다. 6 | 구성이 완료된 후, 본 소프트웨어의 작업 폴더 구조는 다음과 같아야 합니다: 7 | ``` 8 | /── config/ 9 | │ └── config.toml 10 | ├── cookies.txt (<- 선택적 cookies.txt 파일) 11 | └── krillinai.exe 12 | ``` 13 | 14 | ### 3. 대모델 구성을 입력했지만 "xxxxx는 xxxxx API Key 구성이 필요합니다" 오류 발생 15 | 모델 서비스와 음성 서비스는 모두 OpenAI의 서비스를 사용할 수 있지만, 대모델은 OpenAI가 아닌 다른 서비스를 사용할 수 있는 경우도 있습니다. 따라서 이 두 가지 구성은 분리되어 있으며, 대모델 구성 외에도 아래의 whisper 구성에서 해당 키 등의 정보를 입력해야 합니다. 16 | 17 | ### 4. 오류 메시지에 "yt-dlp error" 포함 18 | 비디오 다운로드 도구의 문제로, 현재로서는 네트워크 문제 또는 다운로드 도구 버전 문제일 수 있습니다. 네트워크 프록시가 활성화되어 있고 구성 파일의 프록시 구성 항목에 설정되어 있는지 확인하세요. 또한 홍콩 노드를 선택하는 것이 좋습니다. 다운로드 도구는 본 소프트웨어가 자동으로 설치하며, 설치 소스는 업데이트하겠지만 공식 소스가 아니므로 구버전일 수 있습니다. 문제가 발생하면 수동으로 업데이트를 시도해 보세요. 업데이트 방법은 다음과 같습니다: 19 | 20 | 소프트웨어 bin 디렉토리 위치에서 터미널을 열고 다음을 실행하세요: 21 | ``` 22 | ./yt-dlp.exe -U 23 | ``` 24 | 여기서 `yt-dlp.exe`는 시스템에서 실제 ytdlp 소프트웨어 이름으로 교체하세요. 25 | 26 | ### 5. 배포 후 자막 생성은 정상이나 합성된 자막이 비디오에 많은 깨짐 현상 발생 27 | 대부분은 Linux에 중국어 글꼴이 없기 때문입니다. [微软雅黑](https://modelscope.cn/models/Maranello/KrillinAI_dependency_cn/resolve/master/%E5%AD%97%E4%BD%93/msyh.ttc)와 [微软雅黑-bold](https://modelscope.cn/models/Maranello/KrillinAI_dependency_cn/resolve/master/%E5%AD%97%E4%BD%93/msyhbd.ttc) 글꼴(또는 요구 사항을 충족하는 글꼴)을 다운로드한 후, 아래 단계를 따라 진행하세요: 28 | 1. `/usr/share/fonts/` 아래에 msyh 폴더를 새로 만들고 다운로드한 글꼴을 해당 디렉토리에 복사합니다. 29 | 2. 30 | ``` 31 | cd /usr/share/fonts/msyh 32 | sudo mkfontscale 33 | sudo mkfontdir 34 | fc-cache 35 | ``` 36 | 37 | ### 6. 음성 합성의 음색 코드는 어떻게 입력하나요? 38 | 음성 서비스 제공자의 문서를 참조하세요. 
다음은 본 프로젝트와 관련된 문서입니다: 39 | [OpenAI TTS 문서](https://platform.openai.com/docs/guides/text-to-speech/api-reference), Voice options에 위치 40 | [알리바바 클라우드 스마트 음성 상호작용 문서](https://help.aliyun.com/zh/isi/developer-reference/overview-of-speech-synthesis), 음색 목록 - voice 매개변수 값에 위치 -------------------------------------------------------------------------------- /docs/pt/aliyun.md: -------------------------------------------------------------------------------- 1 | ## Pré-requisitos 2 | É necessário ter uma conta do [Alibaba Cloud](https://www.aliyun.com) e passar pela verificação de identidade, a maioria dos serviços tem uma cota gratuita. 3 | 4 | ## Obtenção do `access_key_id` e `access_key_secret` do Alibaba Cloud 5 | 1. Acesse a [página de gerenciamento de AccessKey do Alibaba Cloud](https://ram.console.aliyun.com/profile/access-keys). 6 | 2. Clique em criar AccessKey, se necessário, escolha o modo de uso e selecione "Usar no ambiente de desenvolvimento local". 7 | ![Alibaba Cloud access key](/docs/images/aliyun_accesskey_1.png) 8 | 3. Guarde com segurança, é melhor copiar para um arquivo local. 9 | 10 | ## Ativação do serviço de voz do Alibaba Cloud 11 | 1. Acesse a [página de gerenciamento do serviço de voz do Alibaba Cloud](https://nls-portal.console.aliyun.com/applist), na primeira vez que entrar, será necessário ativar o serviço. 12 | 2. Clique em criar projeto. 13 | ![Alibaba Cloud speech](/docs/images/aliyun_speech_1.png) 14 | 3. Selecione as funcionalidades e ative. 15 | ![Alibaba Cloud speech](/docs/images/aliyun_speech_2.png) 16 | 4. A "síntese de voz de texto em fluxo (modelo grande CosyVoice)" precisa ser atualizada para a versão comercial, outros serviços podem ser usados na versão de experiência gratuita. 17 | ![Alibaba Cloud speech](/docs/images/aliyun_speech_3.png) 18 | 5. Copie a chave do aplicativo. 19 | ![Alibaba Cloud speech](/docs/images/aliyun_speech_4.png) 20 | 21 | ## Ativação do serviço OSS do Alibaba Cloud 22 | 1. Acesse o [console de serviços de armazenamento de objetos do Alibaba Cloud](https://oss.console.aliyun.com/overview), na primeira vez que entrar, será necessário ativar o serviço. 23 | 2. Selecione a lista de Buckets à esquerda e clique em criar. 24 | ![Alibaba Cloud OSS](/docs/images/aliyun_oss_1.png) 25 | 3. Selecione criação rápida, preencha um nome de Bucket que atenda aos requisitos e escolha a região **Xangai**, complete a criação (o nome preenchido aqui será o valor da configuração `aliyun.oss.bucket`). 26 | ![Alibaba Cloud OSS](/docs/images/aliyun_oss_2.png) 27 | 4. Após a criação, acesse o Bucket. 28 | ![Alibaba Cloud OSS](/docs/images/aliyun_oss_3.png) 29 | 5. Desative o interruptor "Impedir acesso público" e defina as permissões de leitura e escrita como "Leitura pública". 30 | ![Alibaba Cloud OSS](/docs/images/aliyun_oss_4.png) 31 | ![Alibaba Cloud OSS](/docs/images/aliyun_oss_5.png) -------------------------------------------------------------------------------- /docs/pt/docker.md: -------------------------------------------------------------------------------- 1 | # Guia de Implantação do Docker 2 | 3 | ## Começando Rápido 4 | Primeiro, prepare o arquivo de configuração, definindo a porta de escuta do servidor como `8888` e o endereço de escuta do servidor como `0.0.0.0`. 
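A título de ilustração, segue um esboço mínimo de como essas opções poderiam aparecer no `config.toml`; os nomes das chaves abaixo são apenas uma suposição, consulte o `config-example.toml` do repositório para os nomes e a estrutura reais.
```toml
# Esboço hipotético — confirme os nomes reais das chaves em config-example.toml
[server]
host = "0.0.0.0"  # escuta em todas as interfaces, para que o serviço seja acessível fora do contêiner
port = 8888       # deve coincidir com o mapeamento de portas do docker run / docker-compose
```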
5 | 6 | ### Iniciar com docker run 7 | ```bash 8 | docker run -d \ 9 | -p 8888:8888 \ 10 | -v /path/to/config.toml:/app/config/config.toml \ 11 | -v /path/to/tasks:/app/tasks \ 12 | asteria798/krillinai 13 | ``` 14 | 15 | ### Iniciar com docker-compose 16 | ```yaml 17 | version: '3' 18 | services: 19 | krillin: 20 | image: asteria798/krillinai 21 | ports: 22 | - "8888:8888" 23 | volumes: 24 | - /path/to/config.toml:/app/config/config.toml # Arquivo de configuração 25 | - /path/to/tasks:/app/tasks # Diretório de saída 26 | ``` 27 | 28 | ## Persistência do Modelo 29 | Se você usar o modelo fasterwhisper, o KrillinAI fará o download automático dos arquivos necessários para o modelo nos diretórios `/app/models` e `/app/bin`. Esses arquivos serão perdidos após a exclusão do contêiner. Se precisar persistir o modelo, você pode mapear esses dois diretórios para um diretório no host. 30 | 31 | ### Iniciar com docker run 32 | ```bash 33 | docker run -d \ 34 | -p 8888:8888 \ 35 | -v /path/to/config.toml:/app/config/config.toml \ 36 | -v /path/to/tasks:/app/tasks \ 37 | -v /path/to/models:/app/models \ 38 | -v /path/to/bin:/app/bin \ 39 | asteria798/krillinai 40 | ``` 41 | 42 | ### Iniciar com docker-compose 43 | ```yaml 44 | version: '3' 45 | services: 46 | krillin: 47 | image: asteria798/krillinai 48 | ports: 49 | - "8888:8888" 50 | volumes: 51 | - /path/to/config.toml:/app/config/config.toml 52 | - /path/to/tasks:/app/tasks 53 | - /path/to/models:/app/models 54 | - /path/to/bin:/app/bin 55 | ``` 56 | 57 | ## Considerações 58 | 1. Se o modo de rede do contêiner Docker não for host, recomenda-se definir o endereço de escuta do servidor no arquivo de configuração como `0.0.0.0`, caso contrário, pode não ser possível acessar o serviço. 59 | 2. Se o contêiner precisar acessar o proxy de rede do host, configure o item de configuração do proxy `proxy` de `127.0.0.1` para `host.docker.internal`, por exemplo, `http://host.docker.internal:7890`. -------------------------------------------------------------------------------- /docs/pt/faq.md: -------------------------------------------------------------------------------- 1 | ### 1. Não consigo ver o arquivo de configuração `app.log`, não sei o que está causando o erro 2 | Usuários do Windows, por favor, coloque o diretório de trabalho deste software em uma pasta que não seja no disco C. 3 | 4 | ### 2. O arquivo de configuração foi criado, mas ainda aparece o erro "arquivo de configuração não encontrado" 5 | Certifique-se de que o nome do arquivo de configuração é `config.toml`, e não `config.toml.txt` ou outro. 6 | Após a configuração, a estrutura da pasta de trabalho deste software deve ser assim: 7 | ``` 8 | /── config/ 9 | │ └── config.toml 10 | ├── cookies.txt (<- arquivo cookies.txt opcional) 11 | └── krillinai.exe 12 | ``` 13 | 14 | ### 3. Preenchi a configuração do modelo grande, mas aparece o erro "xxxxx precisa da configuração da chave API xxxxx" 15 | Embora os serviços de modelo e de voz possam usar os serviços da OpenAI, também existem cenários em que o modelo grande usa serviços que não são da OpenAI, portanto, essas duas configurações são separadas. Além da configuração do modelo grande, procure a configuração do whisper abaixo para preencher a chave correspondente e outras informações. 16 | 17 | ### 4. O erro contém "yt-dlp error" 18 | Problemas com o downloader de vídeo, atualmente parece ser apenas um problema de rede ou de versão do downloader. 
Verifique se o proxy de rede está ativado e configurado na seção de proxy do arquivo de configuração, e recomenda-se escolher um nó em Hong Kong. O downloader é instalado automaticamente por este software, a fonte da instalação será atualizada, mas não é uma fonte oficial, então pode haver desatualizações. Se encontrar problemas, tente atualizar manualmente, o método de atualização é: 19 | 20 | Abra o terminal na localização do diretório bin do software e execute 21 | ``` 22 | ./yt-dlp.exe -U 23 | ``` 24 | Aqui, substitua `yt-dlp.exe` pelo nome real do software ytdlp no seu sistema. 25 | 26 | ### 5. Após a implantação, a geração de legendas está normal, mas as legendas incorporadas no vídeo têm muitos caracteres estranhos 27 | A maioria dos casos é devido à falta de fontes chinesas no Linux. Baixe as fontes [Microsoft YaHei](https://modelscope.cn/models/Maranello/KrillinAI_dependency_cn/resolve/master/%E5%AD%97%E4%BD%93/msyh.ttc) e [Microsoft YaHei Bold](https://modelscope.cn/models/Maranello/KrillinAI_dependency_cn/resolve/master/%E5%AD%97%E4%BD%93/msyhbd.ttc) (ou escolha fontes que atendam às suas necessidades) e siga os passos abaixo: 28 | 1. Crie uma pasta msyh em /usr/share/fonts/ e copie as fontes baixadas para esse diretório. 29 | 2. 30 | ``` 31 | cd /usr/share/fonts/msyh 32 | sudo mkfontscale 33 | sudo mkfontdir 34 | fc-cache 35 | ``` 36 | 37 | ### 6. Como preencher o código de timbre para síntese de voz? 38 | Por favor, consulte a documentação do provedor de serviços de voz, aqui estão os relacionados a este projeto: 39 | [Documentação OpenAI TTS](https://platform.openai.com/docs/guides/text-to-speech/api-reference), localizada em Opções de voz 40 | [Documentação de Interação de Voz Inteligente da Alibaba Cloud](https://help.aliyun.com/zh/isi/developer-reference/overview-of-speech-synthesis), localizada na lista de timbres - valor do parâmetro voice -------------------------------------------------------------------------------- /docs/rus/aliyun.md: -------------------------------------------------------------------------------- 1 | ## Предварительные условия 2 | Необходимо иметь аккаунт на [Alibaba Cloud](https://www.aliyun.com) и пройти процедуру реальной идентификации, большинство услуг имеют бесплатный лимит. 3 | 4 | ## Получение `access_key_id` и `access_key_secret` для Alibaba Cloud 5 | 1. Перейдите на [страницу управления AccessKey Alibaba Cloud](https://ram.console.aliyun.com/profile/access-keys). 6 | 2. Нажмите "Создать AccessKey", если необходимо, выберите способ использования, выберите "Использование в локальной среде разработки". 7 | ![阿里云access key](/docs/images/aliyun_accesskey_1.png) 8 | 3. Храните в надежном месте, лучше скопируйте в локальный файл. 9 | 10 | ## Подключение голосового сервиса Alibaba Cloud 11 | 1. Перейдите на [страницу управления голосовым сервисом Alibaba Cloud](https://nls-portal.console.aliyun.com/applist), при первом входе необходимо активировать сервис. 12 | 2. Нажмите "Создать проект". 13 | ![阿里云speech](/docs/images/aliyun_speech_1.png) 14 | 3. Выберите функции и активируйте их. 15 | ![阿里云speech](/docs/images/aliyun_speech_2.png) 16 | 4. "Потоковая текстовая синтезация речи (модель CosyVoice)" требует обновления до коммерческой версии, другие услуги можно использовать в бесплатной пробной версии. 17 | ![阿里云speech](/docs/images/aliyun_speech_3.png) 18 | 5. Скопируйте app key. 19 | ![阿里云speech](/docs/images/aliyun_speech_4.png) 20 | 21 | ## Подключение сервиса OSS Alibaba Cloud 22 | 1. 
Перейдите на [консоль управления объектным хранилищем Alibaba Cloud](https://oss.console.aliyun.com/overview), при первом входе необходимо активировать сервис. 23 | 2. Выберите список Bucket слева, затем нажмите "Создать". 24 | ![阿里云OSS](/docs/images/aliyun_oss_1.png) 25 | 3. Выберите "Быстрое создание", введите имя Bucket, соответствующее требованиям, и выберите регион **Шанхай**, завершите создание (введенное здесь имя будет значением конфигурационного параметра `aliyun.oss.bucket`). 26 | ![阿里云OSS](/docs/images/aliyun_oss_2.png) 27 | 4. После создания перейдите в Bucket. 28 | ![阿里云OSS](/docs/images/aliyun_oss_3.png) 29 | 5. Отключите переключатель "Запретить общий доступ" и установите права на чтение и запись на "Общий доступ для чтения". 30 | ![阿里云OSS](/docs/images/aliyun_oss_4.png) 31 | ![阿里云OSS](/docs/images/aliyun_oss_5.png) -------------------------------------------------------------------------------- /docs/rus/docker.md: -------------------------------------------------------------------------------- 1 | # Руководство по развертыванию Docker 2 | 3 | ## Быстрый старт 4 | Сначала подготовьте файл конфигурации, установив порт прослушивания сервера на `8888`, а адрес прослушивания сервера на `0.0.0.0`. 5 | 6 | ### Запуск с помощью docker run 7 | ```bash 8 | docker run -d \ 9 | -p 8888:8888 \ 10 | -v /path/to/config.toml:/app/config/config.toml \ 11 | -v /path/to/tasks:/app/tasks \ 12 | asteria798/krillinai 13 | ``` 14 | 15 | ### Запуск с помощью docker-compose 16 | ```yaml 17 | version: '3' 18 | services: 19 | krillin: 20 | image: asteria798/krillinai 21 | ports: 22 | - "8888:8888" 23 | volumes: 24 | - /path/to/config.toml:/app/config/config.toml # Файл конфигурации 25 | - /path/to/tasks:/app/tasks # Директория вывода 26 | ``` 27 | 28 | ## Персистентность модели 29 | Если используется модель fasterwhisper, KrillinAI автоматически загрузит необходимые файлы модели в директории `/app/models` и `/app/bin`. Эти файлы будут потеряны после удаления контейнера. Если необходимо сохранить модель, можно смонтировать эти две директории в директорию хоста. 30 | 31 | ### Запуск с помощью docker run 32 | ```bash 33 | docker run -d \ 34 | -p 8888:8888 \ 35 | -v /path/to/config.toml:/app/config/config.toml \ 36 | -v /path/to/tasks:/app/tasks \ 37 | -v /path/to/models:/app/models \ 38 | -v /path/to/bin:/app/bin \ 39 | asteria798/krillinai 40 | ``` 41 | 42 | ### Запуск с помощью docker-compose 43 | ```yaml 44 | version: '3' 45 | services: 46 | krillin: 47 | image: asteria798/krillinai 48 | ports: 49 | - "8888:8888" 50 | volumes: 51 | - /path/to/config.toml:/app/config/config.toml 52 | - /path/to/tasks:/app/tasks 53 | - /path/to/models:/app/models 54 | - /path/to/bin:/app/bin 55 | ``` 56 | 57 | ## Важные замечания 58 | 1. Если сетевой режим контейнера Docker не является host, рекомендуется установить адрес прослушивания сервера конфигурационного файла на `0.0.0.0`, иначе доступ к сервису может быть невозможен. 59 | 2. Если контейнеру необходимо получить доступ к сетевому прокси хоста, измените параметр конфигурации прокси `proxy` с `127.0.0.1` на `host.docker.internal`, например `http://host.docker.internal:7890`. -------------------------------------------------------------------------------- /docs/rus/faq.md: -------------------------------------------------------------------------------- 1 | ### 1. Не удается найти файл конфигурации `app.log`, невозможно узнать содержание ошибки 2 | Пользователям Windows рекомендуется разместить рабочую директорию программы в папке, отличной от диска C. 
3 | 4 | ### 2. Файл конфигурации был создан, но возникает ошибка "Не найден файл конфигурации" 5 | Убедитесь, что имя файла конфигурации — `config.toml`, а не `config.toml.txt` или что-то другое. После завершения настройки структура рабочей папки программы должна выглядеть следующим образом: 6 | ``` 7 | /── config/ 8 | │ └── config.toml 9 | ├── cookies.txt (<- необязательный файл cookies.txt) 10 | └── krillinai.exe 11 | ``` 12 | 13 | ### 3. Заполнены настройки большого модели, но возникает ошибка "xxxxx требует настройки xxxxx API Key" 14 | Хотя модельный сервис и голосовой сервис могут использовать услуги openai, существуют сценарии, когда большая модель использует не openai, поэтому эти две настройки разделены. Кроме настроек большой модели, пожалуйста, найдите настройки whisper ниже и заполните соответствующий ключ и другую информацию. 15 | 16 | ### 4. Ошибка содержит "yt-dlp error" 17 | Проблема с загрузчиком видео, на данный момент это, скорее всего, проблема с сетью или версией загрузчика. Проверьте, включен ли сетевой прокси и правильно ли он настроен в конфигурационном файле, также рекомендуется выбрать узел в Гонконге. Загрузчик автоматически устанавливается программой, источник установки я буду обновлять, но это не официальный источник, поэтому могут быть задержки. Если возникли проблемы, попробуйте обновить вручную, способ обновления: 18 | 19 | Откройте терминал в каталоге bin программы и выполните 20 | ``` 21 | ./yt-dlp.exe -U 22 | ``` 23 | Здесь `yt-dlp.exe` замените на фактическое имя программы ytdlp в вашей системе. 24 | 25 | ### 5. После развертывания субтитры генерируются нормально, но в сгенерированных субтитрах много иероглифов 26 | Чаще всего это связано с отсутствием китайских шрифтов в Linux. Пожалуйста, скачайте шрифты [微软雅黑](https://modelscope.cn/models/Maranello/KrillinAI_dependency_cn/resolve/master/%E5%AD%97%E4%BD%93/msyh.ttc) и [微软雅黑-bold](https://modelscope.cn/models/Maranello/KrillinAI_dependency_cn/resolve/master/%E5%AD%97%E4%BD%93/msyhbd.ttc) (или выберите шрифты, соответствующие вашим требованиям), а затем выполните следующие шаги: 27 | 1. Создайте папку msyh в /usr/share/fonts/ и скопируйте загруженные шрифты в этот каталог. 28 | 2. 29 | ``` 30 | cd /usr/share/fonts/msyh 31 | sudo mkfontscale 32 | sudo mkfontdir 33 | fc-cache 34 | ``` 35 | 36 | ### 6. Как заполнить код голоса для синтеза речи? 37 | Пожалуйста, обратитесь к документации поставщика голосовых услуг, вот что касается этого проекта: 38 | [Документация OpenAI TTS](https://platform.openai.com/docs/guides/text-to-speech/api-reference), находится в разделе Voice options 39 | [Документация по интеллектуальному голосовому взаимодействию Alibaba Cloud](https://help.aliyun.com/zh/isi/developer-reference/overview-of-speech-synthesis), находится в списке голосов - значение параметра voice -------------------------------------------------------------------------------- /docs/vi/aliyun.md: -------------------------------------------------------------------------------- 1 | ## Điều kiện tiên quyết 2 | Cần có tài khoản [Alibaba Cloud](https://www.aliyun.com) và đã xác thực danh tính, hầu hết các dịch vụ đều có hạn mức miễn phí. 3 | 4 | ## Lấy `access_key_id` và `access_key_secret` của Alibaba Cloud 5 | 1. Truy cập [Trang quản lý AccessKey của Alibaba Cloud](https://ram.console.aliyun.com/profile/access-keys). 6 | 2. Nhấp vào "Tạo AccessKey", nếu cần chọn cách sử dụng, chọn "Sử dụng trong môi trường phát triển địa phương". 7 | ![Alibaba Cloud access key](/docs/images/aliyun_accesskey_1.png) 8 | 3. 
Bảo quản cẩn thận, tốt nhất là sao chép vào tệp tin địa phương để lưu. 9 | 10 | ## Kích hoạt dịch vụ giọng nói của Alibaba Cloud 11 | 1. Truy cập [Trang quản lý dịch vụ giọng nói của Alibaba Cloud](https://nls-portal.console.aliyun.com/applist), lần đầu truy cập cần kích hoạt dịch vụ. 12 | 2. Nhấp vào "Tạo dự án". 13 | ![Alibaba Cloud speech](/docs/images/aliyun_speech_1.png) 14 | 3. Chọn chức năng và kích hoạt. 15 | ![Alibaba Cloud speech](/docs/images/aliyun_speech_2.png) 16 | 4. "Tổng hợp giọng nói văn bản theo luồng (Mô hình lớn CosyVoice)" cần nâng cấp lên phiên bản thương mại, các dịch vụ khác có thể sử dụng phiên bản trải nghiệm miễn phí. 17 | ![Alibaba Cloud speech](/docs/images/aliyun_speech_3.png) 18 | 5. Sao chép app key là xong. 19 | ![Alibaba Cloud speech](/docs/images/aliyun_speech_4.png) 20 | 21 | ## Kích hoạt dịch vụ OSS của Alibaba Cloud 22 | 1. Truy cập [Bảng điều khiển dịch vụ lưu trữ đối tượng của Alibaba Cloud](https://oss.console.aliyun.com/overview), lần đầu truy cập cần kích hoạt dịch vụ. 23 | 2. Chọn danh sách Bucket ở bên trái, sau đó nhấp vào "Tạo". 24 | ![Alibaba Cloud OSS](/docs/images/aliyun_oss_1.png) 25 | 3. Chọn "Tạo nhanh", điền tên Bucket phù hợp và chọn khu vực **Thượng Hải**, hoàn tất việc tạo (tên điền ở đây chính là giá trị của cấu hình `aliyun.oss.bucket`). 26 | ![Alibaba Cloud OSS](/docs/images/aliyun_oss_2.png) 27 | 4. Sau khi tạo xong, vào Bucket. 28 | ![Alibaba Cloud OSS](/docs/images/aliyun_oss_3.png) 29 | 5. Tắt công tắc "Chặn truy cập công cộng" và thiết lập quyền đọc/ghi thành "Đọc công cộng". 30 | ![Alibaba Cloud OSS](/docs/images/aliyun_oss_4.png) 31 | ![Alibaba Cloud OSS](/docs/images/aliyun_oss_5.png) -------------------------------------------------------------------------------- /docs/vi/docker.md: -------------------------------------------------------------------------------- 1 | # Hướng dẫn triển khai Docker 2 | 3 | ## Bắt đầu nhanh 4 | Trước tiên, chuẩn bị tệp cấu hình, thiết lập cổng lắng nghe của máy chủ là `8888`, địa chỉ lắng nghe của máy chủ là `0.0.0.0`. 5 | 6 | ### Khởi động bằng docker run 7 | ```bash 8 | docker run -d \ 9 | -p 8888:8888 \ 10 | -v /path/to/config.toml:/app/config/config.toml \ 11 | -v /path/to/tasks:/app/tasks \ 12 | asteria798/krillinai 13 | ``` 14 | 15 | ### Khởi động bằng docker-compose 16 | ```yaml 17 | version: '3' 18 | services: 19 | krillin: 20 | image: asteria798/krillinai 21 | ports: 22 | - "8888:8888" 23 | volumes: 24 | - /path/to/config.toml:/app/config/config.toml # Tệp cấu hình 25 | - /path/to/tasks:/app/tasks # Thư mục đầu ra 26 | ``` 27 | 28 | ## Lưu trữ mô hình 29 | Nếu sử dụng mô hình fasterwhisper, KrillinAI sẽ tự động tải xuống các tệp cần thiết cho mô hình vào thư mục `/app/models` và thư mục `/app/bin`. Sau khi xóa container, các tệp này sẽ bị mất. Nếu cần lưu trữ mô hình, bạn có thể ánh xạ hai thư mục này đến thư mục của máy chủ. 30 | 31 | ### Khởi động bằng docker run 32 | ```bash 33 | docker run -d \ 34 | -p 8888:8888 \ 35 | -v /path/to/config.toml:/app/config/config.toml \ 36 | -v /path/to/tasks:/app/tasks \ 37 | -v /path/to/models:/app/models \ 38 | -v /path/to/bin:/app/bin \ 39 | asteria798/krillinai 40 | ``` 41 | 42 | ### Khởi động bằng docker-compose 43 | ```yaml 44 | version: '3' 45 | services: 46 | krillin: 47 | image: asteria798/krillinai 48 | ports: 49 | - "8888:8888" 50 | volumes: 51 | - /path/to/config.toml:/app/config/config.toml 52 | - /path/to/tasks:/app/tasks 53 | - /path/to/models:/app/models 54 | - /path/to/bin:/app/bin 55 | ``` 56 | 57 | ## Lưu ý 58 | 1. 
Nếu chế độ mạng của container docker không phải là host, nên thiết lập địa chỉ lắng nghe của máy chủ trong tệp cấu hình là `0.0.0.0`, nếu không có thể không truy cập được dịch vụ. 59 | 2. Nếu trong container cần truy cập proxy mạng của máy chủ, hãy thiết lập mục cấu hình proxy `127.0.0.1` thành `host.docker.internal`, ví dụ `http://host.docker.internal:7890`. -------------------------------------------------------------------------------- /docs/vi/faq.md: -------------------------------------------------------------------------------- 1 | ### 1. Không thấy tệp cấu hình `app.log`, không biết nội dung lỗi 2 | Người dùng Windows vui lòng đặt thư mục làm việc của phần mềm này ở một thư mục không phải ổ C. 3 | 4 | ### 2. Phiên bản không phải desktop đã tạo tệp cấu hình nhưng vẫn báo lỗi “Không tìm thấy tệp cấu hình” 5 | Đảm bảo tên tệp cấu hình là `config.toml`, chứ không phải `config.toml.txt` hoặc cái gì khác. 6 | Sau khi cấu hình xong, cấu trúc thư mục làm việc của phần mềm này nên như sau: 7 | ``` 8 | /── config/ 9 | │ └── config.toml 10 | ├── cookies.txt (<- tệp cookies.txt tùy chọn) 11 | └── krillinai.exe 12 | ``` 13 | 14 | ### 3. Đã điền cấu hình mô hình lớn nhưng báo lỗi “xxxxx cần cấu hình xxxxx API Key” 15 | Dịch vụ mô hình và dịch vụ giọng nói mặc dù có thể sử dụng dịch vụ của openai, nhưng cũng có những trường hợp mô hình lớn sử dụng dịch vụ không phải openai, vì vậy hai phần cấu hình này là tách biệt. Ngoài cấu hình mô hình lớn, vui lòng tìm cấu hình whisper bên dưới để điền các thông tin như khóa tương ứng. 16 | 17 | ### 4. Báo lỗi có chứa “yt-dlp error” 18 | Vấn đề của trình tải video, hiện tại có vẻ chỉ là vấn đề mạng hoặc phiên bản trình tải, hãy kiểm tra xem proxy mạng có đang mở và được cấu hình trong mục cấu hình proxy hay không, đồng thời khuyên bạn nên chọn nút Hong Kong. Trình tải được cài đặt tự động bởi phần mềm này, nguồn cài đặt tôi sẽ cập nhật nhưng không phải là nguồn chính thức, vì vậy có thể sẽ có độ trễ, nếu gặp vấn đề hãy thử cập nhật thủ công, phương pháp cập nhật: 19 | 20 | Mở terminal tại vị trí thư mục bin của phần mềm, thực hiện 21 | ``` 22 | ./yt-dlp.exe -U 23 | ``` 24 | Tại đây, thay thế `yt-dlp.exe` bằng tên phần mềm ytdlp thực tế trên hệ thống của bạn. 25 | 26 | ### 5. Sau khi triển khai, phụ đề được tạo bình thường nhưng phụ đề ghép vào video có nhiều ký tự lạ 27 | Phần lớn là do Linux thiếu phông chữ tiếng Trung. Vui lòng tải xuống phông chữ [Microsoft YaHei](https://modelscope.cn/models/Maranello/KrillinAI_dependency_cn/resolve/master/%E5%AD%97%E4%BD%93/msyh.ttc) và [Microsoft YaHei-bold](https://modelscope.cn/models/Maranello/KrillinAI_dependency_cn/resolve/master/%E5%AD%97%E4%BD%93/msyhbd.ttc) (hoặc tự chọn phông chữ phù hợp với yêu cầu của bạn), sau đó thực hiện theo các bước dưới đây: 28 | 1. Tạo thư mục msyh trong /usr/share/fonts/ và sao chép phông chữ đã tải xuống vào thư mục đó 29 | 2. 30 | ``` 31 | cd /usr/share/fonts/msyh 32 | sudo mkfontscale 33 | sudo mkfontdir 34 | fc-cache 35 | ``` 36 | 37 | ### 6. Làm thế nào để điền mã âm sắc cho tổng hợp giọng nói? 
38 | Vui lòng tham khảo tài liệu của nhà cung cấp dịch vụ giọng nói, dưới đây là tài liệu liên quan đến dự án này: 39 | [Tài liệu OpenAI TTS](https://platform.openai.com/docs/guides/text-to-speech/api-reference), nằm ở tùy chọn Giọng nói 40 | [Tài liệu tương tác giọng nói thông minh của Alibaba Cloud](https://help.aliyun.com/zh/isi/developer-reference/overview-of-speech-synthesis), nằm ở danh sách âm sắc - giá trị tham số voice -------------------------------------------------------------------------------- /docs/zh/README.md: -------------------------------------------------------------------------------- 1 |
2 | KlicStudio 3 | 4 | # 极简部署AI视频翻译配音工具 5 | 6 | KrillinAI%2FKlicStudio | Trendshift 7 | 8 | **[English](/README.md)|[简体中文](/docs/zh/README.md)|[日本語](/docs/jp/README.md)|[한국어](/docs/kr/README.md)|[Tiếng Việt](/docs/vi/README.md)|[Français](/docs/fr/README.md)|[Deutsch](/docs/de/README.md)|[Español](/docs/es/README.md)|[Português](/docs/pt/README.md)|[Русский](/docs/rus/README.md)|[اللغة العربية](/docs/ar/README.md)** 9 | 10 | [![Twitter](https://img.shields.io/badge/Twitter-KrillinAI-orange?logo=twitter)](https://x.com/KrillinAI) 11 | [![QQ 群](https://img.shields.io/badge/QQ%20群-754069680-green?logo=tencent-qq)](https://jq.qq.com/?_wv=1027&k=754069680) 12 | [![Bilibili](https://img.shields.io/badge/dynamic/json?label=Bilibili&query=%24.data.follower&suffix=粉丝&url=https%3A%2F%2Fapi.bilibili.com%2Fx%2Frelation%2Fstat%3Fvmid%3D242124650&logo=bilibili&color=00A1D6&labelColor=FE7398&logoColor=FFFFFF)](https://space.bilibili.com/242124650) 13 | 14 |
15 | 16 | ## 项目简介 ([现在体验在线版本!](https://www.klic.studio/)) 17 | 18 | Klic Studio是Krillin AI开发的一款全能型音视频本地化与增强解决方案。这款简约而强大的工具,集音视频翻译、配音、语音克隆于一身,支持横竖屏格式输出,确保在所有主流平台(哔哩哔哩,小红书,抖音,视频号,快手,YouTube,TikTok等)都能完美呈现。通过端到端的工作流程,仅需点击几次,就能将原始素材转化为精美即用的跨平台内容。 19 | 20 | ## 主要特点与功能: 21 | 🎯 **一键启动**:无需复杂的环境配置,自动安装依赖,立即投入使用,新增桌面版本,使用更便捷! 22 | 23 | 📥 **视频获取**:支持yt-dlp下载或本地文件上传 24 | 25 | 📜 **精准识别**:基于Whisper的高准确度语音识别 26 | 27 | 🧠 **智能分段**:使用LLM进行字幕分段和对齐 28 | 29 | 🔄 **术语替换**:一键替换专业领域词汇 30 | 31 | 🌍 **专业翻译**:带上下文进行LLM翻译保持语义自然 32 | 33 | 🎙️ **配音克隆**:提供CosyVoice精选音色或自定义音色克隆 34 | 35 | 🎬 **视频合成**:自动处理横竖版视频和字幕排版 36 | 37 | 💻 **跨平台**:支持Windows、Linux、macOS,提供桌面版和server版 38 | 39 | 40 | ## 效果展示 41 | 下图为46分钟的本地视频导入,一键执行后生成的字幕文件入轨后的效果,无任何手动调整。无缺失、重叠,断句自然,翻译质量也非常高。 42 | ![对齐效果](/docs/images/alignment.png) 43 | 44 | 45 | 46 | 53 | 62 | 63 | 70 | 71 | 72 |
47 | 48 | ### 字幕翻译 49 | --- 50 | https://github.com/user-attachments/assets/bba1ac0a-fe6b-4947-b58d-ba99306d0339 51 | 52 | 54 | 55 | 56 | 57 | ### 配音 58 | --- 59 | https://github.com/user-attachments/assets/0b32fad3-c3ad-4b6a-abf0-0865f0dd2385 60 | 61 | 64 | 65 | ### 竖屏 66 | --- 67 | https://github.com/user-attachments/assets/c2c7b528-0ef8-4ba9-b8ac-f9f92f6d4e71 68 | 69 |
73 | 74 | ## 🔍 语音识别服务支持 75 | _**下表中的本地模型全部支持自动安装可执行文件+模型文件,你只要选择,其它的Klic帮你全部准备完毕。**_ 76 | 77 | | 服务源 | 支持平台 | 模型可选项 | 本地/云端 | 备注 | 78 | |--------------------|-----------------|----------------------------------------|-------|-------------| 79 | | **OpenAI Whisper** | 全平台 | - | 云端 | 速度快效果好 | 80 | | **FasterWhisper** | Windows/Linux | `tiny`/`medium`/`large-v2` (推荐medium+) | 本地 | 速度更快,无云服务开销 | 81 | | **WhisperKit** | macOS (仅限M系列芯片) | `large-v2` | 本地 | Apple芯片原生优化 | 82 | | **WhisperCpp** | 全平台 | `large-v2` | 本地 | 支持全平台 | 83 | | **阿里云ASR** | 全平台 | - | 云端 | 避免中国大陆网络问题 | 84 | 85 | ## 🚀 大语言模型支持 86 | 87 | ✅ 兼容所有符合 **OpenAI API规范** 的云端/本地大语言模型服务,包括但不限于: 88 | - OpenAI 89 | - Gemini 90 | - DeepSeek 91 | - 通义千问 92 | - 本地部署的开源模型 93 | - 其他兼容OpenAI格式的API服务 94 | 95 | ## 🎤 TTS文本转语音支持 96 | - 阿里云语音服务 97 | - OpenAI TTS 98 | 99 | ## 语言支持 100 | 输入语言支持:中文,英文,日语,德语,土耳其,韩语,俄语,马来语(持续增加中) 101 | 102 | 翻译语言支持:英文,中文,俄语,西班牙语,法语等101种语言 103 | 104 | ## 界面预览 105 | ![界面预览](/docs/images/ui_desktop.png) 106 | 107 | 108 | ## 🚀 快速开始 109 | ### 基本步骤 110 | 首先下载[Release](https://github.com/KrillinAI/KlicStudio/releases)中与你设备系统匹配的可执行文件,按照下面的教程选择桌面版还是非桌面版,然后放入空文件夹,把软件下载到一个空文件夹,因为运行之后会生成一些目录,放到空文件夹会好管理一些。 111 | 112 | 【如果是桌面版,即release文件带desktop的看此处】 113 | _桌面版是新发布的,为了解决新手用户难以正确编辑配置文件的问题,还有一些bug,持续更新中_ 114 | 1. 双击文件即可开始使用(桌面端也是需要配置的,在软件内配置) 115 | 116 | 【如果是非桌面版,即release文件不带desktop的看此处】 117 | _非桌面版是一开始的版本,配置比较复杂,但是功能稳定,同时适合服务器部署,因为会以web的方式提供ui_ 118 | 1. 在文件夹内创建`config`文件夹,然后在`config`文件夹创建`config.toml`文件,复制源代码`config`目录下的`config-example.toml`文件的内容填入`config.toml`,并按注释对照填写你的配置信息。 119 | 2. 双击,或在终端执行可执行文件,启动服务 120 | 3. 打开浏览器,输入`http://127.0.0.1:8888`,开始使用 (8888替换成你在配置文件中填写的端口) 121 | 122 | ### To: macOS用户 123 | 【如果是桌面版,即release文件带desktop的看此处】 124 | 桌面端目前打包方式由于签名等问题,还不能够做到双击直接运行或者dmg安装,需要手动信任应用,方法如下: 125 | 1. 在终端打开可执行文件(假设文件名是KlicStudio_1.0.0_desktop_macOS_arm64)所在目录 126 | 2. 依次执行以下命令: 127 | ``` 128 | sudo xattr -cr ./KlicStudio_1.0.0_desktop_macOS_arm64 129 | sudo chmod +x ./KlicStudio_1.0.0_desktop_macOS_arm64 130 | ./KlicStudio_1.0.0_desktop_macOS_arm64 131 | ``` 132 | 133 | 【如果是非桌面版,即release文件不带desktop的看此处】 134 | 本软件没有做签名,因此在macOS上运行时,在完成“基本步骤”中的文件配置后,还需要手动信任应用,方法如下: 135 | 1. 在终端打开可执行文件(假设文件名是KlicStudio_1.0.0_macOS_arm64)所在目录 136 | 2. 依次执行以下命令: 137 | ``` 138 | sudo xattr -rd com.apple.quarantine ./KlicStudio_1.0.0_macOS_arm64 139 | sudo chmod +x ./KlicStudio_1.0.0_macOS_arm64 140 | ./KlicStudio_1.0.0_macOS_arm64 141 | ``` 142 | 即可启动服务 143 | 144 | ### Docker部署 145 | 本项目支持Docker部署,请参考[Docker部署说明](./docker.md) 146 | 147 | ### Cookie配置说明(非必选) 148 | 149 | 如果你遇到视频下载失败的情况 150 | 151 | 请参考 [Cookie 配置说明](./get_cookies.md) 配置你的Cookie信息。 152 | 153 | ### 配置帮助(必看) 154 | 最快速便捷的配置方式: 155 | * `transcribe.provider.name`填写`openai`,这样只需要填写`transcribe.openai`块,以及`llm`块的大模型配置就可以进行字幕翻译。(`app.proxy`、`model`和`openai.base_url`按自己情况选填) 156 | 157 | 使用本地语言识别模型的配置方式(兼顾成本、速度与质量的选择) 158 | * `transcribe.provider.name`填写`fasterwhisper`,`transcribe.fasterwhisper.model`填写`large-v2`,然后再填写`llm`填写大模型配置,就可以进行字幕翻译,本地模型会自动下载安装。(`app.proxy`和`openai.base_url`等同上) 159 | 160 | 文本转语音(TTS)是可选的,配置逻辑和上面一样,填写`tts.provider.name`,然后填写`tts`下面对应的配置块就可以了,UI里声音代码按照选择的提供商的文档进行填写即可(下方常见问题里有文档地址)。阿里云的aksk等的填写可能会重复,这是为了保证配置结构清晰。 161 | 注意:使用声音克隆的话,`tts`只支持选择`aliyun`。 162 | 163 | **阿里云AccessKey、Bucket、AppKey的获取请阅读**:[阿里云配置说明](./aliyun.md) 164 | 165 | 请理解,任务=语音识别+大模型翻译+语音服务(TTS等,可选),这对于你理解配置文件很有帮助。 166 | 167 | ## 常见问题 168 | 169 | 请移步[常见问题](./faq.md) 170 | 171 | ## 贡献规范 172 | 1. 不要提交无用文件,如.vscode、.idea等,请善于使用.gitignore过滤 173 | 2. 不要提交config.toml,而是使用config-example.toml提交 174 | 175 | ## 联系我们 176 | 1. 
加入我们的QQ群,解答问题:754069680 177 | 2. 关注我们的社交媒体账号,[哔哩哔哩](https://space.bilibili.com/242124650),每天分享AI科技领域优质内容 178 | 179 | ## Star History 180 | 181 | [![Star History Chart](https://api.star-history.com/svg?repos=KrillinAI/KlicStudio&type=Date)](https://star-history.com/#KrillinAI/KlicStudio&Date) 182 | -------------------------------------------------------------------------------- /docs/zh/aliyun.md: -------------------------------------------------------------------------------- 1 | ## 前提条件 2 | 需要先有[阿里云](https://www.aliyun.com)账号并经过实名认证,多数服务有免费额度 3 | 4 | ## 阿里云`access_key_id`和`access_key_secret`获取 5 | 1. 进入[阿里云AccessKey管理页面](https://ram.console.aliyun.com/profile/access-keys) 6 | 2. 点击创建AccessKey,如需要选择使用方式,选择“本地开发环境中使用” 7 | ![阿里云access key](/docs/images/aliyun_accesskey_1.png) 8 | 3. 妥善保管,最好复制到本地文件保存 9 | 10 | ## 阿里云语音服务开通 11 | 1. 进入[阿里云语音服务管理页面](https://nls-portal.console.aliyun.com/applist),首次进入需开通服务 12 | 2. 点击创建项目 13 | ![阿里云speech](/docs/images/aliyun_speech_1.png) 14 | 3. 选择功能并开通 15 | ![阿里云speech](/docs/images/aliyun_speech_2.png) 16 | 4. “流式文本语音合成(CosyVoice大模型)”需要升级成商业版,其它服务可以用免费体验版 17 | ![阿里云speech](/docs/images/aliyun_speech_3.png) 18 | 5. 复制app key即可 19 | ![阿里云speech](/docs/images/aliyun_speech_4.png) 20 | 21 | ## 阿里云OSS服务开通 22 | 1. 进入[阿里云对象存储服务控制台](https://oss.console.aliyun.com/overview),首次进入需开通服务 23 | 2. 左侧选择Bucket列表,然后点击创建 24 | ![阿里云OSS](/docs/images/aliyun_oss_1.png) 25 | 3. 选择快捷创建,填写符合要求的Bucket名称并选择**上海**地域,完成创建(此处填写的名字就是配置项`aliyun.oss.bucket`的值) 26 | ![阿里云OSS](/docs/images/aliyun_oss_2.png) 27 | 4. 创建完成后进入Bucket 28 | ![阿里云OSS](/docs/images/aliyun_oss_3.png) 29 | 5. 将“阻止公共访问”开关关闭,并设置读写权限为“公共读” 30 | ![阿里云OSS](/docs/images/aliyun_oss_4.png) 31 | ![阿里云OSS](/docs/images/aliyun_oss_5.png) -------------------------------------------------------------------------------- /docs/zh/docker.md: -------------------------------------------------------------------------------- 1 | # Docker 部署指南 2 | 3 | ## 快速开始 4 | 先准备好配置文件,设置服务器监听端口为`8888`、服务器监听地址为`0.0.0.0` 5 | 6 | ### docker run启动 7 | ```bash 8 | docker run -d \ 9 | -p 8888:8888 \ 10 | -v /path/to/config.toml:/app/config/config.toml \ 11 | -v /path/to/tasks:/app/tasks \ 12 | asteria798/krillinai 13 | ``` 14 | 15 | ### docker-compose启动 16 | ```yaml 17 | version: '3' 18 | services: 19 | krillin: 20 | image: asteria798/krillinai 21 | ports: 22 | - "8888:8888" 23 | volumes: 24 | - /path/to/config.toml:/app/config/config.toml # 配置文件 25 | - /path/to/tasks:/app/tasks # 输出目录 26 | ``` 27 | 28 | ## 持久化模型 29 | 如果使用fasterwhisper模型, KrillinAI 会自动下载模型所需文件到`/app/models`目录和`/app/bin`目录。容器删除后,这些文件会丢失。如果需要持久化模型,可以将这两个目录映射到宿主机的目录。 30 | 31 | ### docker run启动 32 | ```bash 33 | docker run -d \ 34 | -p 8888:8888 \ 35 | -v /path/to/config.toml:/app/config/config.toml \ 36 | -v /path/to/tasks:/app/tasks \ 37 | -v /path/to/models:/app/models \ 38 | -v /path/to/bin:/app/bin \ 39 | asteria798/krillinai 40 | ``` 41 | 42 | ### docker-compose启动 43 | ```yaml 44 | version: '3' 45 | services: 46 | krillin: 47 | image: asteria798/krillinai 48 | ports: 49 | - "8888:8888" 50 | volumes: 51 | - /path/to/config.toml:/app/config/config.toml 52 | - /path/to/tasks:/app/tasks 53 | - /path/to/models:/app/models 54 | - /path/to/bin:/app/bin 55 | ``` 56 | 57 | ## 注意事项 58 | 1. 如果docker容器的网络模式不为host,建议将配置文件服务器监听地址设置为`0.0.0.0`,否则可能无法访问服务。 59 | 2. 
如果容器内需要访问宿主机的网络代理,请将代理地址配置项`proxy`的`127.0.0.1`设置为`host.docker.internal`,例如`http://host.docker.internal:7890` 60 | -------------------------------------------------------------------------------- /docs/zh/faq.md: -------------------------------------------------------------------------------- 1 | ### 1. 看不到`app.log`配置文件,无法知道报错内容 2 | Windows用户请将本软件的工作目录放在非C盘的文件夹。 3 | 4 | ### 2. 非桌面版明明创建了配置文件,但还是报错“找不到配置文件” 5 | 确保配置文件名是`config.toml`,而不是`config.toml.txt`或其它。 6 | 配置完成后,本软件的工作文件夹的结构应该是这样的: 7 | ``` 8 | /── config/ 9 | │ └── config.toml 10 | ├── cookies.txt (<- 可选的cookies.txt文件) 11 | └── krillinai.exe 12 | ``` 13 | 14 | ### 3. 填写了大模型配置,但是报错“xxxxx需要配置xxxxx API Key” 15 | 模型服务和语音服务虽然可以都用openai的服务,但是也有大模型单独使用非openai的场景,因此这两块配置是分开的,除了大模型配置,请往配置下方找whisper配置填写对应的密钥等信息。 16 | 17 | ### 4. 报错内含“yt-dlp error” 18 | 视频下载器的问题,目前看来无非就是网络问题或者下载器版本问题,检查下网络代理有没有打开并且配置到配置文件的代理配置项,同时建议选择香港节点。下载器是本软件自动安装的,安装的源我会更新但毕竟不是官方源,所以可能会有落后,遇到问题尝试手动更新一下,更新方法: 19 | 20 | 在软件bin目录位置打开终端,执行 21 | ``` 22 | ./yt-dlp.exe -U 23 | ``` 24 | 此处`yt-dlp.exe`替换为你系统实际的ytdlp软件名称。 25 | 26 | ### 5. 部署后,字幕生成正常,但是合成的字幕嵌入视频里有很多乱码 27 | 多数是因为Linux缺失中文字体。请下载[微软雅黑](https://modelscope.cn/models/Maranello/KrillinAI_dependency_cn/resolve/master/%E5%AD%97%E4%BD%93/msyh.ttc)和[微软雅黑-bold](https://modelscope.cn/models/Maranello/KrillinAI_dependency_cn/resolve/master/%E5%AD%97%E4%BD%93/msyhbd.ttc)字体(或者自行选择满足你要求的字体),然后按下面的步骤操作: 28 | 1. 在/usr/share/fonts/下新建msyh文件夹并拷贝下载的字体到该目录内 29 | 2. 30 | ``` 31 | cd /usr/share/fonts/msyh 32 | sudo mkfontscale 33 | sudo mkfontdir 34 | fc-cache 35 | ``` 36 | 37 | ### 6. 语音合成的音色代码怎么填? 38 | 请参照语音服务提供商的文档,以下是本项目相关的: 39 | [OpenAI TTS文档](https://platform.openai.com/docs/guides/text-to-speech/api-reference), 位于Voice options 40 | [阿里云智能语音交互文档](https://help.aliyun.com/zh/isi/developer-reference/overview-of-speech-synthesis) ,位于音色列表-voice参数值 -------------------------------------------------------------------------------- /faq.md: -------------------------------------------------------------------------------- 1 | ### 1. Cannot find `app.log` configuration file, unable to know the error content 2 | Windows users please place the working directory of this software in a folder that is not on the C drive. 3 | 4 | ### 2. The configuration file was created for the non-desktop version, but it still reports "Configuration file not found" 5 | Ensure that the configuration file name is `config.toml`, not `config.toml.txt` or something else. After configuration, the structure of the working folder for this software should look like this: 6 | ``` 7 | /── config/ 8 | │ └── config.toml 9 | ├── cookies.txt (<- Optional cookies.txt file) 10 | └── krillinai.exe 11 | ``` 12 | 13 | ### 3. Filled in the large model configuration, but it reports "xxxxx requires configuration of xxxxx API Key" 14 | Although both the model service and voice service can use OpenAI's services, there are scenarios where the large model uses non-OpenAI services separately. Therefore, these two configurations are separate. In addition to the large model configuration, please look for the whisper configuration below to fill in the corresponding keys and other information. 15 | 16 | ### 4. Error contains "yt-dlp error" 17 | The issue with the video downloader seems to be either a network problem or a downloader version issue. Check if the network proxy is enabled and configured in the proxy configuration item of the configuration file, and it is recommended to choose a Hong Kong node. 
The downloader is automatically installed by this software; I will update the installation source, but since it is not the official source, it may be outdated. If you encounter issues, try updating it manually with the following method: 18 | 19 | Open a terminal in the software's bin directory and execute 20 | ``` 21 | ./yt-dlp.exe -U 22 | ``` 23 | Replace `yt-dlp.exe` with the actual name of the ytdlp software on your system. 24 | 25 | ### 5. After deployment, subtitles are generated normally, but the embedded subtitles in the video have a lot of garbled text 26 | Most of this is due to missing Chinese fonts on Linux. Please download the [Microsoft YaHei](https://modelscope.cn/models/Maranello/KrillinAI_dependency_cn/resolve/master/%E5%AD%97%E4%BD%93/msyh.ttc) and [Microsoft YaHei Bold](https://modelscope.cn/models/Maranello/KrillinAI_dependency_cn/resolve/master/%E5%AD%97%E4%BD%93/msyhbd.ttc) fonts (or choose fonts that meet your requirements), and then follow these steps: 27 | 1. Create a msyh folder under /usr/share/fonts/ and copy the downloaded fonts to this directory. 28 | 2. 29 | ``` 30 | cd /usr/share/fonts/msyh 31 | sudo mkfontscale 32 | sudo mkfontdir 33 | fc-cache 34 | ``` 35 | 36 | ### 6. How to fill in the voice synthesis tone code? 37 | Please refer to the documentation of the voice service provider. The following are related to this project: 38 | [OpenAI TTS Documentation](https://platform.openai.com/docs/guides/text-to-speech/api-reference), located in Voice options 39 | [Alibaba Cloud Intelligent Voice Interaction Documentation](https://help.aliyun.com/zh/isi/developer-reference/overview-of-speech-synthesis), located in Tone List - voice parameter values -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module krillin-ai 2 | 3 | go 1.22 4 | 5 | require ( 6 | fyne.io/fyne/v2 v2.5.4 7 | github.com/BurntSushi/toml v1.4.0 8 | github.com/aliyun/alibaba-cloud-sdk-go v1.63.72 9 | github.com/aliyun/alibabacloud-oss-go-sdk-v2 v1.1.3 10 | github.com/gin-gonic/gin v1.10.0 11 | github.com/go-resty/resty/v2 v2.7.0 12 | github.com/google/uuid v1.4.0 13 | github.com/gorilla/websocket v1.5.0 14 | github.com/samber/lo v1.38.1 15 | github.com/sashabaranov/go-openai v1.36.0 16 | go.uber.org/zap v1.25.0 17 | golang.org/x/sync v0.9.0 18 | ) 19 | 20 | require ( 21 | fyne.io/systray v1.11.0 // indirect 22 | github.com/aliyun/alibabacloud-nls-go-sdk v1.1.1 // indirect 23 | github.com/bytedance/sonic v1.11.6 // indirect 24 | github.com/bytedance/sonic/loader v0.1.1 // indirect 25 | github.com/cloudwego/base64x v0.1.4 // indirect 26 | github.com/cloudwego/iasm v0.2.0 // indirect 27 | github.com/davecgh/go-spew v1.1.1 // indirect 28 | github.com/fredbi/uri v1.1.0 // indirect 29 | github.com/fsnotify/fsnotify v1.7.0 // indirect 30 | github.com/fyne-io/gl-js v0.0.0-20220119005834-d2da28d9ccfe // indirect 31 | github.com/fyne-io/glfw-js v0.0.0-20241126112943-313d8a0fe1d0 // indirect 32 | github.com/fyne-io/image v0.0.0-20220602074514-4956b0afb3d2 // indirect 33 | github.com/gabriel-vasile/mimetype v1.4.3 // indirect 34 | github.com/gin-contrib/sse v0.1.0 // indirect 35 | github.com/go-gl/gl v0.0.0-20211210172815-726fda9656d6 // indirect 36 | github.com/go-gl/glfw/v3.3/glfw v0.0.0-20240506104042-037f3cc74f2a // indirect 37 | github.com/go-playground/locales v0.14.1 // indirect 38 | github.com/go-playground/universal-translator v0.18.1 // indirect 39 | 
github.com/go-playground/validator/v10 v10.20.0 // indirect 40 | github.com/go-text/render v0.2.0 // indirect 41 | github.com/go-text/typesetting v0.2.0 // indirect 42 | github.com/goccy/go-json v0.10.2 // indirect 43 | github.com/godbus/dbus/v5 v5.1.0 // indirect 44 | github.com/google/go-cmp v0.5.9 // indirect 45 | github.com/gopherjs/gopherjs v1.17.2 // indirect 46 | github.com/jeandeaual/go-locale v0.0.0-20240223122105-ce5225dcaa49 // indirect 47 | github.com/jmespath/go-jmespath v0.4.0 // indirect 48 | github.com/json-iterator/go v1.1.12 // indirect 49 | github.com/jsummers/gobmp v0.0.0-20151104160322-e2ba15ffa76e // indirect 50 | github.com/klauspost/cpuid/v2 v2.2.7 // indirect 51 | github.com/kr/pretty v0.3.1 // indirect 52 | github.com/leodido/go-urn v1.4.0 // indirect 53 | github.com/mattn/go-isatty v0.0.20 // indirect 54 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect 55 | github.com/modern-go/reflect2 v1.0.2 // indirect 56 | github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646 // indirect 57 | github.com/nicksnyder/go-i18n/v2 v2.4.0 // indirect 58 | github.com/opentracing/opentracing-go v1.2.1-0.20220228012449-10b1cf09e00b // indirect 59 | github.com/pelletier/go-toml/v2 v2.2.2 // indirect 60 | github.com/pmezard/go-difflib v1.0.0 // indirect 61 | github.com/rymdport/portal v0.3.0 // indirect 62 | github.com/satori/go.uuid v1.2.0 // indirect 63 | github.com/srwiley/oksvg v0.0.0-20221011165216-be6e8873101c // indirect 64 | github.com/srwiley/rasterx v0.0.0-20220730225603-2ab79fcdd4ef // indirect 65 | github.com/stretchr/testify v1.9.0 // indirect 66 | github.com/twitchyliquid64/golang-asm v0.15.1 // indirect 67 | github.com/ugorji/go/codec v1.2.12 // indirect 68 | github.com/yuin/goldmark v1.7.1 // indirect 69 | go.uber.org/atomic v1.10.0 // indirect 70 | go.uber.org/multierr v1.11.0 // indirect 71 | golang.org/x/arch v0.8.0 // indirect 72 | golang.org/x/crypto v0.23.0 // indirect 73 | golang.org/x/exp v0.0.0-20221031165847-c99f073a8326 // indirect 74 | golang.org/x/image v0.18.0 // indirect 75 | golang.org/x/mobile v0.0.0-20231127183840-76ac6878050a // indirect 76 | golang.org/x/net v0.25.0 // indirect 77 | golang.org/x/sys v0.20.0 // indirect 78 | golang.org/x/text v0.20.0 // indirect 79 | golang.org/x/time v0.4.0 // indirect 80 | google.golang.org/protobuf v1.34.1 // indirect 81 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect 82 | gopkg.in/ini.v1 v1.67.0 // indirect 83 | gopkg.in/yaml.v3 v3.0.1 // indirect 84 | ) 85 | -------------------------------------------------------------------------------- /internal/api/subtitle.go: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "path/filepath" 7 | "time" 8 | ) 9 | 10 | // WordReplacement 词语替换 11 | type WordReplacement struct { 12 | From string `json:"from"` 13 | To string `json:"to"` 14 | } 15 | 16 | // SubtitleTask 字幕任务 17 | type SubtitleTask struct { 18 | URL string `json:"url"` // 视频URL 19 | Language string `json:"language"` // 界面语言 20 | OriginLang string `json:"origin_lang"` // 源语言 21 | TargetLang string `json:"target_lang"` // 目标语言 22 | Bilingual int `json:"bilingual"` // 是否双语 1:是 2:否 23 | TranslationSubtitlePos int `json:"translation_subtitle_pos"` // 翻译字幕位置 1:上方 2:下方 24 | TTS int `json:"tts"` // 是否配音 1:是 2:否 25 | TTSVoiceCode string `json:"tts_voice_code,omitempty"` // 配音声音代码 26 | TTSVoiceCloneSrcFileURL string `json:"tts_voice_clone_src_file_url,omitempty"` // 音色克隆源文件URL 27 | 
ModalFilter int `json:"modal_filter"` // 是否过滤语气词 1:是 2:否 28 | Replace []string `json:"replace,omitempty"` // 词汇替换列表 29 | EmbedSubtitleVideoType string `json:"embed_subtitle_video_type"` // 字幕嵌入视频类型 none:不嵌入 horizontal:横屏 vertical:竖屏 all:全部 30 | VerticalMajorTitle string `json:"vertical_major_title,omitempty"` // 竖屏主标题 31 | VerticalMinorTitle string `json:"vertical_minor_title,omitempty"` // 竖屏副标题 32 | } 33 | 34 | // SubtitleResult 字幕结果 35 | type SubtitleResult struct { 36 | Name string `json:"name"` // 文件名 37 | DownloadURL string `json:"download_url"` // 下载URL 38 | } 39 | 40 | // TaskStatus 任务状态 41 | type TaskStatus struct { 42 | TaskId string `json:"task_id"` // 任务ID 43 | ProcessPercent int `json:"process_percent"` // 处理进度百分比 44 | Status string `json:"status"` // 任务状态 45 | Message string `json:"message"` // 状态消息 46 | SubtitleInfo []SubtitleResult `json:"subtitle_info"` // 字幕信息 47 | SpeechDownloadURL string `json:"speech_download_url"` // 配音下载URL 48 | } 49 | 50 | // CreateSubtitleTask 创建字幕任务 51 | func CreateSubtitleTask(task *SubtitleTask) (*TaskStatus, error) { 52 | // 生成任务ID 53 | taskId := generateTaskId() 54 | 55 | // 创建任务目录 56 | taskDir := filepath.Join("tasks", taskId) 57 | if err := createTaskDirectory(taskDir); err != nil { 58 | return nil, fmt.Errorf("创建任务目录失败: %v", err) 59 | } 60 | 61 | // 启动异步任务处理 62 | go processTask(taskId, task) 63 | 64 | return &TaskStatus{ 65 | TaskId: taskId, 66 | ProcessPercent: 0, 67 | Status: "created", 68 | Message: "任务已创建", 69 | }, nil 70 | } 71 | 72 | // GetSubtitleTaskStatus 获取任务状态 73 | func GetSubtitleTaskStatus(taskId string) (*TaskStatus, error) { 74 | // 获取任务状态 75 | status, err := getTaskStatus(taskId) 76 | if err != nil { 77 | return nil, fmt.Errorf("获取任务状态失败: %v", err) 78 | } 79 | 80 | // 如果任务完成,添加下载链接 81 | if status.ProcessPercent >= 100 { 82 | status.SubtitleInfo = []SubtitleResult{ 83 | { 84 | Name: "字幕.srt", 85 | DownloadURL: fmt.Sprintf("/tasks/%s/output/subtitle.srt", taskId), 86 | }, 87 | { 88 | Name: "字幕.ass", 89 | DownloadURL: fmt.Sprintf("/tasks/%s/output/subtitle.ass", taskId), 90 | }, 91 | } 92 | 93 | // 如果启用了配音,添加配音下载链接 94 | if status.SpeechDownloadURL == "" { 95 | status.SpeechDownloadURL = fmt.Sprintf("/tasks/%s/output/speech.mp3", taskId) 96 | } 97 | } 98 | 99 | return status, nil 100 | } 101 | 102 | // 以下是辅助函数,需要在实际使用时实现 103 | func generateTaskId() string { 104 | // TODO: 实现任务ID生成逻辑 105 | return "task-" + time.Now().Format("20060102150405") 106 | } 107 | 108 | func createTaskDirectory(taskDir string) error { 109 | // TODO: 实现任务目录创建逻辑 110 | return os.MkdirAll(taskDir, 0755) 111 | } 112 | 113 | func processTask(taskId string, task *SubtitleTask) { 114 | // TODO: 实现任务处理逻辑 115 | // 1. 下载视频 116 | // 2. 提取音频 117 | // 3. 语音识别 118 | // 4. 翻译字幕 119 | // 5. 生成字幕文件 120 | // 6. 如果需要,生成配音 121 | // 7. 如果需要,嵌入字幕到视频 122 | // 8. 
更新任务状态 123 | } 124 | 125 | func getTaskStatus(taskId string) (*TaskStatus, error) { 126 | // TODO: 实现任务状态获取逻辑 127 | return &TaskStatus{ 128 | TaskId: taskId, 129 | ProcessPercent: 50, 130 | Status: "processing", 131 | Message: "正在处理中", 132 | }, nil 133 | } 134 | -------------------------------------------------------------------------------- /internal/desktop/desktop.go: -------------------------------------------------------------------------------- 1 | package desktop 2 | 3 | import ( 4 | "fmt" 5 | "image/color" 6 | "krillin-ai/config" 7 | "krillin-ai/log" 8 | "time" 9 | 10 | "fyne.io/fyne/v2" 11 | "fyne.io/fyne/v2/app" 12 | "fyne.io/fyne/v2/canvas" 13 | "fyne.io/fyne/v2/container" 14 | "fyne.io/fyne/v2/dialog" 15 | "fyne.io/fyne/v2/layout" 16 | "fyne.io/fyne/v2/theme" 17 | "fyne.io/fyne/v2/widget" 18 | "go.uber.org/zap" 19 | ) 20 | 21 | func createNavButton(text string, icon fyne.Resource, isSelected bool, onTap func()) *widget.Button { 22 | btn := widget.NewButtonWithIcon(text, icon, onTap) 23 | 24 | // 根据选中状态设置颜色 25 | if isSelected { 26 | btn.Importance = widget.HighImportance 27 | } else { 28 | btn.Importance = widget.LowImportance 29 | } 30 | 31 | return btn 32 | } 33 | 34 | // Show 展示桌面 35 | func Show() { 36 | myApp := app.New() 37 | 38 | // 自定义主题 39 | myApp.Settings().SetTheme(NewCustomTheme(false)) 40 | 41 | myWindow := myApp.NewWindow("Krillin AI") 42 | 43 | logoContainer := container.NewVBox() 44 | 45 | logo := canvas.NewText("Krillin AI", color.NRGBA{R: 88, G: 157, B: 246, A: 255}) 46 | logo.TextSize = 28 47 | logo.TextStyle = fyne.TextStyle{Bold: true} 48 | logo.Alignment = fyne.TextAlignCenter 49 | 50 | separator := canvas.NewRectangle(color.NRGBA{R: 210, G: 225, B: 245, A: 255}) 51 | separator.SetMinSize(fyne.NewSize(0, 2)) 52 | 53 | slogan := canvas.NewText("智能内容创作助手", color.NRGBA{R: 100, G: 120, B: 160, A: 255}) 54 | slogan.TextSize = 12 55 | slogan.Alignment = fyne.TextAlignCenter 56 | 57 | logoContainer.Add(logo) 58 | logoContainer.Add(separator) 59 | logoContainer.Add(slogan) 60 | 61 | // 创建左侧导航栏 62 | navItems := []string{"工作台 Workbench", "配置 Config"} 63 | navIcons := []fyne.Resource{theme.DocumentIcon(), theme.SettingsIcon()} 64 | 65 | // 存储导航按钮列表 66 | var navButtons []*widget.Button 67 | navContainer := container.NewVBox() 68 | 69 | // 创建内容区域,使用Stack容器来叠放多个内容 70 | contentStack := container.NewStack() 71 | 72 | // 预先创建两个tab的内容 73 | workbenchContent := CreateSubtitleTab(myWindow) 74 | configContent := CreateConfigTab(myWindow) 75 | 76 | // 默认显示工作台内容 77 | contentStack.Add(workbenchContent) 78 | contentStack.Add(configContent) 79 | 80 | configContent.Hide() 81 | 82 | currentSelectedIndex := 0 83 | 84 | // 创建导航项 85 | for i, item := range navItems { 86 | index := i // 捕获变量 87 | isSelected := i == currentSelectedIndex 88 | 89 | // 创建导航按钮以及点击处理函数 90 | navBtn := createNavButton(item, navIcons[i], isSelected, func() { 91 | // 如果已经是当前选中项,不做任何操作 92 | if currentSelectedIndex == index { 93 | return 94 | } 95 | 96 | // 更新所有导航项的状态 97 | for j, btn := range navButtons { 98 | if j == index { 99 | btn.Importance = widget.HighImportance 100 | } else { 101 | btn.Importance = widget.LowImportance 102 | } 103 | } 104 | 105 | if index == 0 { 106 | // tab切换出去就保存和重新加载配置 107 | err := config.SaveConfig() 108 | if err != nil { 109 | // 保存配置失败,弹出提示框 110 | dialog.ShowError(fmt.Errorf("保存配置失败: %v", err), myWindow) 111 | log.GetLogger().Error("保存配置失败 Failed to save config", zap.Error(err)) 112 | return 113 | } 114 | log.GetLogger().Info("配置已保存 Config saved successfully") 115 | workbenchContent.Show() 
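// 切回工作台:隐藏配置页、刷新后做淡入动画 Switch back to the workbench: hide the config view, refresh it and fade it in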
116 | configContent.Hide() 117 | // 确保进度条和下载区域状态正确显示 118 | workbenchContent.Refresh() 119 | FadeAnimation(workbenchContent, 300*time.Millisecond, 0.0, 1.0) 120 | } else { 121 | workbenchContent.Hide() 122 | configContent.Show() 123 | FadeAnimation(configContent, 300*time.Millisecond, 0.0, 1.0) 124 | } 125 | 126 | // 更新当前选中的索引 127 | currentSelectedIndex = index 128 | navContainer.Refresh() 129 | contentStack.Refresh() 130 | }) 131 | 132 | // 将导航按钮添加到列表和容器中 133 | navButtons = append(navButtons, navBtn) 134 | navContainer.Add(container.NewPadded(navBtn)) 135 | } 136 | 137 | navBackground := canvas.NewRectangle(color.NRGBA{R: 250, G: 251, B: 254, A: 255}) 138 | 139 | navWithBackground := container.NewStack( 140 | navBackground, 141 | container.NewBorder( 142 | container.NewPadded(logoContainer), 143 | nil, nil, nil, 144 | container.NewPadded(navContainer), 145 | ), 146 | ) 147 | 148 | // 主布局 149 | split := container.NewHSplit(navWithBackground, container.NewPadded(contentStack)) 150 | split.SetOffset(0.2) 151 | 152 | mainContainer := container.NewPadded(split) 153 | 154 | // 底部状态栏 155 | statusText := canvas.NewText("就绪", color.NRGBA{R: 100, G: 120, B: 160, A: 180}) 156 | statusText.TextSize = 12 157 | statusBar := container.NewHBox( 158 | layout.NewSpacer(), 159 | statusText, 160 | ) 161 | 162 | finalContainer := container.NewBorder(nil, container.NewPadded(statusBar), nil, nil, mainContainer) 163 | 164 | myWindow.SetContent(finalContainer) 165 | myWindow.Resize(fyne.NewSize(1000, 700)) 166 | myWindow.CenterOnScreen() 167 | myWindow.ShowAndRun() 168 | 169 | // 关闭窗口时保存配置 170 | err := config.SaveConfig() 171 | if err != nil { 172 | log.GetLogger().Error("保存配置失败 Failed to save config", zap.Error(err)) 173 | return 174 | } 175 | log.GetLogger().Info("配置已保存 Config saved successfully") 176 | } 177 | -------------------------------------------------------------------------------- /internal/desktop/file.go: -------------------------------------------------------------------------------- 1 | package desktop 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "fmt" 7 | "io" 8 | "mime/multipart" 9 | "net/http" 10 | "os" 11 | "path/filepath" 12 | 13 | "fyne.io/fyne/v2" 14 | "fyne.io/fyne/v2/dialog" 15 | "fyne.io/fyne/v2/storage" 16 | ) 17 | 18 | type FileManager struct { 19 | window fyne.Window 20 | files []string 21 | selectedFiles []string // 多文件选择 22 | } 23 | 24 | func NewFileManager(window fyne.Window) *FileManager { 25 | return &FileManager{ 26 | window: window, 27 | files: make([]string, 0), 28 | selectedFiles: make([]string, 0), 29 | } 30 | } 31 | 32 | func (fm *FileManager) ShowUploadDialog() { 33 | fd := dialog.NewFileOpen(func(reader fyne.URIReadCloser, err error) { 34 | if err != nil { 35 | dialog.ShowError(err, fm.window) 36 | return 37 | } 38 | if reader == nil { 39 | return 40 | } 41 | 42 | // 获取文件路径 43 | filePath := reader.URI().Path() 44 | fileName := filepath.Base(filePath) 45 | 46 | err = fm.uploadFile(filePath, fileName) 47 | if err != nil { 48 | dialog.ShowError(err, fm.window) 49 | return 50 | } 51 | 52 | dialog.ShowInformation("成功", "文件上传成功", fm.window) 53 | }, fm.window) 54 | 55 | fd.Show() 56 | } 57 | 58 | func (fm *FileManager) ShowMultiUploadDialog() { 59 | // 清空已选择的文件 60 | fm.selectedFiles = make([]string, 0) 61 | 62 | fm.showAddFileDialog() 63 | } 64 | 65 | func (fm *FileManager) showAddFileDialog() { 66 | fd := dialog.NewFileOpen(func(reader fyne.URIReadCloser, err error) { 67 | if err != nil { 68 | dialog.ShowError(err, fm.window) 69 | return 70 | } 71 | if reader == nil { 
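// reader 为 nil 表示用户取消了文件选择,直接返回 A nil reader means the user cancelled the file dialog, so just return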
72 | return 73 | } 74 | 75 | // 获取文件路径 76 | filePath := reader.URI().Path() 77 | fm.selectedFiles = append(fm.selectedFiles, filePath) 78 | reader.Close() 79 | 80 | // 是否继续添加文件 81 | continueDialog := dialog.NewConfirm( 82 | "添加更多文件", 83 | fmt.Sprintf("已选择 %d 个文件。是否继续添加?", len(fm.selectedFiles)), 84 | func(cont bool) { 85 | if cont { 86 | // 继续添加文件 87 | fm.showAddFileDialog() 88 | } else { 89 | // 上传 90 | if len(fm.selectedFiles) > 0 { 91 | // 文件名列表 92 | fileNames := make([]string, len(fm.selectedFiles)) 93 | for i, path := range fm.selectedFiles { 94 | fileNames[i] = filepath.Base(path) 95 | } 96 | 97 | // 上传 98 | err := fm.uploadMultipleFiles(fm.selectedFiles, fileNames) 99 | if err != nil { 100 | dialog.ShowError(err, fm.window) 101 | return 102 | } 103 | 104 | dialog.ShowInformation("成功", fmt.Sprintf("已上传 %d 个文件", len(fm.selectedFiles)), fm.window) 105 | } 106 | } 107 | }, 108 | fm.window, 109 | ) 110 | continueDialog.Show() 111 | }, fm.window) 112 | 113 | fd.SetFilter(storage.NewExtensionFileFilter([]string{".mp4", ".mov", ".avi", ".mkv", ".wmv"})) 114 | fd.Show() 115 | } 116 | 117 | func (fm *FileManager) uploadFile(filePath, fileName string) error { 118 | file, err := os.Open(filePath) 119 | if err != nil { 120 | return err 121 | } 122 | defer file.Close() 123 | 124 | // 创建multipart form 125 | body := &bytes.Buffer{} 126 | writer := multipart.NewWriter(body) 127 | part, err := writer.CreateFormFile("file", fileName) 128 | if err != nil { 129 | return err 130 | } 131 | _, err = io.Copy(part, file) 132 | if err != nil { 133 | return err 134 | } 135 | writer.Close() 136 | 137 | // 发送请求 138 | resp, err := http.Post("http://localhost:8888/api/file", writer.FormDataContentType(), body) 139 | if err != nil { 140 | return err 141 | } 142 | defer resp.Body.Close() 143 | 144 | var result struct { 145 | Error int `json:"error"` 146 | Msg string `json:"msg"` 147 | Data struct { 148 | FilePath string `json:"file_path"` 149 | } `json:"data"` 150 | } 151 | 152 | err = json.NewDecoder(resp.Body).Decode(&result) 153 | if err != nil { 154 | return err 155 | } 156 | 157 | if result.Error != 0 && result.Error != 200 { 158 | return fmt.Errorf(result.Msg) 159 | } 160 | 161 | fm.files = append(fm.files, result.Data.FilePath) 162 | return nil 163 | } 164 | 165 | func (fm *FileManager) uploadMultipleFiles(filePaths []string, fileNames []string) error { 166 | // 显示上传进度对话框 167 | progress := dialog.NewProgress("上传中", "正在上传文件...", fm.window) 168 | progress.Show() 169 | defer progress.Hide() 170 | 171 | body := &bytes.Buffer{} 172 | writer := multipart.NewWriter(body) 173 | 174 | for i, filePath := range filePaths { 175 | file, err := os.Open(filePath) 176 | if err != nil { 177 | return err 178 | } 179 | 180 | part, err := writer.CreateFormFile("file", fileNames[i]) 181 | if err != nil { 182 | file.Close() 183 | return err 184 | } 185 | 186 | _, err = io.Copy(part, file) 187 | file.Close() 188 | if err != nil { 189 | return err 190 | } 191 | 192 | progress.SetValue(float64(i+1) / float64(len(filePaths))) 193 | } 194 | writer.Close() 195 | 196 | resp, err := http.Post("http://localhost:8888/api/file", writer.FormDataContentType(), body) 197 | if err != nil { 198 | return err 199 | } 200 | defer resp.Body.Close() 201 | 202 | var result struct { 203 | Error int `json:"error"` 204 | Msg string `json:"msg"` 205 | Data struct { 206 | FilePath []string `json:"file_path"` 207 | } `json:"data"` 208 | } 209 | 210 | err = json.NewDecoder(resp.Body).Decode(&result) 211 | if err != nil { 212 | return err 213 | } 214 | 215 | 
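// 检查业务错误码:0 或 200 表示成功,其余视为上传失败 Check the business error code: 0 or 200 means success, anything else is treated as a failed upload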
if result.Error != 0 && result.Error != 200 { 216 | return fmt.Errorf(result.Msg) 217 | } 218 | 219 | fm.files = append(fm.files, result.Data.FilePath...) 220 | return nil 221 | } 222 | 223 | func (fm *FileManager) GetFileCount() int { 224 | return len(fm.files) 225 | } 226 | 227 | func (fm *FileManager) GetFileName(index int) string { 228 | if index < 0 || index >= len(fm.files) { 229 | return "" 230 | } 231 | return filepath.Base(fm.files[index]) 232 | } 233 | 234 | func (fm *FileManager) DownloadFile(index int) { 235 | if index < 0 || index >= len(fm.files) { 236 | return 237 | } 238 | 239 | filePath := fm.files[index] 240 | 241 | dialog.ShowFileSave(func(writer fyne.URIWriteCloser, err error) { 242 | if err != nil { 243 | dialog.ShowError(err, fm.window) 244 | return 245 | } 246 | if writer == nil { 247 | return 248 | } 249 | 250 | resp, err := http.Get("http://localhost:8888" + filePath) 251 | if err != nil { 252 | dialog.ShowError(err, fm.window) 253 | return 254 | } 255 | defer resp.Body.Close() 256 | 257 | _, err = io.Copy(writer, resp.Body) 258 | if err != nil { 259 | dialog.ShowError(err, fm.window) 260 | return 261 | } 262 | 263 | writer.Close() 264 | dialog.ShowInformation("成功", "文件下载完成", fm.window) 265 | }, fm.window) 266 | } 267 | -------------------------------------------------------------------------------- /internal/desktop/theme.go: -------------------------------------------------------------------------------- 1 | package desktop 2 | 3 | import ( 4 | "image/color" 5 | 6 | "fyne.io/fyne/v2" 7 | "fyne.io/fyne/v2/theme" 8 | ) 9 | 10 | // customTheme 自定义主题 11 | type customTheme struct { 12 | baseTheme fyne.Theme 13 | forceDark bool 14 | } 15 | 16 | func NewCustomTheme(forceDark bool) fyne.Theme { 17 | if forceDark { 18 | return &customTheme{baseTheme: theme.DefaultTheme(), forceDark: true} 19 | } 20 | return &customTheme{baseTheme: theme.DefaultTheme(), forceDark: false} 21 | } 22 | 23 | func (t *customTheme) Color(name fyne.ThemeColorName, variant fyne.ThemeVariant) color.Color { 24 | if t.forceDark || variant == theme.VariantDark { 25 | return t.darkColors(name) 26 | } 27 | return t.lightColors(name) 28 | } 29 | 30 | // lightColors 浅色主题配色方案 31 | func (t *customTheme) lightColors(name fyne.ThemeColorName) color.Color { 32 | switch name { 33 | // 主色系 34 | case theme.ColorNamePrimary: 35 | return color.NRGBA{R: 100, G: 150, B: 240, A: 255} 36 | 37 | // 背景与前景 38 | case theme.ColorNameBackground: 39 | return color.NRGBA{R: 248, G: 249, B: 252, A: 255} // 极浅灰背景 40 | case theme.ColorNameForeground: 41 | return color.NRGBA{R: 30, G: 35, B: 45, A: 255} // 深灰文字 42 | case theme.ColorNameDisabled: 43 | return color.NRGBA{R: 180, G: 185, B: 190, A: 150} // 柔和禁用色 44 | 45 | // 按钮状态 46 | case theme.ColorNameButton: 47 | return color.NRGBA{R: 70, G: 130, B: 230, A: 255} 48 | case theme.ColorNameHover: 49 | return color.NRGBA{R: 90, G: 150, B: 240, A: 255} // 浅蓝悬停 50 | case theme.ColorNamePressed: 51 | return color.NRGBA{R: 50, G: 110, B: 210, A: 255} // 深蓝按下 52 | 53 | // 输入组件 54 | case theme.ColorNameInputBackground: 55 | return color.NRGBA{R: 255, G: 255, B: 255, A: 255} // 纯白输入框 56 | case theme.ColorNameInputBorder: 57 | return color.NRGBA{R: 210, G: 215, B: 220, A: 255} // 浅灰边框 58 | case theme.ColorNamePlaceHolder: 59 | return color.NRGBA{R: 160, G: 165, B: 170, A: 200} // 灰占位符 60 | 61 | // 其他 62 | case theme.ColorNameSelection: 63 | return color.NRGBA{R: 200, G: 225, B: 255, A: 180} // 淡蓝选中 64 | case theme.ColorNameScrollBar: 65 | return color.NRGBA{R: 200, G: 205, B: 210, A: 200} 66 
| case theme.ColorNameShadow: 67 | return color.NRGBA{R: 0, G: 0, B: 0, A: 25} // 柔和阴影 68 | 69 | // 状态色 70 | case theme.ColorNameError: 71 | return color.NRGBA{R: 230, G: 70, B: 70, A: 255} // 红色错误 72 | case theme.ColorNameWarning: 73 | return color.NRGBA{R: 245, G: 160, B: 50, A: 255} // 橙色警告 74 | case theme.ColorNameSuccess: 75 | return color.NRGBA{R: 60, G: 180, B: 120, A: 255} // 绿色成功 76 | case theme.ColorNameFocus: 77 | return color.NRGBA{R: 70, G: 130, B: 230, A: 100} // 半透明焦点 78 | 79 | default: 80 | return t.baseTheme.Color(name, theme.VariantLight) 81 | } 82 | } 83 | 84 | // darkColors 深色主题配色方案 85 | func (t *customTheme) darkColors(name fyne.ThemeColorName) color.Color { 86 | switch name { 87 | // 主色系 88 | case theme.ColorNamePrimary: 89 | return color.NRGBA{R: 90, G: 150, B: 250, A: 255} // 稍亮的蓝色 90 | 91 | // 背景与前景 92 | case theme.ColorNameBackground: 93 | return color.NRGBA{R: 20, G: 22, B: 30, A: 255} // 更深的灰蓝背景 94 | case theme.ColorNameForeground: 95 | return color.NRGBA{R: 230, G: 235, B: 240, A: 255} // 浅灰文字 96 | case theme.ColorNameDisabled: 97 | return color.NRGBA{R: 100, G: 105, B: 110, A: 150} // 深色禁用 98 | 99 | // 按钮状态 100 | case theme.ColorNameButton: 101 | return color.NRGBA{R: 50, G: 55, B: 65, A: 255} // 更深的按钮背景 102 | case theme.ColorNameHover: 103 | return color.NRGBA{R: 70, G: 75, B: 85, A: 255} // 浅灰悬停 104 | case theme.ColorNamePressed: 105 | return color.NRGBA{R: 30, G: 35, B: 45, A: 255} // 更深按下 106 | 107 | // 输入组件 108 | case theme.ColorNameInputBackground: 109 | return color.NRGBA{R: 35, G: 38, B: 48, A: 255} // 更深的输入框背景 110 | case theme.ColorNameInputBorder: 111 | return color.NRGBA{R: 60, G: 65, B: 75, A: 255} // 更深的边框 112 | case theme.ColorNamePlaceHolder: 113 | return color.NRGBA{R: 120, G: 125, B: 130, A: 200} // 灰占位符 114 | 115 | // 其他 116 | case theme.ColorNameSelection: 117 | return color.NRGBA{R: 70, G: 130, B: 230, A: 180} // 蓝色选中 118 | case theme.ColorNameScrollBar: 119 | return color.NRGBA{R: 60, G: 65, B: 75, A: 200} // 更深的滚动条 120 | case theme.ColorNameShadow: 121 | return color.NRGBA{R: 0, G: 0, B: 0, A: 50} // 深色阴影 122 | 123 | // 状态色(更鲜艳) 124 | case theme.ColorNameError: 125 | return color.NRGBA{R: 240, G: 80, B: 80, A: 255} 126 | case theme.ColorNameWarning: 127 | return color.NRGBA{R: 255, G: 170, B: 60, A: 255} 128 | case theme.ColorNameSuccess: 129 | return color.NRGBA{R: 70, G: 190, B: 130, A: 255} 130 | case theme.ColorNameFocus: 131 | return color.NRGBA{R: 80, G: 140, B: 240, A: 100} 132 | 133 | default: 134 | return t.baseTheme.Color(name, theme.VariantDark) 135 | } 136 | } 137 | 138 | // Icon 主题图标 139 | func (t *customTheme) Icon(name fyne.ThemeIconName) fyne.Resource { 140 | return t.baseTheme.Icon(name) 141 | } 142 | 143 | // Font 主题字体 144 | func (t *customTheme) Font(style fyne.TextStyle) fyne.Resource { 145 | return t.baseTheme.Font(style) 146 | } 147 | 148 | // Size 主题尺寸设置 149 | func (t *customTheme) Size(name fyne.ThemeSizeName) float32 { 150 | switch name { 151 | case theme.SizeNamePadding: 152 | return 10 153 | case theme.SizeNameInlineIcon: 154 | return 20 155 | case theme.SizeNameScrollBar: 156 | return 10 157 | case theme.SizeNameScrollBarSmall: 158 | return 4 159 | case theme.SizeNameSeparatorThickness: 160 | return 1 161 | case theme.SizeNameText: 162 | return 14 163 | case theme.SizeNameInputBorder: 164 | return 1.5 165 | case theme.SizeNameInputRadius: 166 | return 5 167 | default: 168 | return t.baseTheme.Size(name) 169 | } 170 | } 171 | -------------------------------------------------------------------------------- 
/internal/dto/subtitle_task.go: -------------------------------------------------------------------------------- 1 | package dto 2 | 3 | type StartVideoSubtitleTaskReq struct { 4 | AppId uint32 `json:"app_id"` 5 | Url string `json:"url"` 6 | OriginLanguage string `json:"origin_lang"` 7 | TargetLang string `json:"target_lang"` 8 | Bilingual uint8 `json:"bilingual"` 9 | TranslationSubtitlePos uint8 `json:"translation_subtitle_pos"` 10 | ModalFilter uint8 `json:"modal_filter"` 11 | Tts uint8 `json:"tts"` 12 | TtsVoiceCode string `json:"tts_voice_code"` 13 | TtsVoiceCloneSrcFileUrl string `json:"tts_voice_clone_src_file_url"` 14 | Replace []string `json:"replace"` 15 | Language string `json:"language"` 16 | EmbedSubtitleVideoType string `json:"embed_subtitle_video_type"` 17 | VerticalMajorTitle string `json:"vertical_major_title"` 18 | VerticalMinorTitle string `json:"vertical_minor_title"` 19 | OriginLanguageWordOneLine int `json:"origin_language_word_one_line"` 20 | } 21 | 22 | type StartVideoSubtitleTaskResData struct { 23 | TaskId string `json:"task_id"` 24 | } 25 | 26 | type StartVideoSubtitleTaskRes struct { 27 | Error int32 `json:"error"` 28 | Msg string `json:"msg"` 29 | Data *StartVideoSubtitleTaskResData `json:"data"` 30 | } 31 | 32 | type GetVideoSubtitleTaskReq struct { 33 | TaskId string `form:"taskId"` 34 | } 35 | 36 | type VideoInfo struct { 37 | Title string `json:"title"` 38 | Description string `json:"description"` 39 | TranslatedTitle string `json:"translated_title"` 40 | TranslatedDescription string `json:"translated_description"` 41 | Language string `json:"language"` 42 | } 43 | 44 | type SubtitleInfo struct { 45 | Name string `json:"name"` 46 | DownloadUrl string `json:"download_url"` 47 | } 48 | 49 | type GetVideoSubtitleTaskResData struct { 50 | TaskId string `json:"task_id"` 51 | ProcessPercent uint8 `json:"process_percent"` 52 | VideoInfo *VideoInfo `json:"video_info"` 53 | SubtitleInfo []*SubtitleInfo `json:"subtitle_info"` 54 | TargetLanguage string `json:"target_language"` 55 | SpeechDownloadUrl string `json:"speech_download_url"` 56 | } 57 | 58 | type GetVideoSubtitleTaskRes struct { 59 | Error int32 `json:"error"` 60 | Msg string `json:"msg"` 61 | Data *GetVideoSubtitleTaskResData `json:"data"` 62 | } 63 | -------------------------------------------------------------------------------- /internal/handler/init.go: -------------------------------------------------------------------------------- 1 | package handler 2 | 3 | import "krillin-ai/internal/service" 4 | 5 | type Handler struct { 6 | Service *service.Service 7 | } 8 | 9 | func NewHandler() *Handler { 10 | return &Handler{ 11 | Service: service.NewService(), 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /internal/handler/middleware.go: -------------------------------------------------------------------------------- 1 | package handler 2 | -------------------------------------------------------------------------------- /internal/handler/subtitle_task.go: -------------------------------------------------------------------------------- 1 | package handler 2 | 3 | import ( 4 | "github.com/gin-gonic/gin" 5 | "go.uber.org/zap" 6 | "krillin-ai/internal/dto" 7 | "krillin-ai/internal/response" 8 | "krillin-ai/log" 9 | "os" 10 | "path/filepath" 11 | ) 12 | 13 | func (h Handler) StartSubtitleTask(c *gin.Context) { 14 | var req dto.StartVideoSubtitleTaskReq 15 | if err := c.ShouldBindJSON(&req); err != nil { 16 | log.GetLogger().Error("StartSubtitleTask 
ShouldBindJSON err", zap.Error(err)) 17 | response.R(c, response.Response{ 18 | Error: -1, 19 | Msg: "参数错误", 20 | Data: nil, 21 | }) 22 | return 23 | } 24 | 25 | svc := h.Service 26 | 27 | data, err := svc.StartSubtitleTask(req) 28 | if err != nil { 29 | response.R(c, response.Response{ 30 | Error: -1, 31 | Msg: err.Error(), 32 | Data: nil, 33 | }) 34 | return 35 | } 36 | response.R(c, response.Response{ 37 | Error: 0, 38 | Msg: "成功", 39 | Data: data, 40 | }) 41 | } 42 | 43 | func (h Handler) GetSubtitleTask(c *gin.Context) { 44 | var req dto.GetVideoSubtitleTaskReq 45 | if err := c.ShouldBindQuery(&req); err != nil { 46 | response.R(c, response.Response{ 47 | Error: -1, 48 | Msg: "参数错误", 49 | Data: nil, 50 | }) 51 | return 52 | } 53 | svc := h.Service 54 | data, err := svc.GetTaskStatus(req) 55 | if err != nil { 56 | response.R(c, response.Response{ 57 | Error: -1, 58 | Msg: err.Error(), 59 | Data: nil, 60 | }) 61 | return 62 | } 63 | response.R(c, response.Response{ 64 | Error: 0, 65 | Msg: "成功", 66 | Data: data, 67 | }) 68 | } 69 | 70 | func (h Handler) UploadFile(c *gin.Context) { 71 | form, err := c.MultipartForm() 72 | if err != nil { 73 | response.R(c, response.Response{ 74 | Error: -1, 75 | Msg: "未能获取文件", 76 | Data: nil, 77 | }) 78 | return 79 | } 80 | 81 | files := form.File["file"] 82 | if len(files) == 0 { 83 | response.R(c, response.Response{ 84 | Error: -1, 85 | Msg: "未上传任何文件", 86 | Data: nil, 87 | }) 88 | return 89 | } 90 | 91 | // 保存每个文件 92 | var savedFiles []string 93 | for _, file := range files { 94 | savePath := "./uploads/" + file.Filename 95 | if err := c.SaveUploadedFile(file, savePath); err != nil { 96 | response.R(c, response.Response{ 97 | Error: -1, 98 | Msg: "文件保存失败: " + file.Filename, 99 | Data: nil, 100 | }) 101 | return 102 | } 103 | savedFiles = append(savedFiles, "local:"+savePath) 104 | } 105 | 106 | response.R(c, response.Response{ 107 | Error: 0, 108 | Msg: "文件上传成功", 109 | Data: gin.H{"file_path": savedFiles}, 110 | }) 111 | } 112 | 113 | func (h Handler) DownloadFile(c *gin.Context) { 114 | requestedFile := c.Param("filepath") 115 | if requestedFile == "" { 116 | response.R(c, response.Response{ 117 | Error: -1, 118 | Msg: "文件路径为空", 119 | Data: nil, 120 | }) 121 | return 122 | } 123 | 124 | localFilePath := filepath.Join(".", requestedFile) 125 | if _, err := os.Stat(localFilePath); os.IsNotExist(err) { 126 | response.R(c, response.Response{ 127 | Error: -1, 128 | Msg: "文件不存在", 129 | Data: nil, 130 | }) 131 | return 132 | } 133 | c.FileAttachment(localFilePath, filepath.Base(localFilePath)) 134 | } 135 | -------------------------------------------------------------------------------- /internal/response/response.go: -------------------------------------------------------------------------------- 1 | package response 2 | 3 | import "github.com/gin-gonic/gin" 4 | 5 | type Response struct { 6 | Error int32 `json:"error"` 7 | Msg string `json:"msg"` 8 | Data any `json:"data"` 9 | } 10 | 11 | func R(c *gin.Context, data any) { 12 | c.JSON(200, data) 13 | } 14 | -------------------------------------------------------------------------------- /internal/router/router.go: -------------------------------------------------------------------------------- 1 | package router 2 | 3 | import ( 4 | "krillin-ai/internal/handler" 5 | "krillin-ai/static" 6 | "net/http" 7 | 8 | "github.com/gin-gonic/gin" 9 | ) 10 | 11 | func SetupRouter(r *gin.Engine) { 12 | api := r.Group("/api") 13 | 14 | hdl := handler.NewHandler() 15 | { 16 | api.POST("/capability/subtitleTask", 
hdl.StartSubtitleTask) 17 | api.GET("/capability/subtitleTask", hdl.GetSubtitleTask) 18 | api.POST("/file", hdl.UploadFile) 19 | api.GET("/file/*filepath", hdl.DownloadFile) 20 | } 21 | 22 | r.GET("/", func(c *gin.Context) { 23 | c.Redirect(http.StatusMovedPermanently, "/static") 24 | }) 25 | r.StaticFS("/static", http.FS(static.EmbeddedFiles)) 26 | } 27 | -------------------------------------------------------------------------------- /internal/server/server.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "krillin-ai/config" 7 | "krillin-ai/internal/router" 8 | "krillin-ai/log" 9 | "net/http" 10 | 11 | "github.com/gin-gonic/gin" 12 | "go.uber.org/zap" 13 | ) 14 | 15 | var BackEnd *http.Server 16 | 17 | func StartBackend() error { 18 | gin.SetMode(gin.ReleaseMode) 19 | engine := gin.Default() 20 | router.SetupRouter(engine) 21 | BackEnd = &http.Server{ 22 | Addr: fmt.Sprintf("%s:%d", config.Conf.Server.Host, config.Conf.Server.Port), 23 | Handler: engine, 24 | } 25 | log.GetLogger().Info("服务启动", zap.String("host", config.Conf.Server.Host), zap.Int("port", config.Conf.Server.Port)) 26 | // return engine.Run(fmt.Sprintf("%s:%d", config.Conf.Server.Host, config.Conf.Server.Port)) 27 | err := BackEnd.ListenAndServe() 28 | if err != nil && err != http.ErrServerClosed { 29 | log.GetLogger().Error("服务启动失败", zap.Error(err)) 30 | return err 31 | } 32 | log.GetLogger().Info("服务关闭") 33 | return nil 34 | } 35 | 36 | func StopBackend() error { 37 | if BackEnd == nil { 38 | return nil 39 | } 40 | ctx, cancel := context.WithCancel(context.Background()) 41 | defer cancel() 42 | if err := BackEnd.Shutdown(ctx); err != nil { 43 | log.GetLogger().Error("服务关闭失败", zap.Error(err)) 44 | return err 45 | } 46 | BackEnd = nil 47 | log.GetLogger().Info("服务已成功关闭") 48 | return nil 49 | } 50 | -------------------------------------------------------------------------------- /internal/service/audio2subtitle_test.go: -------------------------------------------------------------------------------- 1 | package service 2 | 3 | import ( 4 | "os" 5 | "testing" 6 | ) 7 | 8 | func Test_isValidSplitContent(t *testing.T) { 9 | // 固定的测试文件路径 10 | splitContentFile := "g:\\bin\\AI\\tasks\\gdQRrtQP\\srt_no_ts_1.srt" 11 | originalTextFile := "g:\\bin\\AI\\tasks\\gdQRrtQP\\output\\origin_1.txt" 12 | 13 | // 读取分割内容文件 14 | splitContent, err := os.ReadFile(splitContentFile) 15 | if err != nil { 16 | t.Fatalf("读取分割内容文件失败: %v", err) 17 | } 18 | 19 | // 读取原始文本文件 20 | originalText, err := os.ReadFile(originalTextFile) 21 | if err != nil { 22 | t.Fatalf("读取原始文本文件失败: %v", err) 23 | } 24 | 25 | // 执行测试 26 | if _, err := parseAndCheckContent(string(splitContent), string(originalText)); err != nil { 27 | t.Errorf("parseAndCheckContent() error = %v, want nil", err) 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /internal/service/get_video_info.go: -------------------------------------------------------------------------------- 1 | package service 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "go.uber.org/zap" 7 | "krillin-ai/config" 8 | "krillin-ai/internal/storage" 9 | "krillin-ai/internal/types" 10 | "krillin-ai/log" 11 | "os/exec" 12 | "strings" 13 | ) 14 | 15 | func (s Service) getVideoInfo(ctx context.Context, stepParam *types.SubtitleTaskStepParam) error { 16 | link := stepParam.Link 17 | if strings.Contains(link, "youtube.com") || strings.Contains(link, "bilibili.com") { 18 | var ( 19 | err 
error 20 | title, description string 21 | ) 22 | // 获取标题 23 | titleCmdArgs := []string{"--skip-download", "--encoding", "utf-8", "--get-title", stepParam.Link} 24 | descriptionCmdArgs := []string{"--skip-download", "--encoding", "utf-8", "--get-description", stepParam.Link} 25 | titleCmdArgs = append(titleCmdArgs, "--cookies", "./cookies.txt") 26 | descriptionCmdArgs = append(descriptionCmdArgs, "--cookies", "./cookies.txt") 27 | if config.Conf.App.Proxy != "" { 28 | titleCmdArgs = append(titleCmdArgs, "--proxy", config.Conf.App.Proxy) 29 | descriptionCmdArgs = append(descriptionCmdArgs, "--proxy", config.Conf.App.Proxy) 30 | } 31 | if storage.FfmpegPath != "ffmpeg" { 32 | titleCmdArgs = append(titleCmdArgs, "--ffmpeg-location", storage.FfmpegPath) 33 | descriptionCmdArgs = append(descriptionCmdArgs, "--ffmpeg-location", storage.FfmpegPath) 34 | } 35 | cmd := exec.Command(storage.YtdlpPath, titleCmdArgs...) 36 | var output []byte 37 | output, err = cmd.CombinedOutput() 38 | if err != nil { 39 | log.GetLogger().Error("getVideoInfo yt-dlp error", zap.Any("stepParam", stepParam), zap.String("output", string(output)), zap.Error(err)) 40 | output = []byte{} 41 | // 不需要整个流程退出 42 | } 43 | title = string(output) 44 | cmd = exec.Command(storage.YtdlpPath, descriptionCmdArgs...) 45 | output, err = cmd.CombinedOutput() 46 | if err != nil { 47 | log.GetLogger().Error("getVideoInfo yt-dlp error", zap.Any("stepParam", stepParam), zap.String("output", string(output)), zap.Error(err)) 48 | output = []byte{} 49 | } 50 | description = string(output) 51 | log.GetLogger().Debug("getVideoInfo title and description", zap.String("title", title), zap.String("description", description)) 52 | // 翻译 53 | var result string 54 | result, err = s.ChatCompleter.ChatCompletion(fmt.Sprintf(types.TranslateVideoTitleAndDescriptionPrompt, types.GetStandardLanguageName(stepParam.TargetLanguage), title+"####"+description)) 55 | if err != nil { 56 | log.GetLogger().Error("getVideoInfo openai chat completion error", zap.Any("stepParam", stepParam), zap.Error(err)) 57 | } 58 | log.GetLogger().Debug("getVideoInfo translate video info result", zap.String("result", result)) 59 | 60 | taskPtr := stepParam.TaskPtr 61 | 62 | taskPtr.Title = title 63 | taskPtr.Description = description 64 | taskPtr.OriginLanguage = string(stepParam.OriginLanguage) 65 | taskPtr.TargetLanguage = string(stepParam.TargetLanguage) 66 | taskPtr.ProcessPct = 10 67 | splitResult := strings.Split(result, "####") 68 | if len(splitResult) == 1 { 69 | taskPtr.TranslatedTitle = splitResult[0] 70 | } else if len(splitResult) == 2 { 71 | taskPtr.TranslatedTitle = splitResult[0] 72 | taskPtr.TranslatedDescription = splitResult[1] 73 | } else { 74 | log.GetLogger().Error("getVideoInfo translate video info error split result length != 1 and 2", zap.Any("stepParam", stepParam), zap.Any("translate result", result), zap.Error(err)) 75 | } 76 | } 77 | return nil 78 | } 79 | -------------------------------------------------------------------------------- /internal/service/init.go: -------------------------------------------------------------------------------- 1 | package service 2 | 3 | import ( 4 | "krillin-ai/config" 5 | "krillin-ai/internal/types" 6 | "krillin-ai/log" 7 | "krillin-ai/pkg/aliyun" 8 | "krillin-ai/pkg/fasterwhisper" 9 | "krillin-ai/pkg/openai" 10 | "krillin-ai/pkg/whisper" 11 | "krillin-ai/pkg/whispercpp" 12 | "krillin-ai/pkg/whisperkit" 13 | 14 | "go.uber.org/zap" 15 | ) 16 | 17 | type Service struct { 18 | Transcriber types.Transcriber 19 | ChatCompleter 
types.ChatCompleter 20 | TtsClient types.Ttser 21 | OssClient *aliyun.OssClient 22 | VoiceCloneClient *aliyun.VoiceCloneClient 23 | } 24 | 25 | func NewService() *Service { 26 | var transcriber types.Transcriber 27 | var chatCompleter types.ChatCompleter 28 | var ttsClient types.Ttser 29 | 30 | switch config.Conf.Transcribe.Provider { 31 | case "openai": 32 | transcriber = whisper.NewClient(config.Conf.Transcribe.Openai.BaseUrl, config.Conf.Transcribe.Openai.ApiKey, config.Conf.App.Proxy) 33 | case "fasterwhisper": 34 | transcriber = fasterwhisper.NewFastwhisperProcessor(config.Conf.Transcribe.Fasterwhisper.Model) 35 | case "whispercpp": 36 | transcriber = whispercpp.NewWhispercppProcessor(config.Conf.Transcribe.Whispercpp.Model) 37 | case "whisperkit": 38 | transcriber = whisperkit.NewWhisperKitProcessor(config.Conf.Transcribe.Whisperkit.Model) 39 | case "aliyun": 40 | cc, err := aliyun.NewAsrClient(config.Conf.Transcribe.Aliyun.Speech.AccessKeyId, config.Conf.Transcribe.Aliyun.Speech.AccessKeySecret, config.Conf.Transcribe.Aliyun.Speech.AppKey, true) 41 | if err != nil { 42 | log.GetLogger().Error("创建阿里云语音识别客户端失败: ", zap.Error(err)) 43 | return nil 44 | } 45 | transcriber = cc 46 | } 47 | log.GetLogger().Info("当前选择的转录源: ", zap.String("transcriber", config.Conf.Transcribe.Provider)) 48 | 49 | chatCompleter = openai.NewClient(config.Conf.Llm.BaseUrl, config.Conf.Llm.ApiKey, config.Conf.App.Proxy) 50 | 51 | switch config.Conf.Tts.Provider { 52 | case "openai": 53 | ttsClient = openai.NewClient(config.Conf.Tts.Openai.BaseUrl, config.Conf.Tts.Openai.ApiKey, config.Conf.App.Proxy) 54 | case "aliyun": 55 | ttsClient = aliyun.NewTtsClient(config.Conf.Tts.Aliyun.Speech.AccessKeyId, config.Conf.Tts.Aliyun.Speech.AccessKeySecret, config.Conf.Tts.Aliyun.Speech.AppKey) 56 | } 57 | 58 | return &Service{ 59 | Transcriber: transcriber, 60 | ChatCompleter: chatCompleter, 61 | TtsClient: ttsClient, 62 | OssClient: aliyun.NewOssClient(config.Conf.Transcribe.Aliyun.Oss.AccessKeyId, config.Conf.Transcribe.Aliyun.Oss.AccessKeySecret, config.Conf.Transcribe.Aliyun.Oss.Bucket), 63 | VoiceCloneClient: aliyun.NewVoiceCloneClient(config.Conf.Tts.Aliyun.Speech.AccessKeyId, config.Conf.Tts.Aliyun.Speech.AccessKeySecret, config.Conf.Tts.Aliyun.Speech.AppKey), 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /internal/service/link2file.go: -------------------------------------------------------------------------------- 1 | package service 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "fmt" 7 | "go.uber.org/zap" 8 | "krillin-ai/config" 9 | "krillin-ai/internal/storage" 10 | "krillin-ai/internal/types" 11 | "krillin-ai/log" 12 | "krillin-ai/pkg/util" 13 | "os/exec" 14 | "strings" 15 | ) 16 | 17 | func (s Service) linkToFile(ctx context.Context, stepParam *types.SubtitleTaskStepParam) error { 18 | var ( 19 | err error 20 | output []byte 21 | ) 22 | link := stepParam.Link 23 | audioPath := fmt.Sprintf("%s/%s", stepParam.TaskBasePath, types.SubtitleTaskAudioFileName) 24 | videoPath := fmt.Sprintf("%s/%s", stepParam.TaskBasePath, types.SubtitleTaskVideoFileName) 25 | stepParam.TaskPtr.ProcessPct = 3 26 | if strings.Contains(link, "local:") { 27 | // 本地文件 28 | videoPath = strings.ReplaceAll(link, "local:", "") 29 | stepParam.InputVideoPath = videoPath 30 | cmd := exec.Command(storage.FfmpegPath, "-i", videoPath, "-vn", "-ar", "44100", "-ac", "2", "-ab", "192k", "-f", "mp3", audioPath) 31 | output, err = cmd.CombinedOutput() 32 | if err != nil { 33 | 
log.GetLogger().Error("generateAudioSubtitles.linkToFile ffmpeg error", zap.Any("step param", stepParam), zap.String("output", string(output)), zap.Error(err)) 34 | return fmt.Errorf("generateAudioSubtitles.linkToFile ffmpeg error: %w", err) 35 | } 36 | } else if strings.Contains(link, "youtube.com") { 37 | var videoId string 38 | videoId, err = util.GetYouTubeID(link) 39 | if err != nil { 40 | log.GetLogger().Error("linkToFile.GetYouTubeID error", zap.Any("step param", stepParam), zap.Error(err)) 41 | return fmt.Errorf("linkToFile.GetYouTubeID error: %w", err) 42 | } 43 | stepParam.Link = "https://www.youtube.com/watch?v=" + videoId 44 | cmdArgs := []string{"-f", "bestaudio", "--extract-audio", "--audio-format", "mp3", "--audio-quality", "192K", "-o", audioPath, stepParam.Link} 45 | if config.Conf.App.Proxy != "" { 46 | cmdArgs = append(cmdArgs, "--proxy", config.Conf.App.Proxy) 47 | } 48 | cmdArgs = append(cmdArgs, "--cookies", "./cookies.txt") 49 | if storage.FfmpegPath != "ffmpeg" { 50 | cmdArgs = append(cmdArgs, "--ffmpeg-location", storage.FfmpegPath) 51 | } 52 | cmd := exec.Command(storage.YtdlpPath, cmdArgs...) 53 | output, err = cmd.CombinedOutput() 54 | if err != nil { 55 | log.GetLogger().Error("linkToFile download audio yt-dlp error", zap.Any("step param", stepParam), zap.String("output", string(output)), zap.Error(err)) 56 | return fmt.Errorf("linkToFile download audio yt-dlp error: %w", err) 57 | } 58 | } else if strings.Contains(link, "bilibili.com") { 59 | videoId := util.GetBilibiliVideoId(link) 60 | if videoId == "" { 61 | return errors.New("linkToFile error: invalid link") 62 | } 63 | stepParam.Link = "https://www.bilibili.com/video/" + videoId 64 | cmdArgs := []string{"-f", "bestaudio[ext=m4a]", "-x", "--audio-format", "mp3", "-o", audioPath, stepParam.Link} 65 | if config.Conf.App.Proxy != "" { 66 | cmdArgs = append(cmdArgs, "--proxy", config.Conf.App.Proxy) 67 | } 68 | if storage.FfmpegPath != "ffmpeg" { 69 | cmdArgs = append(cmdArgs, "--ffmpeg-location", storage.FfmpegPath) 70 | } 71 | cmd := exec.Command(storage.YtdlpPath, cmdArgs...) 72 | output, err = cmd.CombinedOutput() 73 | if err != nil { 74 | log.GetLogger().Error("linkToFile download audio yt-dlp error", zap.Any("step param", stepParam), zap.String("output", string(output)), zap.Error(err)) 75 | return fmt.Errorf("linkToFile download audio yt-dlp error: %w", err) 76 | } 77 | } else { 78 | log.GetLogger().Info("linkToFile.unsupported link type", zap.Any("step param", stepParam)) 79 | return errors.New("linkToFile error: unsupported link, only support youtube, bilibili and local file") 80 | } 81 | stepParam.TaskPtr.ProcessPct = 6 82 | stepParam.AudioFilePath = audioPath 83 | 84 | if !strings.HasPrefix(link, "local:") && stepParam.EmbedSubtitleVideoType != "none" { 85 | // 需要下载原视频 86 | cmdArgs := []string{"-f", "bestvideo[height<=1080][ext=mp4]+bestaudio[ext=m4a]/bestvideo[height<=720][ext=mp4]+bestaudio[ext=m4a]/bestvideo[height<=480][ext=mp4]+bestaudio[ext=m4a]", "-o", videoPath, stepParam.Link} 87 | if config.Conf.App.Proxy != "" { 88 | cmdArgs = append(cmdArgs, "--proxy", config.Conf.App.Proxy) 89 | } 90 | if storage.FfmpegPath != "ffmpeg" { 91 | cmdArgs = append(cmdArgs, "--ffmpeg-location", storage.FfmpegPath) 92 | } 93 | cmd := exec.Command(storage.YtdlpPath, cmdArgs...) 
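// 上面的 -f 选择器按 1080p、720p、480p 依次回退选择 mp4 视频流并与 m4a 音频合并 The -f selector above falls back from 1080p to 720p to 480p mp4 video, merged with m4a audio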
94 | output, err = cmd.CombinedOutput() 95 | if err != nil { 96 | log.GetLogger().Error("linkToFile download video yt-dlp error", zap.Any("step param", stepParam), zap.String("output", string(output)), zap.Error(err)) 97 | return fmt.Errorf("linkToFile download video yt-dlp error: %w", err) 98 | } 99 | stepParam.InputVideoPath = videoPath 100 | } 101 | 102 | // 更新字幕任务信息 103 | stepParam.TaskPtr.ProcessPct = 10 104 | return nil 105 | } 106 | -------------------------------------------------------------------------------- /internal/service/upload_subtitle.go: -------------------------------------------------------------------------------- 1 | package service 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "go.uber.org/zap" 7 | "krillin-ai/internal/types" 8 | "krillin-ai/log" 9 | "krillin-ai/pkg/util" 10 | ) 11 | 12 | func (s Service) uploadSubtitles(ctx context.Context, stepParam *types.SubtitleTaskStepParam) error { 13 | subtitleInfos := make([]types.SubtitleInfo, 0) 14 | var err error 15 | for _, info := range stepParam.SubtitleInfos { 16 | resultPath := info.Path 17 | if len(stepParam.ReplaceWordsMap) > 0 { // 需要进行替换 18 | replacedSrcFile := util.AddSuffixToFileName(resultPath, "_replaced") 19 | err = util.ReplaceFileContent(resultPath, replacedSrcFile, stepParam.ReplaceWordsMap) 20 | if err != nil { 21 | log.GetLogger().Error("uploadSubtitles ReplaceFileContent err", zap.Any("stepParam", stepParam), zap.Error(err)) 22 | return fmt.Errorf("uploadSubtitles ReplaceFileContent err: %w", err) 23 | } 24 | resultPath = replacedSrcFile 25 | } 26 | subtitleInfos = append(subtitleInfos, types.SubtitleInfo{ 27 | TaskId: stepParam.TaskId, 28 | Name: info.Name, 29 | DownloadUrl: "/api/file/" + resultPath, 30 | }) 31 | } 32 | // 更新字幕任务信息 33 | taskPtr := stepParam.TaskPtr 34 | taskPtr.SubtitleInfos = subtitleInfos 35 | taskPtr.Status = types.SubtitleTaskStatusSuccess 36 | taskPtr.ProcessPct = 100 37 | // 配音文件 38 | if stepParam.TtsResultFilePath != "" { 39 | taskPtr.SpeechDownloadUrl = "/api/file/" + stepParam.TtsResultFilePath 40 | } 41 | return nil 42 | } 43 | -------------------------------------------------------------------------------- /internal/storage/bin.go: -------------------------------------------------------------------------------- 1 | package storage 2 | 3 | var ( 4 | FfmpegPath string 5 | FfprobePath string 6 | YtdlpPath string 7 | FasterwhisperPath string 8 | WhisperKitPath string 9 | WhispercppPath string 10 | ) 11 | -------------------------------------------------------------------------------- /internal/storage/subtitle_task.go: -------------------------------------------------------------------------------- 1 | package storage 2 | 3 | import ( 4 | "sync" 5 | ) 6 | 7 | var SubtitleTasks = sync.Map{} // task id -> SubtitleTask,用于接口查询数据 8 | -------------------------------------------------------------------------------- /internal/types/embed_subtitle.go: -------------------------------------------------------------------------------- 1 | package types 2 | 3 | const AssHeaderHorizontal = `[Script Info] 4 | Title: Example 5 | Original Script: 6 | ScriptType: v4.00+ 7 | PlayDepth: 0 8 | 9 | [V4+ Styles] 10 | Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding 11 | Style: Major,Arial,18,&H00BFFF,&H000000FF,&H00000000,&H64000000,-1,0,0,0,100,100,0,0,1,2.5,1.5,2,10,10,20,1 12 | Style: 
Minor,Arial,12,&H00BFFF,&H000000FF,&H00000000,&H64000000,-1,0,0,0,100,100,0,0,1,2.5,1.5,2,10,10,30,1 13 | 14 | 15 | [Events] 16 | Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 17 | ` 18 | const AssHeaderVertical = `[Script Info] 19 | Title: Example 20 | Original Script: 21 | ScriptType: v4.00+ 22 | PlayDepth: 0 23 | 24 | [V4+ Styles] 25 | Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding 26 | Style: Major,Arial,15,&H00BFFF,&H000000FF,&H00000000,&H64000000,-1,0,0,0,100,100,-10,0,1,2.5,1.5,2,10,10,80,1 27 | Style: Minor,Arial,8,&H00BFFF,&H000000FF,&H00000000,&H64000000,-1,0,0,0,100,100,-10,0,1,2.5,1.5,2,10,10,100,1 28 | 29 | 30 | [Events] 31 | Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 32 | ` 33 | -------------------------------------------------------------------------------- /internal/types/fasterwhisper.go: -------------------------------------------------------------------------------- 1 | package types 2 | 3 | type FasterWhisperOutput struct { 4 | Segments []struct { 5 | Id int `json:"id"` 6 | Seek int `json:"seek"` 7 | Start float64 `json:"start"` 8 | End float64 `json:"end"` 9 | Text string `json:"text"` 10 | Tokens []int `json:"tokens"` 11 | Temperature float64 `json:"temperature"` 12 | AvgLogprob float64 `json:"avg_logprob"` 13 | CompressionRatio float64 `json:"compression_ratio"` 14 | NoSpeechProb float64 `json:"no_speech_prob"` 15 | Words []struct { 16 | Start float64 `json:"start"` 17 | End float64 `json:"end"` 18 | Word string `json:"word"` 19 | Probability float64 `json:"probability"` 20 | } `json:"words"` 21 | } `json:"segments"` 22 | Language string `json:"language"` 23 | Text string `json:"text"` 24 | } 25 | -------------------------------------------------------------------------------- /internal/types/interface.go: -------------------------------------------------------------------------------- 1 | package types 2 | 3 | type ChatCompleter interface { 4 | ChatCompletion(query string) (string, error) 5 | } 6 | 7 | type Transcriber interface { 8 | Transcription(audioFile, language, wordDir string) (*TranscriptionData, error) 9 | } 10 | 11 | type Ttser interface { 12 | Text2Speech(text string, voice string, outputFile string) error 13 | } 14 | -------------------------------------------------------------------------------- /internal/types/whispercpp.go: -------------------------------------------------------------------------------- 1 | package types 2 | 3 | type WhispercppOutput struct { 4 | SystemInfo string `json:"systeminfo"` 5 | Model struct { 6 | Type string `json:"type"` 7 | Multilingual bool `json:"multilingual"` 8 | Vocab int `json:"vocab"` 9 | Audio struct { 10 | Ctx int `json:"ctx"` 11 | State int `json:"state"` 12 | Head int `json:"head"` 13 | Layer int `json:"layer"` 14 | } `json:"audio"` 15 | Text struct { 16 | Ctx int `json:"ctx"` 17 | State int `json:"state"` 18 | Head int `json:"head"` 19 | Layer int `json:"layer"` 20 | } `json:"text"` 21 | Mels int `json:"mels"` 22 | Ftype int `json:"ftype"` 23 | } `json:"model"` 24 | Params struct { 25 | Model string `json:"model"` 26 | Language string `json:"language"` 27 | Translate bool `json:"translate"` 28 | } `json:"params"` 29 | Result struct { 30 | Language string `json:"language"` 31 | } `json:"result"` 32 | Transcription []struct { 33 | Timestamps struct { 34 | 
From string `json:"from"` 35 | To string `json:"to"` 36 | } `json:"timestamps"` 37 | Offsets struct { 38 | From int `json:"from"` 39 | To int `json:"to"` 40 | } `json:"offsets"` 41 | Text string `json:"text"` 42 | Tokens []struct { 43 | Text string `json:"text"` 44 | Timestamps struct { 45 | From string `json:"from"` 46 | To string `json:"to"` 47 | } `json:"timestamps"` 48 | Offsets struct { 49 | From int `json:"from"` 50 | To int `json:"to"` 51 | } `json:"offsets"` 52 | ID int `json:"id"` 53 | P float64 `json:"p"` 54 | TDtw int `json:"t_dtw"` 55 | } `json:"tokens"` 56 | } `json:"transcription"` 57 | } 58 | -------------------------------------------------------------------------------- /internal/types/whisperkit.go: -------------------------------------------------------------------------------- 1 | package types 2 | 3 | type WhisperKitOutput struct { 4 | Text string `json:"text"` 5 | Language string `json:"language"` 6 | Segments []struct { 7 | Seek int `json:"seek"` 8 | Tokens []int `json:"tokens"` 9 | CompressionRatio float64 `json:"compressionRatio"` 10 | Temperature float64 `json:"temperature"` 11 | AvgLogprob float64 `json:"avgLogprob"` 12 | NoSpeechProb float64 `json:"noSpeechProb"` 13 | Id int `json:"id"` 14 | TokenLogProbs []map[string]float64 `json:"tokenLogProbs"` 15 | Start float64 `json:"start"` 16 | Words []struct { 17 | Start float64 `json:"start"` 18 | End float64 `json:"end"` 19 | Word string `json:"word"` 20 | Probability float64 `json:"probability"` 21 | Tokens []int `json:"tokens"` 22 | } `json:"words"` 23 | Text string `json:"text"` 24 | End float64 `json:"end"` 25 | } `json:"segments"` 26 | } 27 | -------------------------------------------------------------------------------- /log/zap.go: -------------------------------------------------------------------------------- 1 | package log 2 | 3 | import ( 4 | "go.uber.org/zap" 5 | "go.uber.org/zap/zapcore" 6 | "os" 7 | ) 8 | 9 | var Logger *zap.Logger 10 | 11 | func InitLogger() { 12 | file, err := os.OpenFile("app.log", os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666) 13 | if err != nil { 14 | panic("无法打开日志文件: " + err.Error()) 15 | } 16 | 17 | fileSyncer := zapcore.AddSync(file) 18 | consoleSyncer := zapcore.AddSync(os.Stdout) 19 | 20 | encoderConfig := zap.NewProductionEncoderConfig() 21 | encoderConfig.EncodeTime = zapcore.ISO8601TimeEncoder 22 | 23 | core := zapcore.NewTee( 24 | zapcore.NewCore(zapcore.NewJSONEncoder(encoderConfig), fileSyncer, zap.DebugLevel), // 写入文件(JSON 格式) 25 | zapcore.NewCore(zapcore.NewConsoleEncoder(encoderConfig), consoleSyncer, zap.InfoLevel), // 输出到终端 26 | ) 27 | 28 | Logger = zap.New(core, zap.AddCaller()) 29 | } 30 | 31 | func GetLogger() *zap.Logger { 32 | return Logger 33 | } 34 | -------------------------------------------------------------------------------- /pkg/aliyun/base.go: -------------------------------------------------------------------------------- 1 | package aliyun 2 | 3 | import ( 4 | "encoding/json" 5 | "github.com/aliyun/alibaba-cloud-sdk-go/sdk" 6 | "github.com/aliyun/alibaba-cloud-sdk-go/sdk/requests" 7 | "go.uber.org/zap" 8 | "krillin-ai/config" 9 | "krillin-ai/log" 10 | ) 11 | 12 | type TokenResult struct { 13 | ErrMsg string 14 | Token struct { 15 | UserId string 16 | Id string 17 | ExpireTime int64 18 | } 19 | } 20 | 21 | func CreateToken(ak, sk string) (string, error) { 22 | client, err := sdk.NewClientWithAccessKey("cn-shanghai", ak, sk) 23 | if err != nil { 24 | return "", err 25 | } 26 | if config.Conf.App.Proxy != "" { 27 | 
client.SetHttpProxy(config.Conf.App.Proxy) 28 | } 29 | request := requests.NewCommonRequest() 30 | request.Method = "POST" 31 | request.Domain = "nls-meta.cn-shanghai.aliyuncs.com" 32 | request.ApiName = "CreateToken" 33 | request.Version = "2019-02-28" 34 | response, err := client.ProcessCommonRequest(request) 35 | if err != nil { 36 | log.GetLogger().Error("aliyun sdk create token request error:", zap.Error(err)) 37 | return "", err 38 | } 39 | 40 | var tr TokenResult 41 | err = json.Unmarshal([]byte(response.GetHttpContentString()), &tr) 42 | if err != nil { 43 | log.GetLogger().Error("aliyun sdk json unmarshal error:", zap.Error(err)) 44 | return "", err 45 | } 46 | return tr.Token.Id, nil 47 | } 48 | -------------------------------------------------------------------------------- /pkg/aliyun/chat.go: -------------------------------------------------------------------------------- 1 | package aliyun 2 | 3 | import ( 4 | "context" 5 | goopenai "github.com/sashabaranov/go-openai" 6 | "go.uber.org/zap" 7 | "krillin-ai/log" 8 | ) 9 | 10 | type ChatClient struct { 11 | *goopenai.Client 12 | } 13 | 14 | func NewChatClient(apiKey string) *ChatClient { 15 | cfg := goopenai.DefaultConfig(apiKey) 16 | cfg.BaseURL = "https://dashscope.aliyuncs.com/compatible-mode/v1" // 使用阿里云的openai兼容模式调用 17 | return &ChatClient{ 18 | Client: goopenai.NewClientWithConfig(cfg), 19 | } 20 | } 21 | 22 | func (c ChatClient) ChatCompletion(query string) (string, error) { 23 | req := goopenai.ChatCompletionRequest{ 24 | Model: "qwen-plus", 25 | Messages: []goopenai.ChatCompletionMessage{ 26 | { 27 | Role: goopenai.ChatMessageRoleSystem, 28 | Content: "You are an assistant that helps with subtitle translation.", 29 | }, 30 | { 31 | Role: goopenai.ChatMessageRoleUser, 32 | Content: query, 33 | }, 34 | }, 35 | } 36 | 37 | resp, err := c.CreateChatCompletion(context.Background(), req) 38 | if err != nil { 39 | log.GetLogger().Error("aliyun openai create chat completion failed", zap.Error(err)) 40 | return "", err 41 | } 42 | 43 | resContent := resp.Choices[0].Message.Content 44 | 45 | return resContent, nil 46 | } 47 | -------------------------------------------------------------------------------- /pkg/aliyun/oss.go: -------------------------------------------------------------------------------- 1 | package aliyun 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "github.com/aliyun/alibabacloud-oss-go-sdk-v2/oss" 7 | "github.com/aliyun/alibabacloud-oss-go-sdk-v2/oss/credentials" 8 | "os" 9 | ) 10 | 11 | type OssClient struct { 12 | *oss.Client 13 | Bucket string 14 | } 15 | 16 | func NewOssClient(accessKeyID, accessKeySecret, bucket string) *OssClient { 17 | credProvider := credentials.NewStaticCredentialsProvider(accessKeyID, accessKeySecret) 18 | 19 | cfg := oss.LoadDefaultConfig(). 20 | WithCredentialsProvider(credProvider). 
21 | WithRegion("cn-shanghai") 22 | 23 | client := oss.NewClient(cfg) 24 | 25 | return &OssClient{client, bucket} 26 | } 27 | 28 | func (o *OssClient) UploadFile(ctx context.Context, objectKey, filePath, bucket string) error { 29 | file, err := os.Open(filePath) 30 | if err != nil { 31 | return fmt.Errorf("failed to open file: %v", err) 32 | } 33 | defer file.Close() 34 | 35 | _, err = o.PutObject(ctx, &oss.PutObjectRequest{ 36 | Bucket: &bucket, 37 | Key: &objectKey, 38 | Body: file, 39 | }) 40 | if err != nil { 41 | return fmt.Errorf("failed to upload file to OSS: %v", err) 42 | } 43 | 44 | fmt.Printf("File %s uploaded successfully to bucket %s as %s\n", filePath, bucket, objectKey) 45 | return nil 46 | } 47 | -------------------------------------------------------------------------------- /pkg/aliyun/tts.go: -------------------------------------------------------------------------------- 1 | package aliyun 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "github.com/gorilla/websocket" 7 | "go.uber.org/zap" 8 | "krillin-ai/config" 9 | "krillin-ai/log" 10 | "krillin-ai/pkg/util" 11 | "net/http" 12 | "os" 13 | "time" 14 | ) 15 | 16 | type TtsClient struct { 17 | AccessKeyID string 18 | AccessKeySecret string 19 | Appkey string 20 | } 21 | 22 | type TtsHeader struct { 23 | Appkey string `json:"appkey"` 24 | MessageID string `json:"message_id"` 25 | TaskID string `json:"task_id"` 26 | Namespace string `json:"namespace"` 27 | Name string `json:"name"` 28 | } 29 | 30 | type StartSynthesisPayload struct { 31 | Voice string `json:"voice,omitempty"` 32 | Format string `json:"format,omitempty"` 33 | SampleRate int `json:"sample_rate,omitempty"` 34 | Volume int `json:"volume,omitempty"` 35 | SpeechRate int `json:"speech_rate,omitempty"` 36 | PitchRate int `json:"pitch_rate,omitempty"` 37 | EnableSubtitle bool `json:"enable_subtitle,omitempty"` 38 | EnablePhonemeTimestamp bool `json:"enable_phoneme_timestamp,omitempty"` 39 | } 40 | 41 | type RunSynthesisPayload struct { 42 | Text string `json:"text"` 43 | } 44 | 45 | type Message struct { 46 | Header TtsHeader `json:"header"` 47 | Payload interface{} `json:"payload,omitempty"` 48 | } 49 | 50 | func NewTtsClient(accessKeyId, accessKeySecret, appkey string) *TtsClient { 51 | return &TtsClient{ 52 | AccessKeyID: accessKeyId, 53 | AccessKeySecret: accessKeySecret, 54 | Appkey: appkey, 55 | } 56 | } 57 | 58 | func (c *TtsClient) Text2Speech(text, voice, outputFile string) error { 59 | file, err := os.OpenFile(outputFile, os.O_CREATE|os.O_WRONLY, 0666) 60 | if err != nil { 61 | return fmt.Errorf("failed to create file: %w", err) 62 | } 63 | defer file.Close() 64 | 65 | var conn *websocket.Conn 66 | token, _ := CreateToken(c.AccessKeyID, c.AccessKeySecret) 67 | fullURL := "wss://nls-gateway-cn-beijing.aliyuncs.com/ws/v1?token=" + token 68 | dialer := websocket.DefaultDialer 69 | if config.Conf.App.Proxy != "" { 70 | dialer.Proxy = http.ProxyURL(config.Conf.App.ParsedProxy) 71 | } 72 | dialer.HandshakeTimeout = 10 * time.Second 73 | conn, _, err = dialer.Dial(fullURL, nil) 74 | if err != nil { 75 | return err 76 | } 77 | _ = conn.SetReadDeadline(time.Now().Add(time.Second * 60)) 78 | defer c.Close(conn) 79 | 80 | onTextMessage := func(message string) { 81 | log.GetLogger().Info("Received text message", zap.String("Message", message)) 82 | } 83 | 84 | onBinaryMessage := func(data []byte) { 85 | if _, err := file.Write(data); err != nil { 86 | log.GetLogger().Error("Failed to write data to file", zap.Error(err)) 87 | } 88 | } 89 | 90 | var ( 91 | 
synthesisStarted = make(chan struct{}) 92 | synthesisComplete = make(chan struct{}) 93 | ) 94 | 95 | startPayload := StartSynthesisPayload{ 96 | Voice: voice, 97 | Format: "wav", 98 | SampleRate: 44100, 99 | Volume: 50, 100 | SpeechRate: 0, 101 | PitchRate: 0, 102 | } 103 | 104 | go c.receiveMessages(conn, onTextMessage, onBinaryMessage, synthesisStarted, synthesisComplete) 105 | 106 | taskId := util.GenerateID() 107 | log.GetLogger().Info("SpeechClient StartSynthesis", zap.String("taskId", taskId), zap.Any("payload", startPayload)) 108 | if err := c.StartSynthesis(conn, taskId, startPayload, synthesisStarted); err != nil { 109 | return fmt.Errorf("failed to start synthesis: %w", err) 110 | } 111 | 112 | if err := c.RunSynthesis(conn, taskId, text); err != nil { 113 | return fmt.Errorf("failed to run synthesis: %w", err) 114 | } 115 | 116 | if err := c.StopSynthesis(conn, taskId, synthesisComplete); err != nil { 117 | return fmt.Errorf("failed to stop synthesis: %w", err) 118 | } 119 | 120 | return nil 121 | } 122 | 123 | func (c *TtsClient) sendMessage(conn *websocket.Conn, taskId, name string, payload interface{}) error { 124 | message := Message{ 125 | Header: TtsHeader{ 126 | Appkey: c.Appkey, 127 | MessageID: util.GenerateID(), 128 | TaskID: taskId, 129 | Namespace: "FlowingSpeechSynthesizer", 130 | Name: name, 131 | }, 132 | Payload: payload, 133 | } 134 | jsonData, _ := json.Marshal(message) 135 | log.GetLogger().Debug("SpeechClient sendMessage", zap.String("message", string(jsonData))) 136 | return conn.WriteJSON(message) 137 | } 138 | 139 | func (c *TtsClient) StartSynthesis(conn *websocket.Conn, taskId string, payload StartSynthesisPayload, synthesisStarted chan struct{}) error { 140 | err := c.sendMessage(conn, taskId, "StartSynthesis", payload) 141 | if err != nil { 142 | return err 143 | } 144 | 145 | // 阻塞等待 SynthesisStarted 事件 146 | <-synthesisStarted 147 | 148 | return nil 149 | } 150 | 151 | func (c *TtsClient) RunSynthesis(conn *websocket.Conn, taskId, text string) error { 152 | return c.sendMessage(conn, taskId, "RunSynthesis", RunSynthesisPayload{Text: text}) 153 | } 154 | 155 | func (c *TtsClient) StopSynthesis(conn *websocket.Conn, taskId string, synthesisComplete chan struct{}) error { 156 | err := c.sendMessage(conn, taskId, "StopSynthesis", nil) 157 | if err != nil { 158 | return err 159 | } 160 | 161 | // 阻塞等待 SynthesisCompleted 事件 162 | <-synthesisComplete 163 | 164 | return nil 165 | } 166 | 167 | func (c *TtsClient) Close(conn *websocket.Conn) error { 168 | err := conn.WriteMessage(websocket.CloseMessage, websocket.FormatCloseMessage(websocket.CloseNormalClosure, "")) 169 | if err != nil { 170 | return err 171 | } 172 | return conn.Close() 173 | } 174 | 175 | func (c *TtsClient) receiveMessages(conn *websocket.Conn, onTextMessage func(string), onBinaryMessage func([]byte), synthesisStarted, synthesisComplete chan struct{}) { 176 | defer close(synthesisComplete) 177 | for { 178 | messageType, message, err := conn.ReadMessage() 179 | if err != nil { 180 | if websocket.IsCloseError(err, websocket.CloseNormalClosure) { 181 | log.GetLogger().Error("SpeechClient receiveMessages websocket非正常关闭", zap.Error(err)) 182 | return 183 | } 184 | return 185 | } 186 | if messageType == websocket.TextMessage { 187 | var msg Message 188 | if err := json.Unmarshal(message, &msg); err != nil { 189 | log.GetLogger().Error("SpeechClient receiveMessages json解析失败", zap.Error(err)) 190 | return 191 | } 192 | if msg.Header.Name == "SynthesisCompleted" { 193 | 
log.GetLogger().Info("SynthesisCompleted event received") 194 | // 收到结束消息退出 195 | break 196 | } else if msg.Header.Name == "SynthesisStarted" { 197 | log.GetLogger().Info("SynthesisStarted event received") 198 | close(synthesisStarted) 199 | } else { 200 | onTextMessage(string(message)) 201 | } 202 | } else if messageType == websocket.BinaryMessage { 203 | onBinaryMessage(message) 204 | } 205 | } 206 | } 207 | -------------------------------------------------------------------------------- /pkg/aliyun/voice_clone.go: -------------------------------------------------------------------------------- 1 | package aliyun 2 | 3 | import ( 4 | "crypto/hmac" 5 | "crypto/sha1" 6 | "encoding/base64" 7 | "fmt" 8 | "go.uber.org/zap" 9 | "krillin-ai/log" 10 | "net/url" 11 | "sort" 12 | "strings" 13 | "time" 14 | 15 | "github.com/go-resty/resty/v2" 16 | "github.com/google/uuid" 17 | ) 18 | 19 | // _encodeText URL-编码文本,保证符合规范 20 | func _encodeText(text string) string { 21 | encoded := url.QueryEscape(text) 22 | // 根据规范替换特殊字符 23 | return strings.ReplaceAll(strings.ReplaceAll(strings.ReplaceAll(encoded, "+", "%20"), "*", "%2A"), "%7E", "~") 24 | } 25 | 26 | // _encodeDict URL-编码字典(map)为查询字符串 27 | func _encodeDict(dic map[string]string) string { 28 | var keys []string 29 | for key := range dic { 30 | keys = append(keys, key) 31 | } 32 | sort.Strings(keys) 33 | values := url.Values{} 34 | 35 | for _, k := range keys { 36 | values.Add(k, dic[k]) 37 | } 38 | encodedText := values.Encode() 39 | // 对整个查询字符串进行编码 40 | return strings.ReplaceAll(strings.ReplaceAll(strings.ReplaceAll(encodedText, "+", "%20"), "*", "%2A"), "%7E", "~") 41 | } 42 | 43 | // 生成签名 44 | func GenerateSignature(secret, stringToSign string) string { 45 | key := []byte(secret + "&") 46 | data := []byte(stringToSign) 47 | hash := hmac.New(sha1.New, key) 48 | hash.Write(data) 49 | signature := base64.StdEncoding.EncodeToString(hash.Sum(nil)) 50 | // 对签名进行URL编码 51 | return _encodeText(signature) 52 | } 53 | 54 | type VoiceCloneResp struct { 55 | RequestId string `json:"RequestId"` 56 | Message string `json:"Message"` 57 | Code int `json:"Code"` 58 | VoiceName string `json:"VoiceName"` 59 | } 60 | 61 | type VoiceCloneClient struct { 62 | restyClient *resty.Client 63 | accessKeyID string 64 | accessKeySecret string 65 | appkey string 66 | } 67 | 68 | func NewVoiceCloneClient(accessKeyID, accessKeySecret, appkey string) *VoiceCloneClient { 69 | return &VoiceCloneClient{ 70 | restyClient: resty.New(), 71 | accessKeyID: accessKeyID, 72 | accessKeySecret: accessKeySecret, 73 | appkey: appkey, 74 | } 75 | } 76 | 77 | func (c *VoiceCloneClient) CosyVoiceClone(voicePrefix, audioURL string) (string, error) { 78 | log.GetLogger().Info("CosyVoiceClone请求开始", zap.String("voicePrefix", voicePrefix), zap.String("audioURL", audioURL)) 79 | parameters := map[string]string{ 80 | "AccessKeyId": c.accessKeyID, 81 | "Action": "CosyVoiceClone", 82 | "Format": "JSON", 83 | "RegionId": "cn-shanghai", 84 | "SignatureMethod": "HMAC-SHA1", 85 | "SignatureNonce": uuid.New().String(), 86 | "SignatureVersion": "1.0", 87 | "Timestamp": time.Now().UTC().Format("2006-01-02T15:04:05Z"), 88 | "Version": "2019-08-19", 89 | "VoicePrefix": voicePrefix, 90 | "Url": audioURL, 91 | } 92 | 93 | queryString := _encodeDict(parameters) 94 | stringToSign := "POST" + "&" + _encodeText("/") + "&" + _encodeText(queryString) 95 | signature := GenerateSignature(c.accessKeySecret, stringToSign) 96 | fullURL := fmt.Sprintf("https://nls-slp.cn-shanghai.aliyuncs.com/?Signature=%s&%s", signature, 
queryString) 97 | 98 | values := url.Values{} 99 | for key, value := range parameters { 100 | values.Add(key, value) 101 | } 102 | var res VoiceCloneResp 103 | resp, err := c.restyClient.R().SetResult(&res).Post(fullURL) 104 | if err != nil { 105 | log.GetLogger().Error("CosyVoiceClone post error", zap.Error(err)) 106 | return "", fmt.Errorf("CosyVoiceClone post error: %w: ", err) 107 | } 108 | log.GetLogger().Info("CosyVoiceClone请求完毕", zap.String("Response", resp.String())) 109 | if res.Message != "SUCCESS" { 110 | log.GetLogger().Error("CosyVoiceClone res message is not success", zap.String("Request Id", res.RequestId), zap.Int("Code", res.Code), zap.String("Message", res.Message)) 111 | return "", fmt.Errorf("CosyVoiceClone res message is not success, message: %s", res.Message) 112 | } 113 | return res.VoiceName, nil 114 | } 115 | 116 | func (c *VoiceCloneClient) CosyCloneList(voicePrefix string, pageIndex, pageSize int) { 117 | parameters := map[string]string{ 118 | "AccessKeyId": c.accessKeyID, 119 | "Action": "ListCosyVoice", 120 | "Format": "JSON", 121 | "RegionId": "cn-shanghai", 122 | "SignatureMethod": "HMAC-SHA1", 123 | "SignatureNonce": uuid.New().String(), 124 | "SignatureVersion": "1.0", 125 | "Timestamp": time.Now().UTC().Format("2006-01-02T15:04:05Z"), 126 | "Version": "2019-08-19", 127 | "VoicePrefix": voicePrefix, 128 | "PageIndex": fmt.Sprintf("%d", pageIndex), 129 | "PageSize": fmt.Sprintf("%d", pageSize), 130 | } 131 | 132 | queryString := _encodeDict(parameters) 133 | stringToSign := "POST" + "&" + _encodeText("/") + "&" + _encodeText(queryString) 134 | signature := GenerateSignature(c.accessKeySecret, stringToSign) 135 | fullURL := fmt.Sprintf("https://nls-slp.cn-shanghai.aliyuncs.com/?Signature=%s&%s", signature, queryString) 136 | 137 | values := url.Values{} 138 | for key, value := range parameters { 139 | values.Add(key, value) 140 | } 141 | resp, err := c.restyClient.R().Post(fullURL) 142 | if err != nil { 143 | log.GetLogger().Error("CosyCloneList请求失败", zap.Error(err)) 144 | return 145 | } 146 | log.GetLogger().Info("CosyCloneList请求成功", zap.String("Response", resp.String())) 147 | } 148 | -------------------------------------------------------------------------------- /pkg/fasterwhisper/init.go: -------------------------------------------------------------------------------- 1 | package fasterwhisper 2 | 3 | type FastwhisperProcessor struct { 4 | WorkDir string // 生成中间文件的目录 5 | Model string 6 | } 7 | 8 | func NewFastwhisperProcessor(model string) *FastwhisperProcessor { 9 | return &FastwhisperProcessor{ 10 | Model: model, 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /pkg/fasterwhisper/transcription.go: -------------------------------------------------------------------------------- 1 | package fasterwhisper 2 | 3 | import ( 4 | "encoding/json" 5 | "go.uber.org/zap" 6 | "krillin-ai/internal/storage" 7 | "krillin-ai/internal/types" 8 | "krillin-ai/log" 9 | "krillin-ai/pkg/util" 10 | "os" 11 | "os/exec" 12 | "strings" 13 | ) 14 | 15 | func (c *FastwhisperProcessor) Transcription(audioFile, language, workDir string) (*types.TranscriptionData, error) { 16 | cmdArgs := []string{ 17 | "--model_dir", "./models/", 18 | "--model", c.Model, 19 | "--one_word", "2", 20 | "--output_format", "json", 21 | "--language", language, 22 | "--output_dir", workDir, 23 | audioFile, 24 | } 25 | cmd := exec.Command(storage.FasterwhisperPath, cmdArgs...) 
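// Run the bundled faster-whisper binary. On success it writes a JSON transcript, which is read back
// below from the audio path with its extension swapped to .json and decoded into
// types.FasterWhisperOutput for word-level timestamps.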
26 | log.GetLogger().Info("FastwhisperProcessor转录开始", zap.String("cmd", cmd.String())) 27 | output, err := cmd.CombinedOutput() 28 | if err != nil && !strings.Contains(string(output), "Subtitles are written to") { 29 | log.GetLogger().Error("FastwhisperProcessor cmd 执行失败", zap.String("output", string(output)), zap.Error(err)) 30 | return nil, err 31 | } 32 | log.GetLogger().Info("FastwhisperProcessor转录json生成完毕", zap.String("audio file", audioFile)) 33 | 34 | var result types.FasterWhisperOutput 35 | fileData, err := os.Open(util.ChangeFileExtension(audioFile, ".json")) 36 | if err != nil { 37 | log.GetLogger().Error("FastwhisperProcessor 打开json文件失败", zap.Error(err)) 38 | return nil, err 39 | } 40 | defer fileData.Close() 41 | decoder := json.NewDecoder(fileData) 42 | if err = decoder.Decode(&result); err != nil { 43 | log.GetLogger().Error("FastwhisperProcessor 解析json文件失败", zap.Error(err)) 44 | return nil, err 45 | } 46 | 47 | var ( 48 | transcriptionData types.TranscriptionData 49 | num int 50 | ) 51 | for _, segment := range result.Segments { 52 | transcriptionData.Text += strings.ReplaceAll(segment.Text, "—", " ") // 连字符处理,因为模型存在很多错误添加到连字符 53 | for _, word := range segment.Words { 54 | if strings.Contains(word.Word, "—") { 55 | // 对称切分 56 | mid := (word.Start + word.End) / 2 57 | seperatedWords := strings.Split(word.Word, "—") 58 | transcriptionData.Words = append(transcriptionData.Words, []types.Word{ 59 | { 60 | Num: num, 61 | Text: util.CleanPunction(strings.TrimSpace(seperatedWords[0])), 62 | Start: word.Start, 63 | End: mid, 64 | }, 65 | { 66 | Num: num + 1, 67 | Text: util.CleanPunction(strings.TrimSpace(seperatedWords[1])), 68 | Start: mid, 69 | End: word.End, 70 | }, 71 | }...) 72 | num += 2 73 | } else { 74 | transcriptionData.Words = append(transcriptionData.Words, types.Word{ 75 | Num: num, 76 | Text: util.CleanPunction(strings.TrimSpace(word.Word)), 77 | Start: word.Start, 78 | End: word.End, 79 | }) 80 | num++ 81 | } 82 | } 83 | } 84 | log.GetLogger().Info("FastwhisperProcessor转录成功") 85 | return &transcriptionData, nil 86 | } 87 | -------------------------------------------------------------------------------- /pkg/openai/init.go: -------------------------------------------------------------------------------- 1 | package openai 2 | 3 | import ( 4 | "github.com/sashabaranov/go-openai" 5 | "krillin-ai/config" 6 | "net/http" 7 | ) 8 | 9 | type Client struct { 10 | client *openai.Client 11 | } 12 | 13 | func NewClient(baseUrl, apiKey, proxyAddr string) *Client { 14 | cfg := openai.DefaultConfig(apiKey) 15 | if baseUrl != "" { 16 | cfg.BaseURL = baseUrl 17 | } 18 | 19 | if proxyAddr != "" { 20 | transport := &http.Transport{ 21 | Proxy: http.ProxyURL(config.Conf.App.ParsedProxy), 22 | } 23 | cfg.HTTPClient = &http.Client{ 24 | Transport: transport, 25 | } 26 | } 27 | 28 | client := openai.NewClientWithConfig(cfg) 29 | return &Client{client: client} 30 | } 31 | -------------------------------------------------------------------------------- /pkg/openai/openai.go: -------------------------------------------------------------------------------- 1 | package openai 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | openai "github.com/sashabaranov/go-openai" 7 | "go.uber.org/zap" 8 | "io" 9 | "krillin-ai/config" 10 | "krillin-ai/log" 11 | "net/http" 12 | "os" 13 | "strings" 14 | ) 15 | 16 | func (c *Client) ChatCompletion(query string) (string, error) { 17 | req := openai.ChatCompletionRequest{ 18 | Model: config.Conf.Llm.Model, 19 | Messages: []openai.ChatCompletionMessage{ 20 | { 21 | 
Role: openai.ChatMessageRoleSystem, 22 | Content: "You are an assistant that helps with subtitle translation.", 23 | }, 24 | { 25 | Role: openai.ChatMessageRoleUser, 26 | Content: query, 27 | }, 28 | }, 29 | Stream: true, 30 | MaxTokens: 8192, 31 | } 32 | 33 | stream, err := c.client.CreateChatCompletionStream(context.Background(), req) 34 | if err != nil { 35 | log.GetLogger().Error("openai create chat completion stream failed", zap.Error(err)) 36 | return "", err 37 | } 38 | defer stream.Close() 39 | 40 | var resContent string 41 | for { 42 | response, err := stream.Recv() 43 | if err == io.EOF { 44 | break 45 | } 46 | if err != nil { 47 | log.GetLogger().Error("openai stream receive failed", zap.Error(err)) 48 | return "", err 49 | } 50 | if len(response.Choices) == 0 { 51 | log.GetLogger().Info("openai stream receive no choices", zap.Any("response", response)) 52 | continue 53 | } 54 | 55 | resContent += response.Choices[0].Delta.Content 56 | } 57 | 58 | return resContent, nil 59 | } 60 | 61 | func (c *Client) Text2Speech(text, voice string, outputFile string) error { 62 | baseUrl := config.Conf.Tts.Openai.BaseUrl 63 | if baseUrl == "" { 64 | baseUrl = "https://api.openai.com/v1" 65 | } 66 | url := baseUrl + "/audio/speech" 67 | 68 | // 创建HTTP请求 69 | reqBody := fmt.Sprintf(`{ 70 | "model": "tts-1", 71 | "input": "%s", 72 | "voice":"%s", 73 | "response_format": "wav" 74 | }`, text, voice) 75 | req, err := http.NewRequest("POST", url, strings.NewReader(reqBody)) 76 | if err != nil { 77 | return err 78 | } 79 | 80 | req.Header.Set("Content-Type", "application/json") 81 | req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", config.Conf.Tts.Openai.ApiKey)) 82 | 83 | // 发送HTTP请求 84 | client := &http.Client{} 85 | resp, err := client.Do(req) 86 | if err != nil { 87 | return err 88 | } 89 | defer resp.Body.Close() 90 | 91 | if resp.StatusCode != http.StatusOK { 92 | body, _ := io.ReadAll(resp.Body) 93 | log.GetLogger().Error("openai tts failed", zap.Int("status_code", resp.StatusCode), zap.String("body", string(body))) 94 | return fmt.Errorf("openai tts none-200 status code: %d", resp.StatusCode) 95 | } 96 | 97 | file, err := os.Create(outputFile) 98 | if err != nil { 99 | return err 100 | } 101 | defer file.Close() 102 | 103 | _, err = io.Copy(file, resp.Body) 104 | if err != nil { 105 | return err 106 | } 107 | 108 | return nil 109 | } 110 | -------------------------------------------------------------------------------- /pkg/util/audio.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "go.uber.org/zap" 5 | "krillin-ai/internal/storage" 6 | "krillin-ai/log" 7 | "os/exec" 8 | "path/filepath" 9 | "strings" 10 | ) 11 | 12 | // 把音频处理成单声道、16k采样率 13 | func ProcessAudio(filePath string) (string, error) { 14 | dest := strings.ReplaceAll(filePath, filepath.Ext(filePath), "_mono_16K.mp3") 15 | cmdArgs := []string{"-i", filePath, "-ac", "1", "-ar", "16000", "-b:a", "192k", dest} 16 | cmd := exec.Command(storage.FfmpegPath, cmdArgs...) 
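// Downmix to mono and resample to 16 kHz. Illustrative expansion of the command built above,
// assuming an input of /tmp/video.mp4:
//   ffmpeg -i /tmp/video.mp4 -ac 1 -ar 16000 -b:a 192k /tmp/video_mono_16K.mp3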
17 | output, err := cmd.CombinedOutput() 18 | if err != nil { 19 | log.GetLogger().Error("处理音频失败", zap.Error(err), zap.String("audio file", filePath), zap.String("output", string(output))) 20 | return "", err 21 | } 22 | return dest, nil 23 | } 24 | -------------------------------------------------------------------------------- /pkg/util/base.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "archive/zip" 5 | "fmt" 6 | "github.com/google/uuid" 7 | "io" 8 | "math" 9 | "math/rand" 10 | "net/url" 11 | "os" 12 | "path/filepath" 13 | "regexp" 14 | "runtime" 15 | "strconv" 16 | "strings" 17 | "unicode" 18 | ) 19 | 20 | var strWithUpperLowerNum = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ123456789") 21 | 22 | func GenerateRandStringWithUpperLowerNum(n int) string { 23 | b := make([]rune, n) 24 | for i := range b { 25 | b[i] = strWithUpperLowerNum[rand.Intn(len(strWithUpperLowerNum))] 26 | } 27 | return string(b) 28 | } 29 | 30 | func GetYouTubeID(youtubeURL string) (string, error) { 31 | parsedURL, err := url.Parse(youtubeURL) 32 | if err != nil { 33 | return "", err 34 | } 35 | 36 | if strings.Contains(parsedURL.Path, "watch") { 37 | queryParams := parsedURL.Query() 38 | if id, exists := queryParams["v"]; exists { 39 | return id[0], nil 40 | } 41 | } else { 42 | pathSegments := strings.Split(parsedURL.Path, "/") 43 | return pathSegments[len(pathSegments)-1], nil 44 | } 45 | 46 | return "", fmt.Errorf("no video ID found") 47 | } 48 | 49 | func GetBilibiliVideoId(url string) string { 50 | re := regexp.MustCompile(`https://(?:www\.)?bilibili\.com/(?:video/|video/av\d+/)(BV[a-zA-Z0-9]+)`) 51 | matches := re.FindStringSubmatch(url) 52 | if len(matches) > 1 { 53 | // 返回匹配到的BV号 54 | return matches[1] 55 | } 56 | return "" 57 | } 58 | 59 | // 将浮点数秒数转换为HH:MM:SS,SSS格式的字符串 60 | func FormatTime(seconds float32) string { 61 | totalSeconds := int(math.Floor(float64(seconds))) // 获取总秒数 62 | milliseconds := int((seconds - float32(totalSeconds)) * 1000) // 获取毫秒部分 63 | 64 | hours := totalSeconds / 3600 65 | minutes := (totalSeconds % 3600) / 60 66 | secs := totalSeconds % 60 67 | return fmt.Sprintf("%02d:%02d:%02d,%03d", hours, minutes, secs, milliseconds) 68 | } 69 | 70 | // 判断字符串是否是纯数字(字幕编号) 71 | func IsNumber(s string) bool { 72 | _, err := strconv.Atoi(s) 73 | return err == nil 74 | } 75 | 76 | func Unzip(zipFile, destDir string) error { 77 | zipReader, err := zip.OpenReader(zipFile) 78 | if err != nil { 79 | return fmt.Errorf("打开zip文件失败: %v", err) 80 | } 81 | defer zipReader.Close() 82 | 83 | err = os.MkdirAll(destDir, 0755) 84 | if err != nil { 85 | return fmt.Errorf("创建目标目录失败: %v", err) 86 | } 87 | 88 | for _, file := range zipReader.File { 89 | filePath := filepath.Join(destDir, file.Name) 90 | 91 | if file.FileInfo().IsDir() { 92 | err := os.MkdirAll(filePath, file.Mode()) 93 | if err != nil { 94 | return fmt.Errorf("创建目录失败: %v", err) 95 | } 96 | continue 97 | } 98 | 99 | destFile, err := os.Create(filePath) 100 | if err != nil { 101 | return fmt.Errorf("创建文件失败: %v", err) 102 | } 103 | defer destFile.Close() 104 | 105 | zipFileReader, err := file.Open() 106 | if err != nil { 107 | return fmt.Errorf("打开zip文件内容失败: %v", err) 108 | } 109 | defer zipFileReader.Close() 110 | 111 | _, err = io.Copy(destFile, zipFileReader) 112 | if err != nil { 113 | return fmt.Errorf("复制文件内容失败: %v", err) 114 | } 115 | } 116 | 117 | return nil 118 | } 119 | 120 | func GenerateID() string { 121 | return 
strings.ReplaceAll(uuid.New().String(), "-", "") 122 | } 123 | 124 | // ChangeFileExtension 修改文件后缀 125 | func ChangeFileExtension(path string, newExt string) string { 126 | ext := filepath.Ext(path) 127 | return path[:len(path)-len(ext)] + newExt 128 | } 129 | 130 | func CleanPunction(word string) string { 131 | return strings.TrimFunc(word, func(r rune) bool { 132 | return unicode.IsPunct(r) 133 | }) 134 | } 135 | 136 | func IsAlphabetic(r rune) bool { 137 | if unicode.IsLetter(r) { // 中文在IsLetter中会返回true 138 | switch { 139 | // 英语及其他拉丁字母的范围 140 | case r >= 'A' && r <= 'Z', r >= 'a' && r <= 'z': 141 | return true 142 | // 扩展拉丁字母(法语、西班牙语等使用的附加字符) 143 | case r >= '\u00C0' && r <= '\u024F': 144 | return true 145 | // 希腊字母 146 | case r >= '\u0370' && r <= '\u03FF': 147 | return true 148 | // 西里尔字母(俄语等) 149 | case r >= '\u0400' && r <= '\u04FF': 150 | return true 151 | default: 152 | return false 153 | } 154 | } 155 | return false 156 | } 157 | 158 | func ContainsAlphabetic(text string) bool { 159 | for _, r := range text { 160 | if IsAlphabetic(r) { 161 | return true 162 | } 163 | } 164 | return false 165 | } 166 | 167 | // CopyFile 复制文件 168 | func CopyFile(src, dst string) error { 169 | sourceFile, err := os.Open(src) 170 | if err != nil { 171 | return err 172 | } 173 | defer sourceFile.Close() 174 | 175 | destinationFile, err := os.Create(dst) 176 | if err != nil { 177 | return err 178 | } 179 | defer destinationFile.Close() 180 | 181 | _, err = io.Copy(destinationFile, sourceFile) 182 | if err != nil { 183 | return err 184 | } 185 | 186 | return destinationFile.Sync() 187 | } 188 | 189 | // SanitizePathName 清理字符串,使其成为合法路径名 190 | func SanitizePathName(name string) string { 191 | var illegalChars *regexp.Regexp 192 | if runtime.GOOS == "windows" { 193 | // Windows 特殊字符 194 | illegalChars = regexp.MustCompile(`[<>:"/\\|?*\x00-\x1F]`) 195 | } else { 196 | // POSIX 系统:只禁用 / 和空字节 197 | illegalChars = regexp.MustCompile(`[/\x00]`) 198 | } 199 | 200 | sanitized := illegalChars.ReplaceAllString(name, "_") 201 | 202 | // 去除前后空格 203 | sanitized = strings.TrimSpace(sanitized) 204 | 205 | // 防止空字符串 206 | if sanitized == "" { 207 | sanitized = "unnamed" 208 | } 209 | 210 | // 避免 Windows 下的保留文件名 211 | reserved := map[string]bool{ 212 | "CON": true, "PRN": true, "AUX": true, "NUL": true, 213 | "COM1": true, "COM2": true, "COM3": true, "COM4": true, 214 | "LPT1": true, "LPT2": true, 215 | } 216 | 217 | upper := strings.ToUpper(sanitized) 218 | if reserved[upper] { 219 | sanitized = "_" + sanitized 220 | } 221 | 222 | return sanitized 223 | } 224 | -------------------------------------------------------------------------------- /pkg/util/download.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "fmt" 5 | "go.uber.org/zap" 6 | "io" 7 | "krillin-ai/config" 8 | "krillin-ai/log" 9 | "net/http" 10 | "os" 11 | "time" 12 | ) 13 | 14 | // 用于显示下载进度,实现io.Writer 15 | type progressWriter struct { 16 | Total uint64 17 | Downloaded uint64 18 | StartTime time.Time 19 | } 20 | 21 | func (pw *progressWriter) Write(p []byte) (int, error) { 22 | n := len(p) 23 | pw.Downloaded += uint64(n) 24 | 25 | // 初始化开始时间 26 | if pw.StartTime.IsZero() { 27 | pw.StartTime = time.Now() 28 | } 29 | 30 | percent := float64(pw.Downloaded) / float64(pw.Total) * 100 31 | elapsed := time.Since(pw.StartTime).Seconds() 32 | speed := float64(pw.Downloaded) / 1024 / 1024 / elapsed 33 | 34 | fmt.Printf("\r下载进度: %.2f%% (%.2f MB / %.2f MB) | 速度: %.2f MB/s", 35 | percent, 36 | 
float64(pw.Downloaded)/1024/1024, 37 | float64(pw.Total)/1024/1024, 38 | speed) 39 | 40 | return n, nil 41 | } 42 | 43 | // DownloadFile 下载文件并保存到指定路径,支持代理 44 | func DownloadFile(urlStr, filepath, proxyAddr string) error { 45 | log.GetLogger().Info("开始下载文件", zap.String("url", urlStr)) 46 | client := &http.Client{} 47 | if proxyAddr != "" { 48 | client.Transport = &http.Transport{ 49 | Proxy: http.ProxyURL(config.Conf.App.ParsedProxy), 50 | } 51 | } 52 | 53 | resp, err := client.Get(urlStr) 54 | if err != nil { 55 | return err 56 | } 57 | defer resp.Body.Close() 58 | 59 | size := resp.ContentLength 60 | fmt.Printf("文件大小: %.2f MB\n", float64(size)/1024/1024) 61 | 62 | out, err := os.Create(filepath) 63 | if err != nil { 64 | return err 65 | } 66 | defer out.Close() 67 | 68 | // 带有进度的 Reader 69 | progress := &progressWriter{ 70 | Total: uint64(size), 71 | } 72 | reader := io.TeeReader(resp.Body, progress) 73 | 74 | _, err = io.Copy(out, reader) 75 | if err != nil { 76 | return err 77 | } 78 | fmt.Printf("\n") // 进度信息结束,换新行 79 | 80 | log.GetLogger().Info("文件下载完成", zap.String("路径", filepath)) 81 | return nil 82 | } 83 | -------------------------------------------------------------------------------- /pkg/util/language.go: -------------------------------------------------------------------------------- 1 | package util 2 | -------------------------------------------------------------------------------- /pkg/util/subtitle.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 | "krillin-ai/internal/storage" 7 | "os" 8 | "os/exec" 9 | "path/filepath" 10 | "regexp" 11 | "strconv" 12 | "strings" 13 | "unicode" 14 | ) 15 | 16 | // 处理每一个字幕块 17 | func ProcessBlock(block []string, targetLanguageFile, targetLanguageTextFile, originLanguageFile, originLanguageTextFile *os.File, isTargetOnTop bool) { 18 | var targetLines, originLines []string 19 | // 匹配时间戳的正则表达式 20 | timePattern := regexp.MustCompile(`\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}`) 21 | for _, line := range block { 22 | if timePattern.MatchString(line) || IsNumber(line) { 23 | // 时间戳和编号行保留在两个文件中 24 | targetLines = append(targetLines, line) 25 | originLines = append(originLines, line) 26 | continue 27 | } 28 | if len(targetLines) == 2 && len(originLines) == 2 { // 刚写完编号和时间戳,到了上方的文字行 29 | if isTargetOnTop { 30 | targetLines = append(targetLines, line) 31 | targetLanguageTextFile.WriteString(line) // 文稿文件 32 | } else { 33 | originLines = append(originLines, line) 34 | originLanguageTextFile.WriteString(line) 35 | } 36 | continue 37 | } 38 | // 到了下方的文字行 39 | if isTargetOnTop { 40 | originLines = append(originLines, line) 41 | originLanguageTextFile.WriteString(line) 42 | } else { 43 | targetLines = append(targetLines, line) 44 | targetLanguageTextFile.WriteString(line) 45 | } 46 | } 47 | 48 | if len(targetLines) > 2 { 49 | // 写入目标语言文件 50 | for _, line := range targetLines { 51 | targetLanguageFile.WriteString(line + "\n") 52 | } 53 | targetLanguageFile.WriteString("\n") 54 | } 55 | 56 | if len(originLines) > 2 { 57 | // 写入源语言文件 58 | for _, line := range originLines { 59 | originLanguageFile.WriteString(line + "\n") 60 | } 61 | originLanguageFile.WriteString("\n") 62 | } 63 | } 64 | 65 | // IsSubtitleText 是否是字幕文件中的字幕文字行 66 | func IsSubtitleText(line string) bool { 67 | if line == "" { 68 | return false 69 | } 70 | if IsNumber(line) { 71 | return false 72 | } 73 | timelinePattern := regexp.MustCompile(`\d{2}:\d{2}:\d{2},\d{3} --> 
\d{2}:\d{2}:\d{2},\d{3}`) 74 | return !timelinePattern.MatchString(line) 75 | } 76 | 77 | type Format struct { 78 | Duration string `json:"duration"` 79 | } 80 | 81 | type ProbeData struct { 82 | Format Format `json:"format"` 83 | } 84 | 85 | type SrtBlock struct { 86 | Index int 87 | Timestamp string 88 | TargetLanguageSentence string 89 | OriginLanguageSentence string 90 | } 91 | 92 | func TrimString(s string) string { 93 | s = strings.Replace(s, "[中文翻译]", "", -1) 94 | s = strings.Replace(s, "[英文句子]", "", -1) 95 | // 去除开头的空格和 '[' 96 | s = strings.TrimLeft(s, " [") 97 | 98 | // 去除结尾的空格和 ']' 99 | s = strings.TrimRight(s, " ]") 100 | 101 | //替换中文单引号 102 | s = strings.ReplaceAll(s, "’", "'") 103 | 104 | return s 105 | } 106 | 107 | func SplitSentence(sentence string) []string { 108 | // 使用正则表达式移除标点符号和特殊字符(保留各语言字母、数字和空格) 109 | re := regexp.MustCompile(`[^\p{L}\p{N}\s']+`) 110 | cleanedSentence := re.ReplaceAllString(sentence, " ") 111 | 112 | // 使用 strings.Fields 按空格拆分成单词 113 | words := strings.Fields(cleanedSentence) 114 | 115 | return words 116 | } 117 | 118 | func MergeFile(finalFile string, files ...string) error { 119 | // 创建最终文件 120 | final, err := os.Create(finalFile) 121 | if err != nil { 122 | return err 123 | } 124 | 125 | // 逐个读取文件并写入最终文件 126 | for _, file := range files { 127 | f, err := os.Open(file) 128 | if err != nil { 129 | return err 130 | } 131 | defer f.Close() 132 | 133 | scanner := bufio.NewScanner(f) 134 | for scanner.Scan() { 135 | line := scanner.Text() 136 | final.WriteString(line + "\n") 137 | } 138 | } 139 | 140 | return nil 141 | } 142 | 143 | func MergeSrtFiles(finalFile string, files ...string) error { 144 | output, err := os.Create(finalFile) 145 | if err != nil { 146 | return err 147 | } 148 | defer output.Close() 149 | writer := bufio.NewWriter(output) 150 | lineNumber := 0 151 | for _, file := range files { 152 | // 不存在某一个file就跳过 153 | if _, err = os.Stat(file); os.IsNotExist(err) { 154 | continue 155 | } 156 | // 打开当前字幕文件 157 | f, err := os.Open(file) 158 | if err != nil { 159 | return err 160 | } 161 | defer f.Close() 162 | // 处理当前字幕文件 163 | scanner := bufio.NewScanner(f) 164 | for scanner.Scan() { 165 | line := scanner.Text() 166 | 167 | if strings.Contains(line, "```") { 168 | continue 169 | } 170 | 171 | if IsNumber(line) { 172 | lineNumber++ 173 | line = strconv.Itoa(lineNumber) 174 | } 175 | 176 | writer.WriteString(line + "\n") 177 | } 178 | } 179 | writer.Flush() 180 | 181 | return nil 182 | } 183 | 184 | // 给定文件和替换map,将文件中所有的key替换成value 185 | func ReplaceFileContent(srcFile, dstFile string, replacements map[string]string) error { 186 | file, err := os.Open(srcFile) 187 | if err != nil { 188 | return err 189 | } 190 | defer file.Close() 191 | 192 | outFile, err := os.Create(dstFile) 193 | if err != nil { 194 | return err 195 | } 196 | defer outFile.Close() 197 | 198 | scanner := bufio.NewScanner(file) 199 | writer := bufio.NewWriter(outFile) // 提高性能 200 | defer writer.Flush() 201 | 202 | for scanner.Scan() { 203 | line := scanner.Text() 204 | for before, after := range replacements { 205 | line = strings.ReplaceAll(line, before, after) 206 | } 207 | _, _ = writer.WriteString(line + "\n") 208 | } 209 | 210 | if err = scanner.Err(); err != nil { 211 | return err 212 | } 213 | 214 | return nil 215 | } 216 | 217 | // 获得文件名后加上后缀的新文件名,不改变扩展名,例如:/home/ubuntu/abc.srt变成/home/ubuntu/abc_tmp.srt 218 | func AddSuffixToFileName(filePath, suffix string) string { 219 | dir := filepath.Dir(filePath) 220 | ext := filepath.Ext(filePath) 221 | name := 
strings.TrimSuffix(filepath.Base(filePath), ext) 222 | newName := fmt.Sprintf("%s%s%s", name, suffix, ext) 223 | return filepath.Join(dir, newName) 224 | } 225 | 226 | // 去除字符串中的标点符号等字符,确保字符中的内容都是whisper模型可以识别出来的,便于时间戳对齐 227 | func GetRecognizableString(s string) string { 228 | var result []rune 229 | for _, v := range s { 230 | // 英文字母和数字 231 | if unicode.Is(unicode.Latin, v) || unicode.Is(unicode.Number, v) { 232 | result = append(result, v) 233 | } 234 | // 中文 235 | if unicode.Is(unicode.Han, v) { 236 | result = append(result, v) 237 | } 238 | // 韩文 239 | if unicode.Is(unicode.Hangul, v) { 240 | result = append(result, v) 241 | } 242 | // 日文平假片假 243 | if unicode.Is(unicode.Hiragana, v) || unicode.Is(unicode.Katakana, v) { 244 | result = append(result, v) 245 | } 246 | } 247 | return string(result) 248 | } 249 | 250 | func GetAudioDuration(inputFile string) (float64, error) { 251 | // 使用 ffprobe 获取精确时长 252 | cmd := exec.Command(storage.FfprobePath, "-i", inputFile, "-show_entries", "format=duration", "-v", "quiet", "-of", "csv=p=0") 253 | cmdOutput, err := cmd.Output() 254 | if err != nil { 255 | return 0, fmt.Errorf("GetAudioDuration failed to get audio duration: %w", err) 256 | } 257 | 258 | // 解析时长 259 | duration, err := strconv.ParseFloat(strings.TrimSpace(string(cmdOutput)), 64) 260 | if err != nil { 261 | return 0, fmt.Errorf("GetAudioDuration failed to parse audio duration: %w", err) 262 | } 263 | 264 | return duration, nil 265 | } 266 | -------------------------------------------------------------------------------- /pkg/util/video.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "fmt" 5 | "krillin-ai/internal/storage" 6 | "os/exec" 7 | ) 8 | 9 | func ReplaceAudioInVideo(videoFile string, audioFile string, outputFile string) error { 10 | cmd := exec.Command(storage.FfmpegPath, "-i", videoFile, "-i", audioFile, "-c:v", "copy", "-map", "0:v:0", "-map", "1:a:0", outputFile) 11 | 12 | if err := cmd.Run(); err != nil { 13 | return fmt.Errorf("error replacing audio in video: %v", err) 14 | } 15 | 16 | return nil 17 | } 18 | -------------------------------------------------------------------------------- /pkg/whisper/init.go: -------------------------------------------------------------------------------- 1 | package whisper 2 | 3 | import ( 4 | "github.com/sashabaranov/go-openai" 5 | "krillin-ai/config" 6 | "net/http" 7 | ) 8 | 9 | type Client struct { 10 | client *openai.Client 11 | } 12 | 13 | func NewClient(baseUrl, apiKey, proxyAddr string) *Client { 14 | cfg := openai.DefaultConfig(apiKey) 15 | if baseUrl != "" { 16 | cfg.BaseURL = baseUrl 17 | } 18 | 19 | if proxyAddr != "" { 20 | transport := &http.Transport{ 21 | Proxy: http.ProxyURL(config.Conf.App.ParsedProxy), 22 | } 23 | cfg.HTTPClient = &http.Client{ 24 | Transport: transport, 25 | } 26 | } 27 | 28 | client := openai.NewClientWithConfig(cfg) 29 | return &Client{client: client} 30 | } 31 | -------------------------------------------------------------------------------- /pkg/whisper/whisper.go: -------------------------------------------------------------------------------- 1 | package whisper 2 | 3 | import ( 4 | "context" 5 | "github.com/sashabaranov/go-openai" 6 | "go.uber.org/zap" 7 | "krillin-ai/internal/types" 8 | "krillin-ai/log" 9 | "strings" 10 | ) 11 | 12 | func (c *Client) Transcription(audioFile, language, workDir string) (*types.TranscriptionData, error) { 13 | resp, err := c.client.CreateTranscription( 14 | context.Background(), 15 | 
openai.AudioRequest{ 16 | Model: openai.Whisper1, 17 | FilePath: audioFile, 18 | Format: openai.AudioResponseFormatVerboseJSON, 19 | TimestampGranularities: []openai.TranscriptionTimestampGranularity{ 20 | openai.TranscriptionTimestampGranularityWord, 21 | }, 22 | Language: language, 23 | }, 24 | ) 25 | if err != nil { 26 | log.GetLogger().Error("openai create transcription failed", zap.Error(err)) 27 | return nil, err 28 | } 29 | 30 | transcriptionData := &types.TranscriptionData{ 31 | Language: resp.Language, 32 | Text: strings.ReplaceAll(resp.Text, "-", " "), // 连字符处理,因为模型存在很多错误添加到连字符 33 | Words: make([]types.Word, 0), 34 | } 35 | num := 0 36 | for _, word := range resp.Words { 37 | if strings.Contains(word.Word, "—") { 38 | // 对称切分 39 | mid := (word.Start + word.End) / 2 40 | seperatedWords := strings.Split(word.Word, "—") 41 | transcriptionData.Words = append(transcriptionData.Words, []types.Word{ 42 | { 43 | Num: num, 44 | Text: seperatedWords[0], 45 | Start: word.Start, 46 | End: mid, 47 | }, 48 | { 49 | Num: num + 1, 50 | Text: seperatedWords[1], 51 | Start: mid, 52 | End: word.End, 53 | }, 54 | }...) 55 | num += 2 56 | } else { 57 | transcriptionData.Words = append(transcriptionData.Words, types.Word{ 58 | Num: num, 59 | Text: word.Word, 60 | Start: word.Start, 61 | End: word.End, 62 | }) 63 | num++ 64 | } 65 | } 66 | 67 | return transcriptionData, nil 68 | } 69 | -------------------------------------------------------------------------------- /pkg/whispercpp/init.go: -------------------------------------------------------------------------------- 1 | package whispercpp 2 | 3 | type WhispercppProcessor struct { 4 | WorkDir string // 生成中间文件的目录 5 | Model string 6 | } 7 | 8 | func NewWhispercppProcessor(model string) *WhispercppProcessor { 9 | return &WhispercppProcessor{ 10 | Model: model, 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /pkg/whispercpp/transcription.go: -------------------------------------------------------------------------------- 1 | package whispercpp 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "krillin-ai/internal/storage" 7 | "krillin-ai/internal/types" 8 | "krillin-ai/log" 9 | "krillin-ai/pkg/util" 10 | "os" 11 | "os/exec" 12 | "regexp" 13 | "strconv" 14 | "strings" 15 | 16 | "go.uber.org/zap" 17 | ) 18 | 19 | func (c *WhispercppProcessor) Transcription(audioFile, language, workDir string) (*types.TranscriptionData, error) { 20 | name := util.ChangeFileExtension(audioFile, "") 21 | cmdArgs := []string{ 22 | "-m", fmt.Sprintf("./models/whispercpp/ggml-%s.bin", c.Model), 23 | "--output-json-full", 24 | "--flash-attn", 25 | "--split-on-word", 26 | "--language", language, 27 | "--output-file", name, 28 | "--file", audioFile, 29 | } 30 | cmd := exec.Command(storage.WhispercppPath, cmdArgs...) 
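// Run the whisper.cpp binary with --output-json-full; --output-file is the audio path with its
// extension stripped, so the JSON report parsed below is expected at the audio path + ".json".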
31 | log.GetLogger().Info("WhispercppProcessor转录开始", zap.String("cmd", cmd.String())) 32 | output, err := cmd.CombinedOutput() 33 | if err != nil && !strings.Contains(string(output), "output_json: saving output to") { 34 | log.GetLogger().Error("WhispercppProcessor cmd 执行失败", zap.String("output", string(output)), zap.Error(err)) 35 | return nil, err 36 | } 37 | log.GetLogger().Info("WhispercppProcessor转录json生成完毕", zap.String("audio file", audioFile)) 38 | 39 | var result types.WhispercppOutput 40 | fileData, err := os.Open(util.ChangeFileExtension(audioFile, ".json")) 41 | if err != nil { 42 | log.GetLogger().Error("WhispercppProcessor 打开json文件失败", zap.Error(err)) 43 | return nil, err 44 | } 45 | defer fileData.Close() 46 | decoder := json.NewDecoder(fileData) 47 | if err = decoder.Decode(&result); err != nil { 48 | log.GetLogger().Error("WhispercppProcessor 解析json文件失败", zap.Error(err)) 49 | return nil, err 50 | } 51 | 52 | var ( 53 | transcriptionData types.TranscriptionData 54 | num int 55 | ) 56 | for _, segment := range result.Transcription { 57 | transcriptionData.Text += strings.ReplaceAll(segment.Text, "—", " ") // 连字符处理,因为模型存在很多错误添加到连字符 58 | for _, word := range segment.Tokens { 59 | fromSec, err := parseTimestampToSeconds(word.Timestamps.From) 60 | if err != nil { 61 | log.GetLogger().Error("解析开始时间失败", zap.Error(err)) 62 | return nil, err 63 | } 64 | 65 | toSec, err := parseTimestampToSeconds(word.Timestamps.To) 66 | if err != nil { 67 | log.GetLogger().Error("解析结束时间失败", zap.Error(err)) 68 | return nil, err 69 | } 70 | regex := regexp.MustCompile(`^\[.*\]$`) 71 | if regex.MatchString(word.Text) { 72 | continue 73 | } else if strings.Contains(word.Text, "—") { 74 | // 对称切分 75 | mid := (fromSec + toSec) / 2 76 | seperatedWords := strings.Split(word.Text, "—") 77 | transcriptionData.Words = append(transcriptionData.Words, []types.Word{ 78 | { 79 | Num: num, 80 | Text: util.CleanPunction(strings.TrimSpace(seperatedWords[0])), 81 | Start: fromSec, 82 | End: mid, 83 | }, 84 | { 85 | Num: num + 1, 86 | Text: util.CleanPunction(strings.TrimSpace(seperatedWords[1])), 87 | Start: mid, 88 | End: toSec, 89 | }, 90 | }...) 
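// The em-dash token was split into two words that share the original time span symmetrically,
// so the word counter advances by two; ordinary tokens take the else branch below.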
91 | num += 2 92 | } else { 93 | transcriptionData.Words = append(transcriptionData.Words, types.Word{ 94 | Num: num, 95 | Text: util.CleanPunction(strings.TrimSpace(word.Text)), 96 | Start: fromSec, 97 | End: toSec, 98 | }) 99 | num++ 100 | } 101 | } 102 | } 103 | log.GetLogger().Info("WhispercppProcessor转录成功") 104 | return &transcriptionData, nil 105 | } 106 | 107 | // 新增时间戳转换函数 108 | func parseTimestampToSeconds(timeStr string) (float64, error) { 109 | parts := strings.Split(timeStr, ",") 110 | if len(parts) != 2 { 111 | return 0, fmt.Errorf("invalid timestamp format: %s", timeStr) 112 | } 113 | 114 | timePart := strings.Split(parts[0], ":") 115 | if len(timePart) != 3 { 116 | return 0, fmt.Errorf("invalid time format: %s", parts[0]) 117 | } 118 | 119 | hours, _ := strconv.Atoi(timePart[0]) 120 | minutes, _ := strconv.Atoi(timePart[1]) 121 | seconds, _ := strconv.Atoi(timePart[2]) 122 | milliseconds, _ := strconv.Atoi(parts[1]) 123 | 124 | return float64(hours*3600+minutes*60+seconds) + float64(milliseconds)/1000, nil 125 | } 126 | -------------------------------------------------------------------------------- /pkg/whisperkit/init.go: -------------------------------------------------------------------------------- 1 | package whisperkit 2 | 3 | type WhisperKitProcessor struct { 4 | WorkDir string // 生成中间文件的目录 5 | Model string 6 | } 7 | 8 | func NewWhisperKitProcessor(model string) *WhisperKitProcessor { 9 | return &WhisperKitProcessor{ 10 | Model: model, 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /pkg/whisperkit/transcription.go: -------------------------------------------------------------------------------- 1 | package whisperkit 2 | 3 | import ( 4 | "encoding/json" 5 | "krillin-ai/internal/storage" 6 | "krillin-ai/internal/types" 7 | "krillin-ai/log" 8 | "krillin-ai/pkg/util" 9 | "os" 10 | "os/exec" 11 | "strings" 12 | 13 | "go.uber.org/zap" 14 | ) 15 | 16 | func (c *WhisperKitProcessor) Transcription(audioFile, language, workDir string) (*types.TranscriptionData, error) { 17 | cmdArgs := []string{ 18 | "transcribe", 19 | "--model-path", "./models/whisperkit/openai_whisper-large-v2", 20 | "--audio-encoder-compute-units", "all", 21 | "--text-decoder-compute-units", "all", 22 | "--language", language, 23 | "--report", 24 | "--report-path", workDir, 25 | "--word-timestamps", 26 | "--skip-special-tokens", 27 | "--audio-path", audioFile, 28 | } 29 | cmd := exec.Command(storage.WhisperKitPath, cmdArgs...) 
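// Invoke the WhisperKit CLI with word-level timestamps enabled; the transcript JSON is then opened
// below from the audio path with a .json extension and decoded into types.WhisperKitOutput.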
30 | log.GetLogger().Info("WhisperKitProcessor转录开始", zap.String("cmd", cmd.String())) 31 | output, err := cmd.CombinedOutput() 32 | if err != nil { 33 | log.GetLogger().Error("WhisperKitProcessor cmd 执行失败", zap.String("output", string(output)), zap.Error(err)) 34 | return nil, err 35 | } 36 | log.GetLogger().Info("WhisperKitProcessor转录json生成完毕", zap.String("audio file", audioFile)) 37 | 38 | var result types.WhisperKitOutput 39 | fileData, err := os.Open(util.ChangeFileExtension(audioFile, ".json")) 40 | if err != nil { 41 | log.GetLogger().Error("WhisperKitProcessor 打开json文件失败", zap.Error(err)) 42 | return nil, err 43 | } 44 | defer fileData.Close() 45 | decoder := json.NewDecoder(fileData) 46 | if err = decoder.Decode(&result); err != nil { 47 | log.GetLogger().Error("WhisperKitProcessor 解析json文件失败", zap.Error(err)) 48 | return nil, err 49 | } 50 | 51 | var ( 52 | transcriptionData types.TranscriptionData 53 | num int 54 | ) 55 | for _, segment := range result.Segments { 56 | transcriptionData.Text += strings.ReplaceAll(segment.Text, "—", " ") // 连字符处理,因为模型存在很多错误添加到连字符 57 | for _, word := range segment.Words { 58 | if strings.Contains(word.Word, "—") { 59 | // 对称切分 60 | mid := (word.Start + word.End) / 2 61 | seperatedWords := strings.Split(word.Word, "—") 62 | transcriptionData.Words = append(transcriptionData.Words, []types.Word{ 63 | { 64 | Num: num, 65 | Text: util.CleanPunction(strings.TrimSpace(seperatedWords[0])), 66 | Start: word.Start, 67 | End: mid, 68 | }, 69 | { 70 | Num: num + 1, 71 | Text: util.CleanPunction(strings.TrimSpace(seperatedWords[1])), 72 | Start: mid, 73 | End: word.End, 74 | }, 75 | }...) 76 | num += 2 77 | } else { 78 | transcriptionData.Words = append(transcriptionData.Words, types.Word{ 79 | Num: num, 80 | Text: util.CleanPunction(strings.TrimSpace(word.Word)), 81 | Start: word.Start, 82 | End: word.End, 83 | }) 84 | num++ 85 | } 86 | } 87 | } 88 | log.GetLogger().Info("WhisperKitProcessor转录成功") 89 | return &transcriptionData, nil 90 | } 91 | -------------------------------------------------------------------------------- /static/background.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krillinai/KrillinAI/6b1b22bb98aa14b327a257cf24c521cc827aebd1/static/background.jpg -------------------------------------------------------------------------------- /static/embed.go: -------------------------------------------------------------------------------- 1 | package static 2 | 3 | import "embed" 4 | 5 | //go:embed index.html background.jpg 6 | var EmbeddedFiles embed.FS 7 | --------------------------------------------------------------------------------