├── .gitignore ├── README.md ├── config.ini ├── go.mod ├── main.go ├── tests_srt └── 01.srt └── videosrt ├── aliyun ├── cloud │ ├── cloud.go │ └── tool.go └── oss │ └── oss.go ├── app.go ├── config └── ini │ └── config.go ├── ffmpeg └── ffmpeg.go ├── mylog └── log.go ├── tool.go ├── tool └── chinese_simple.go └── video.go /.gitignore: -------------------------------------------------------------------------------- 1 | go.sum 2 | *.wav 3 | *.mp3 4 | *.exe 5 | *.srt 6 | log.txt 7 | .idea 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## video-srt 2 | 3 | 这是一个可以识别视频语音自动生成字幕SRT文件的开源命令行工具。 4 | 5 | 本项目使用了阿里云的[OSS对象存储](https://www.aliyun.com/product/oss?spm=5176.12825654.eofdhaal5.13.e9392c4aGfj5vj&aly_as=K11FcpO8)、[录音文件识别](https://ai.aliyun.com/nls/filetrans?spm=5176.12061031.1228726.1.47fe3cb43I34mn)的相关业务接口。 6 | 7 | Windows-GUI版本:[https://github.com/wxbool/video-srt-windows](https://github.com/wxbool/video-srt-windows) 8 | 9 | ## 下载安装 10 | ```shell 11 | go get -u github.com/wxbool/video-srt 12 | ``` 13 | 14 | ## 使用 15 | ###### 项目使用了 [ffmpeg](http://ffmpeg.org/) 依赖,请先下载安装,并设置环境变量. 
16 | 17 | * 设置服务接口配置(config.ini) 18 | ```ini 19 | #字幕相关设置 20 | [srt] 21 | #智能分段处理:true(开启) false(关闭) 22 | intelligent_block=true 23 | 24 | #阿里云Oss对象服务配置 25 | #文档:https://help.aliyun.com/document_detail/31827.html?spm=a2c4g.11186623.6.582.4e7858a85Dr5pA 26 | [aliyunOss] 27 | # OSS 对外服务的访问域名 28 | endpoint=your.Endpoint 29 | # 存储空间(Bucket)名称 30 | bucketName=your.BucketName 31 | # 存储空间(Bucket 域名)地址 32 | bucketDomain=your.BucketDomain 33 | accessKeyId=your.AccessKeyId 34 | accessKeySecret=your.AccessKeySecret 35 | 36 | #阿里云语音识别配置 37 | #文档: 38 | [aliyunClound] 39 | # 在管控台中创建的项目Appkey,项目的唯一标识 40 | appKey=your.AppKey 41 | accessKeyId=your.AccessKeyId 42 | accessKeySecret=your.AccessKeySecret 43 | ``` 44 | 45 | * 生成字幕文件(CLI) 46 | 47 | ```shell 48 | go run main.go video.mp4 49 | ``` 50 | 51 | * 生成字幕文件(可执行文件 | [video-srt.exe](https://github.com/wxbool/video-srt/blob/master/video-srt.exe)) 52 | ```shell 53 | video-srt video.mp4 54 | ``` 55 | 56 | 57 | ## FAQ 58 | * 支持哪些语言? 59 | * 视频字幕文本识别的核心服务是由阿里云`录音文件识别`业务提供的接口进行的,支持汉语普通话、方言、欧美英语等语言 60 | * 如何才能使用这个工具? 
61 | * 注册阿里云账号 62 | * 账号快速实名认证 63 | * 开通 `访问控制` 服务,并创建角色,设置开放 `OSS对象存储`、`智能语音交互` 的访问权限 64 | * 开通 `OSS对象存储` 服务,并创建一个存储空间(Bucket)(读写权限设置为公共读) 65 | * 开通 `智能语音交互` 服务,并创建项目(根据使用场景选择识别语言以及偏好等) 66 | * 设置 `config.ini` 文件的配置项 67 | * 命令行执行(详见`使用`) -------------------------------------------------------------------------------- /config.ini: -------------------------------------------------------------------------------- 1 | #字幕相关设置 2 | [srt] 3 | #智能分段处理:true(开启) false(关闭) 4 | intelligent_block=true 5 | 6 | #阿里云Oss对象服务配置 7 | #文档:https://help.aliyun.com/document_detail/31827.html?spm=a2c4g.11186623.6.582.4e7858a85Dr5pA 8 | [aliyunOss] 9 | # OSS 对外服务的访问域名 10 | endpoint=your.Endpoint 11 | # 存储空间(Bucket)名称 12 | bucketName=your.BucketName 13 | # 存储空间(Bucket 域名)地址 14 | bucketDomain=your.BucketDomain 15 | accessKeyId=your.AccessKeyId 16 | accessKeySecret=your.AccessKeySecret 17 | 18 | #阿里云语音识别配置 19 | #文档: 20 | [aliyunClound] 21 | # 在管控台中创建的项目Appkey,项目的唯一标识 22 | appKey=your.AppKey 23 | accessKeyId=your.AccessKeyId 24 | accessKeySecret=your.AccessKeySecret -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module videosrt 2 | 3 | go 1.14 4 | 5 | require ( 6 | github.com/Unknwon/goconfig v1.0.0 7 | github.com/aliyun/alibaba-cloud-sdk-go v1.61.1526 8 | github.com/aliyun/aliyun-oss-go-sdk v2.2.1+incompatible 9 | github.com/buger/jsonparser v1.1.1 10 | github.com/pkg/errors v0.9.1 11 | golang.org/x/time v0.0.0-20220224211638-0e9765cccd65 // indirect 12 | ) 13 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "os" 7 | "path/filepath" 8 | "time" 9 | "videosrt/videosrt" 10 | ) 11 | 12 | //定义配置文件 13 | const CONFIG = "config.ini" 14 | 15 | func main() { 16 | 17 | //致命错误捕获 18 | defer func() { 
19 | if err := recover(); err != nil { 20 | fmt.Println("执行错误 : ", err) 21 | os.Exit(500) 22 | } 23 | }() 24 | 25 | appDir, err := filepath.Abs(filepath.Dir(os.Args[0])) //应用执行根目录 26 | if err != nil { 27 | panic(err) 28 | } 29 | 30 | //初始化 31 | if len(os.Args) < 2 { 32 | os.Args = append(os.Args , "") 33 | } 34 | 35 | var video string 36 | 37 | //设置命令行参数 38 | flag.StringVar(&video, "f", "", "enter a video file waiting to be processed .") 39 | 40 | flag.Parse() 41 | 42 | if video == "" && os.Args[1] != "" && os.Args[1] != "-f" { 43 | video = os.Args[1] 44 | } 45 | 46 | //获取应用 47 | app := videosrt.NewApp(CONFIG) 48 | 49 | appDir = videosrt.WinDir(appDir) 50 | 51 | //初始化应用 52 | app.Init(appDir) 53 | 54 | //调起应用 55 | app.Run(videosrt.WinDir(video)) 56 | 57 | //延迟退出 58 | time.Sleep(time.Second * 1) 59 | } 60 | -------------------------------------------------------------------------------- /tests_srt/01.srt: -------------------------------------------------------------------------------- 1 | 1 2 | 00:00:04,300 --> 00:00:04,600 3 | 计算机 4 | 5 | 6 | 2 7 | 00:00:06,030 --> 00:00:08,610 8 | 习惯以什么样的方式来做计算的 9 | 10 | 11 | 3 12 | 00:00:09,030 --> 00:00:11,850 13 | 比如说我们今天想要算算这么一个方程 14 | 15 | 16 | 4 17 | 00:00:11,850 --> 00:00:13,350 18 | r x加六等于二十 19 | 20 | 21 | 5 22 | 00:00:14,390 --> 00:00:14,930 23 | 我们怎么算 24 | 25 | 26 | 6 27 | 00:00:15,680 --> 00:00:15,860 28 | 二 29 | 30 | 31 | 7 32 | 00:00:16,520 --> 00:00:16,670 33 | x 34 | 35 | 36 | 8 37 | 00:00:17,940 --> 00:00:18,210 38 | 加 39 | 40 | 41 | 9 42 | 00:00:18,970 --> 00:00:19,000 43 | 六 44 | 45 | 46 | 10 47 | 00:00:19,860 --> 00:00:21,030 48 | 等于二十 49 | 50 | 51 | 11 52 | 00:00:23,300 --> 00:00:25,250 53 | 我们学过方强啊 54 | 55 | 56 | 12 57 | 00:00:25,250 --> 00:00:26,750 58 | 我们知道这是很容易啊 59 | 60 | 61 | 13 62 | 00:00:26,750 --> 00:00:27,020 63 | 那就是 64 | 65 | 66 | 14 67 | 00:00:27,750 --> 00:00:31,170 68 | rs等于二十减六 69 | 70 | 71 | 15 72 | 00:00:31,880 --> 00:00:36,530 73 | 那就是x等于十四除以二 74 | 75 | 76 | 16 77 | 00:00:36,530 --> 
00:00:37,190 78 | 那就等于去了 79 | 80 | 81 | 17 82 | 00:00:39,980 --> 00:00:40,370 83 | 是啊 84 | 85 | 86 | 18 87 | 00:00:40,370 --> 00:00:43,100 88 | 这是对于人来说很容易算 89 | 90 | 91 | 19 92 | 00:00:43,100 --> 00:00:43,850 93 | 但是呢 94 | 95 | 96 | 20 97 | 00:00:43,850 --> 00:00:44,690 98 | 对于计算机来说 99 | 100 | 101 | 21 102 | 00:00:45,680 --> 00:00:48,110 103 | 你要让他去学会怎么样列这样的方程式 104 | 105 | 106 | 22 107 | 00:00:49,140 --> 00:00:49,470 108 | 不容易 109 | 110 | 111 | 23 112 | 00:00:50,830 --> 00:00:51,700 113 | 所以计算机怎么算 114 | 115 | 116 | 24 117 | 00:00:52,460 --> 00:00:54,140 118 | 计算机会有另外的算法 119 | 120 | 121 | 25 122 | 00:00:54,140 --> 00:00:55,700 123 | 另外的办法来做这样的事情 124 | 125 | 126 | 26 127 | 00:00:56,410 --> 00:00:56,680 128 | 比如说 129 | 130 | 131 | 27 132 | 00:00:58,160 --> 00:00:58,490 133 | 假设呢 134 | 135 | 136 | 28 137 | 00:00:59,340 --> 00:00:59,430 138 | 呃 139 | 140 | 141 | 29 142 | 00:01:00,330 --> 00:01:01,860 143 | 你现在什么都不会 144 | 145 | 146 | 30 147 | 00:01:01,860 --> 00:01:02,550 148 | 没有学过方程 149 | 150 | 151 | 31 152 | 00:01:02,550 --> 00:01:03,900 153 | 是你看到这样的式子 154 | 155 | 156 | 32 157 | 00:01:03,900 --> 00:01:04,500 158 | 你会怎么去想 159 | 160 | 161 | 33 162 | 00:01:07,260 --> 00:01:08,760 163 | 有一个想法是这样 164 | 165 | 166 | 34 167 | 00:01:08,760 --> 00:01:09,180 168 | 我们能不能 169 | 170 | 171 | 35 172 | 00:01:10,080 --> 00:01:10,560 173 | 凑这个数 174 | 175 | 176 | 36 177 | 00:01:11,180 --> 00:01:12,140 178 | 我先试试看 179 | 180 | 181 | 37 182 | 00:01:12,140 --> 00:01:13,070 183 | 如果是一 184 | 185 | 186 | 38 187 | 00:01:13,720 --> 00:01:14,710 188 | 那么二乘以一 189 | 190 | 191 | 39 192 | 00:01:16,880 --> 00:01:16,910 193 | 哎 194 | 195 | 196 | 40 197 | 00:01:17,830 --> 00:01:21,370 198 | 乘以一等于二加六等于八 199 | 200 | 201 | 41 202 | 00:01:22,310 --> 00:01:22,700 203 | 好吧 204 | 205 | 206 | 42 207 | 00:01:22,700 --> 00:01:23,900 208 | 他不得二十 209 | 210 | 211 | 43 212 | 00:01:25,800 --> 00:01:26,220 213 | 嗯 214 | 215 | 216 | 44 217 | 00:01:26,220 --> 00:01:26,760 218 | 不行 219 | 220 | 221 | 45 222 | 00:01:26,760 
--> 00:01:27,780 223 | 那我们再来 224 | 225 | 226 | 46 227 | 00:01:27,780 --> 00:01:29,130 228 | 如果这个不是一 229 | 230 | 231 | 47 232 | 00:01:29,130 --> 00:01:30,030 233 | 这个是二 234 | 235 | 236 | 48 237 | 00:01:30,030 --> 00:01:32,520 238 | 那么这里就有四加六就有十四 239 | 240 | 241 | 49 242 | 00:01:34,290 --> 00:01:34,440 243 | 啊 244 | 245 | 246 | 50 247 | 00:01:34,440 --> 00:01:34,800 248 | 好吧 249 | 250 | 251 | 51 252 | 00:01:34,800 --> 00:01:35,070 253 | 就十 254 | 255 | 256 | 52 257 | 00:01:35,950 --> 00:01:36,790 258 | 然后呢还不够 259 | 260 | 261 | 53 262 | 00:01:38,290 --> 00:01:40,120 263 | 于是再试一下三够不够 264 | 265 | 266 | 54 267 | 00:01:40,810 --> 00:01:42,400 268 | 最后我们一直往上凑 269 | 270 | 271 | 55 272 | 00:01:42,400 --> 00:01:42,880 273 | 凑到说 274 | 275 | 276 | 56 277 | 00:01:42,880 --> 00:01:43,540 278 | 哎 279 | 280 | 281 | 57 282 | 00:01:43,540 --> 00:01:44,470 283 | 正好是七的时候 284 | 285 | 286 | 58 287 | 00:01:45,190 --> 00:01:46,180 288 | 这个是不是可以的 289 | 290 | 291 | 59 292 | 00:01:47,020 --> 00:01:49,180 293 | 所以咱们计算机在算东西的时候啊 294 | 295 | 296 | 60 297 | 00:01:49,180 --> 00:01:50,200 298 | 其实他也经常是 299 | 300 | 301 | 61 302 | 00:01:50,690 --> 00:01:50,870 303 | 啊 304 | 305 | 306 | 62 307 | 00:01:50,870 --> 00:01:52,280 308 | 这么这么傻傻的来算东西 309 | 310 | 311 | 63 312 | 00:01:52,910 --> 00:01:54,170 313 | 因为对这些来说呢 314 | 315 | 316 | 64 317 | 00:01:54,660 --> 00:01:55,020 318 | 去 319 | 320 | 321 | 65 322 | 00:01:55,580 --> 00:01:56,420 323 | 凑一个数出来 324 | 325 | 326 | 66 327 | 00:01:57,250 --> 00:01:58,960 328 | 比找一个方法来算 329 | 330 | 331 | 67 332 | 00:01:58,960 --> 00:01:59,770 333 | 东西会更容易 334 | 335 | 336 | 68 337 | 00:02:00,410 --> 00:02:00,590 338 | 啊 339 | 340 | 341 | 69 342 | 00:02:00,590 --> 00:02:02,810 343 | 所以计算机机场真的就是怎么干呢 344 | 345 | 346 | 70 347 | 00:02:02,810 --> 00:02:02,840 348 | 我 349 | 350 | 351 | 71 352 | 00:02:03,820 --> 00:02:04,870 353 | 一个一个的 354 | 355 | 356 | 72 357 | 00:02:04,870 --> 00:02:06,970 358 | 把所有的可能都去列出来 359 | 360 | 361 | 73 362 | 00:02:06,970 --> 00:02:08,680 363 | 然后一个个的去试 
364 | 365 | 366 | 74 367 | 00:02:08,680 --> 00:02:09,310 368 | 哪一个是对的 369 | 370 | 371 | 75 372 | 00:02:09,950 --> 00:02:11,120 373 | 一直到找到是对的为止 374 | 375 | 376 | 76 377 | 00:02:11,760 --> 00:02:13,020 378 | 这种方法呢 379 | 380 | 381 | 77 382 | 00:02:13,020 --> 00:02:14,100 383 | 我们把它叫做枚举 384 | 385 | 386 | 78 387 | 00:02:15,070 --> 00:02:17,170 388 | 一枚一枚的举出来啊 389 | 390 | 391 | 79 392 | 00:02:17,170 --> 00:02:17,800 393 | 这就是枚举 394 | 395 | 396 | 80 397 | 00:02:18,760 --> 00:02:19,210 398 | 那 399 | 400 | 401 | 81 402 | 00:02:19,990 --> 00:02:20,680 403 | 除了这个方法 404 | 405 | 406 | 82 407 | 00:02:20,680 --> 00:02:26,590 408 | 但枚举我们会看到说有一个很可能会有很大的缺点就是你得一个个是啊啊 409 | 410 | 411 | 83 412 | 00:02:26,590 --> 00:02:28,180 413 | 咱们现在这个是欺 414 | 415 | 416 | 84 417 | 00:02:28,180 --> 00:02:29,500 418 | 从一数到七 419 | 420 | 421 | 85 422 | 00:02:29,500 --> 00:02:29,860 423 | 呃 424 | 425 | 426 | 86 427 | 00:02:29,860 --> 00:02:31,180 428 | 早七个就可以了 429 | 430 | 431 | 87 432 | 00:02:31,180 --> 00:02:32,380 433 | 如果这是七万呢 434 | 435 | 436 | 88 437 | 00:02:32,380 --> 00:02:35,140 438 | 也得从一所到七万就比较慢对不对 439 | 440 | 441 | 89 442 | 00:02:35,140 --> 00:02:37,870 443 | 所以为了让计算机能算的快 444 | 445 | 446 | 90 447 | 00:02:37,870 --> 00:02:39,340 448 | 我们就会有各种各样的 449 | 450 | 451 | 91 452 | 00:02:39,820 --> 00:02:40,030 453 | 算法 454 | 455 | 456 | 92 457 | 00:02:40,870 --> 00:02:42,520 458 | 比方说我们举个例子 459 | 460 | 461 | 93 462 | 00:02:42,520 --> 00:02:43,870 463 | 对于这个方程来说 464 | 465 | 466 | 94 467 | 00:02:43,870 --> 00:02:44,710 468 | 我们有一个比较 469 | 470 | 471 | 95 472 | 00:02:45,680 --> 00:02:47,060 473 | 简单的方案 474 | 475 | 476 | 96 477 | 00:02:47,060 --> 00:02:48,230 478 | 我不要再算方程 479 | 480 | 481 | 97 482 | 00:02:48,230 --> 00:02:50,750 483 | 我也不要他一个个去枚举出来 484 | 485 | 486 | 98 487 | 00:02:50,750 --> 00:02:51,890 488 | 我们可以有一个别的方法 489 | 490 | 491 | 99 492 | 00:02:52,500 --> 00:02:53,250 493 | 这个方法呢 494 | 495 | 496 | 100 497 | 00:02:53,250 --> 00:03:00,270 498 | 我们可以试看是不是这样假如说我们已经知道这个x的范围大概是从一到 499 | 500 | 501 | 101 
502 | 00:03:01,580 --> 00:03:02,030 503 | 十之间 504 | 505 | 506 | 102 507 | 00:03:02,740 --> 00:03:02,950 508 | 啊 509 | 510 | 511 | 103 512 | 00:03:02,950 --> 00:03:03,550 513 | 比如说我 514 | 515 | 516 | 104 517 | 00:03:03,550 --> 00:03:05,590 518 | 我们已经有这么一个条件是知道的啊 519 | 520 | 521 | 105 522 | 00:03:06,500 --> 00:03:06,650 523 | 哎 524 | 525 | 526 | 106 527 | 00:03:06,650 --> 00:03:08,090 528 | 所以我们解决问题经常是这样 529 | 530 | 531 | 107 532 | 00:03:08,090 --> 00:03:09,710 533 | 如果你知道的条件越多 534 | 535 | 536 | 108 537 | 00:03:09,710 --> 00:03:12,230 538 | 那么你解决问题的手段肯定就会越好 539 | 540 | 541 | 109 542 | 00:03:12,230 --> 00:03:14,270 543 | 假如我们知道他是一到十之间 544 | 545 | 546 | 110 547 | 00:03:15,110 --> 00:03:16,100 548 | 那我们就可以先试试看 549 | 550 | 551 | 111 552 | 00:03:16,100 --> 00:03:17,540 553 | 如果我们先上个 554 | 555 | 556 | 112 557 | 00:03:17,540 --> 00:03:18,410 558 | 我们不是一直来 559 | 560 | 561 | 113 562 | 00:03:18,410 --> 00:03:19,790 563 | 就是以我先上个五 564 | 565 | 566 | 114 567 | 00:03:19,790 --> 00:03:20,180 568 | 所以呢 569 | 570 | 571 | 115 572 | 00:03:20,180 --> 00:03:21,050 573 | 我们情况就会变成说 574 | 575 | 576 | 116 577 | 00:03:22,010 --> 00:03:22,400 578 | 今天呢 579 | 580 | 581 | 117 582 | 00:03:23,020 --> 00:03:25,330 583 | 我们不是一上来就来一我们先上个五 584 | 585 | 586 | 118 587 | 00:03:26,410 --> 00:03:28,030 588 | 二千五是十所以 589 | 590 | 591 | 119 592 | 00:03:28,030 --> 00:03:28,660 593 | 呢这笔 594 | 595 | 596 | 120 597 | 00:03:28,660 --> 00:03:29,500 598 | 加回来呢就是 599 | 600 | 601 | 121 602 | 00:03:29,500 --> 00:03:31,320 603 | 十六我 604 | 605 | 606 | 122 607 | 00:03:31,320 --> 00:03:36,370 608 | 随着小鱼 609 | 610 | 611 | 123 612 | 00:03:36,370 --> 00:03:37,330 613 | 二是说明什么 614 | 615 | 616 | 124 617 | 00:03:37,330 --> 00:03:40,900 618 | 说明我们要寻求的那个答案应该比武要来的大 619 | 620 | 621 | 125 622 | 00:03:42,000 --> 00:03:45,060 623 | 在五和那个石中间 624 | 625 | 626 | 126 627 | 00:03:45,060 --> 00:03:47,130 628 | 所以我们就接下来去是什么呢 629 | 630 | 631 | 127 632 | 00:03:47,130 --> 00:03:48,180 633 | 我核实中间那个数 634 | 635 | 636 | 128 637 | 00:03:48,870 --> 
00:03:50,790 638 | 于是我们发现说我们可以是七 639 | 640 | 641 | 129 642 | 00:03:50,790 --> 00:03:51,300 643 | 或者是八 644 | 645 | 646 | 130 647 | 00:03:52,290 --> 00:03:54,390 648 | 假如我们一下子正好我们就是了 649 | 650 | 651 | 131 652 | 00:03:54,390 --> 00:03:55,620 653 | 七你说我们就发现说 654 | 655 | 656 | 132 657 | 00:03:55,620 --> 00:03:58,230 658 | 而乘以七这边刚好就是二十 659 | 660 | 661 | 133 662 | 00:03:58,230 --> 00:03:59,070 663 | 于是呢 664 | 665 | 666 | 134 667 | 00:03:59,070 --> 00:03:59,790 668 | 我们就得到答案了 669 | 670 | 671 | 135 672 | 00:04:01,480 --> 00:04:01,720 673 | 两步 674 | 675 | 676 | 136 677 | 00:04:02,730 --> 00:04:03,690 678 | 我们就可以得到他 679 | 680 | 681 | 137 682 | 00:04:04,750 --> 00:04:04,930 683 | 啊 684 | 685 | 686 | 138 687 | 00:04:04,930 --> 00:04:07,690 688 | 这种方案这种方法我们叫做二分法 689 | 690 | 691 | 139 692 | 00:04:07,690 --> 00:04:10,660 693 | 以后我们会看到很多在用计算机解决问题的过程当中 694 | 695 | 696 | 140 697 | 00:04:11,770 --> 00:04:12,370 698 | 人们 699 | 700 | 701 | 141 702 | 00:04:12,370 --> 00:04:13,600 703 | 毕竟这已经是一个 704 | 705 | 706 | 142 707 | 00:04:15,370 --> 00:04:17,230 708 | 六七十年的东西了 709 | 710 | 711 | 143 712 | 00:04:17,230 --> 00:04:18,940 713 | 人民进想出了很多很多的方法 714 | 715 | 716 | 144 717 | 00:04:18,940 --> 00:04:20,560 718 | 对于各种的问题 719 | 720 | 721 | 145 722 | 00:04:20,560 --> 00:04:23,140 723 | 我们有各种更好的办法去计算 724 | 725 | 726 | 146 727 | 00:04:23,140 --> 00:04:23,860 728 | 他去解决他 729 | 730 | 731 | 147 732 | 00:04:25,200 --> 00:04:27,360 733 | 一旦我们有了一个程序 734 | 735 | 736 | 148 737 | 00:04:28,050 --> 00:04:29,640 738 | 这个程序写出来之后 739 | 740 | 741 | 149 742 | 00:04:30,630 --> 00:04:33,120 743 | 计算器怎么能够去执行你的这个程序呢 744 | 745 | 746 | 150 747 | 00:04:33,120 --> 00:04:39,240 748 | 我们前面已经看到你的程序写出来是用一种类似于英文啊 749 | 750 | 751 | 151 752 | 00:04:39,240 --> 00:04:41,130 753 | 里面都是字母数字这样的东西 754 | 755 | 756 | 152 757 | 00:04:41,130 --> 00:04:41,520 758 | 但是呢 759 | 760 | 761 | 153 762 | 00:04:41,520 --> 00:04:43,140 763 | 计算机实际懂的语言呢 764 | 765 | 766 | 154 767 | 00:04:43,140 --> 00:04:47,040 768 | 是那种十六进制都是数字的那种那那种方式他才懂 769 | 770 | 
771 | 155 772 | 00:04:47,040 --> 00:04:50,160 773 | 那么在这两个之间是有有差异的对吧 774 | 775 | 776 | 156 777 | 00:04:50,160 --> 00:04:50,280 778 | 那么 779 | 780 | 781 | 157 782 | 00:04:50,690 --> 00:04:53,900 783 | 计算机去呃执行你写的那个程序 784 | 785 | 786 | 158 787 | 00:04:53,900 --> 00:04:55,220 788 | 他就有两种方案 789 | 790 | 791 | 159 792 | 00:04:55,220 --> 00:04:56,150 793 | 一种方案呢 794 | 795 | 796 | 160 797 | 00:04:56,150 --> 00:04:56,780 798 | 我们叫做解释 799 | 800 | 801 | 161 802 | 00:04:57,800 --> 00:04:58,370 803 | 就是说 804 | 805 | 806 | 162 807 | 00:04:58,370 --> 00:05:00,830 808 | 我需要有一个计算机上的程序 809 | 810 | 811 | 163 812 | 00:05:00,830 --> 00:05:04,070 813 | 这个程序呢他能够读懂你写的那个程序 814 | 815 | 816 | 164 817 | 00:05:05,210 --> 00:05:06,350 818 | 这有点绕口是吧 819 | 820 | 821 | 165 822 | 00:05:06,350 --> 00:05:08,690 823 | 有一个程序读懂你写的程序啊 824 | 825 | 826 | 166 827 | 00:05:08,690 --> 00:05:10,310 828 | 他把你写的程序独进去 829 | 830 | 831 | 167 832 | 00:05:10,310 --> 00:05:11,300 833 | 不进去以后呢 834 | 835 | 836 | 168 837 | 00:05:11,300 --> 00:05:13,280 838 | 他看你地方要他做什么啊 839 | 840 | 841 | 169 842 | 00:05:13,280 --> 00:05:14,270 843 | 去厨房ok 844 | 845 | 846 | 170 847 | 00:05:14,270 --> 00:05:15,890 848 | 他就让这段机做了一个去厨房的事 849 | 850 | 851 | 171 852 | 00:05:17,010 --> 00:05:19,230 853 | 以第一件事儿让他做什么t等于二ok 854 | 855 | 856 | 172 857 | 00:05:19,230 --> 00:05:20,910 858 | 大家这样去做t等于二的三 859 | 860 | 861 | 173 862 | 00:05:20,910 --> 00:05:23,280 863 | 所以他会看懂你的程序 864 | 865 | 866 | 174 867 | 00:05:23,280 --> 00:05:25,650 868 | 他会转身让计算机去做 869 | 870 | 871 | 175 872 | 00:05:25,650 --> 00:05:26,370 873 | 你要他做的事情 874 | 875 | 876 | 176 877 | 00:05:27,520 --> 00:05:29,350 878 | 如果是这么执行的程序呢 879 | 880 | 881 | 177 882 | 00:05:29,350 --> 00:05:30,310 883 | 这种方式叫做解释 884 | 885 | 886 | 178 887 | 00:05:31,430 --> 00:05:32,420 888 | 那还有一种方式呢 889 | 890 | 891 | 179 892 | 00:05:32,420 --> 00:05:33,260 893 | 叫做编译 894 | 895 | 896 | 180 897 | 00:05:34,260 --> 00:05:36,480 898 | 就是先有一个程序 899 | 900 | 901 | 181 902 | 00:05:36,480 --> 00:05:38,280 903 | 
所以咱们还得有一个程序是吧 904 | 905 | 906 | 182 907 | 00:05:38,280 --> 00:05:40,620 908 | 那个程序呢拿着你的程序 909 | 910 | 911 | 183 912 | 00:05:40,620 --> 00:05:42,600 913 | 他能读懂你程序完了以后呢 914 | 915 | 916 | 184 917 | 00:05:42,600 --> 00:05:44,160 918 | 他把你的程序翻译一下 919 | 920 | 921 | 185 922 | 00:05:45,170 --> 00:05:46,310 923 | 变成计算机能懂的 924 | 925 | 926 | 186 927 | 00:05:47,040 --> 00:05:47,310 928 | 程序 929 | 930 | 931 | 187 932 | 00:05:47,850 --> 00:05:48,450 933 | 于是呢 934 | 935 | 936 | 188 937 | 00:05:48,450 --> 00:05:50,220 938 | 你的程序被翻译过以后呢 939 | 940 | 941 | 189 942 | 00:05:50,220 --> 00:05:50,880 943 | 你读不懂了 944 | 945 | 946 | 190 947 | 00:05:50,880 --> 00:05:51,930 948 | 但是计算机能读懂了 949 | 950 | 951 | 191 952 | 00:05:53,030 --> 00:05:55,460 953 | 然后他把这个计算机能读懂我们程序呢 954 | 955 | 956 | 192 957 | 00:05:55,460 --> 00:05:56,120 958 | 留下来 959 | 960 | 961 | 193 962 | 00:05:56,120 --> 00:05:57,320 963 | 也就是保存下来 964 | 965 | 966 | 194 967 | 00:05:57,770 --> 00:05:58,520 968 | 那以后 969 | 970 | 971 | 195 972 | 00:05:58,520 --> 00:05:59,960 973 | 你的程序想要执行的时候 974 | 975 | 976 | 196 977 | 00:05:59,960 --> 00:06:02,360 978 | 就不是拿你写的那个程序去执行 979 | 980 | 981 | 197 982 | 00:06:02,360 --> 00:06:05,270 983 | 而是哪他翻译过以后的那个程序去执行 984 | 985 | 986 | 198 987 | 00:06:06,220 --> 00:06:07,810 988 | 那么这个过程啊 989 | 990 | 991 | 199 992 | 00:06:07,810 --> 00:06:09,070 993 | 这个翻译的过程 994 | 995 | 996 | 200 997 | 00:06:09,070 --> 00:06:10,360 998 | 我们把它叫做编译 999 | 1000 | 1001 | 201 1002 | 00:06:11,110 --> 00:06:11,800 1003 | 啊编译 1004 | 1005 | 1006 | 202 1007 | 00:06:11,800 --> 00:06:14,590 1008 | 那么编译完以后就是计算机直接能够懂的东西 1009 | 1010 | 1011 | 203 1012 | 00:06:14,590 --> 00:06:18,190 1013 | 所以程序的执行是有这样两种可能的方式的 1014 | 1015 | 1016 | 204 1017 | 00:06:20,050 --> 00:06:20,890 1018 | 也就是说呢 1019 | 1020 | 1021 | 205 1022 | 00:06:20,890 --> 00:06:22,870 1023 | 这个解释是怎么回事呢 1024 | 1025 | 1026 | 206 1027 | 00:06:22,870 --> 00:06:24,820 1028 | 咱们写了一个程序是吧 1029 | 1030 | 1031 | 207 1032 | 00:06:24,820 --> 00:06:25,960 1033 | 就程序呢 1034 | 1035 | 1036 | 208 
1037 | 00:06:25,960 --> 00:06:28,090 1038 | 写在一个文件里头啊 1039 | 1040 | 1041 | 209 1042 | 00:06:28,090 --> 00:06:29,230 1043 | 存在于文件里头 1044 | 1045 | 1046 | 210 1047 | 00:06:29,230 --> 00:06:31,630 1048 | 完了以后呢交给一个程序 1049 | 1050 | 1051 | 211 1052 | 00:06:33,840 --> 00:06:34,470 1053 | 那程序呢 1054 | 1055 | 1056 | 212 1057 | 00:06:35,190 --> 00:06:36,510 1058 | 他能读懂你的程序 1059 | 1060 | 1061 | 213 1062 | 00:06:36,510 --> 00:06:37,230 1063 | 可是呢 1064 | 1065 | 1066 | 214 1067 | 00:06:37,230 --> 00:06:38,280 1068 | 他不产生任何东西 1069 | 1070 | 1071 | 215 1072 | 00:06:38,280 --> 00:06:39,450 1073 | 他只是去做动作 1074 | 1075 | 1076 | 216 1077 | 00:06:40,250 --> 00:06:45,050 1078 | 他拿着你的程序去开始一条一条一条一条的走走完以后呢 1079 | 1080 | 1081 | 217 1082 | 00:06:45,370 --> 00:06:45,940 1083 | 就做完了 1084 | 1085 | 1086 | 218 1087 | 00:06:45,940 --> 00:06:46,630 1088 | 这事就做完了 1089 | 1090 | 1091 | 219 1092 | 00:06:46,630 --> 00:06:49,000 1093 | 那下一回你要再运行你的程序的时候 1094 | 1095 | 1096 | 220 1097 | 00:06:49,000 --> 00:06:51,880 1098 | 你还得把这个能够解释你的程序的这个东西呢 1099 | 1100 | 1101 | 221 1102 | 00:06:51,880 --> 00:06:55,330 1103 | 给召唤出来由他来执行你的程序而编译呢 1104 | 1105 | 1106 | 222 1107 | 00:06:55,330 --> 00:06:56,860 1108 | 是说你还是拿着这个源代码 1109 | 1110 | 1111 | 223 1112 | 00:06:56,860 --> 00:06:59,530 1113 | 是不是你交给了一个翻译官 1114 | 1115 | 1116 | 224 1117 | 00:07:00,850 --> 00:07:01,030 1118 | 啊 1119 | 1120 | 1121 | 225 1122 | 00:07:01,030 --> 00:07:03,790 1123 | 这个翻译的人呢翻译的这个程序呢负责呢 1124 | 1125 | 1126 | 226 1127 | 00:07:03,790 --> 00:07:04,510 1128 | 把你的程序呢 1129 | 1130 | 1131 | 227 1132 | 00:07:05,270 --> 00:07:07,580 1133 | 变成了另外一个程序 1134 | 1135 | 1136 | 228 1137 | 00:07:07,580 --> 00:07:08,270 1138 | 那里头呢 1139 | 1140 | 1141 | 229 1142 | 00:07:08,270 --> 00:07:10,790 1143 | 都是各种计算机的不懂的东西啊 1144 | 1145 | 1146 | 230 1147 | 00:07:10,790 --> 00:07:11,330 1148 | 人家都不懂了 1149 | 1150 | 1151 | 231 1152 | 00:07:12,840 --> 00:07:13,170 1153 | 完了以后 1154 | 1155 | 1156 | 232 1157 | 00:07:14,000 --> 00:07:14,960 1158 | 你的程序一点儿没执行 1159 | 1160 | 1161 | 233 1162 | 
00:07:15,780 --> 00:07:16,050 1163 | 啊 1164 | 1165 | 1166 | 234 1167 | 00:07:16,050 --> 00:07:16,890 1168 | 该你要他做什么 1169 | 1170 | 1171 | 235 1172 | 00:07:16,890 --> 00:07:17,940 1173 | 什么都不去做 1174 | 1175 | 1176 | 236 1177 | 00:07:17,940 --> 00:07:23,970 1178 | 然后你得让这个程序去运行去这个程序在计算机上运行起来以后呢 1179 | 1180 | 1181 | 237 1182 | 00:07:23,970 --> 00:07:25,860 1183 | 他才能够给你得到你要的那个结果 1184 | 1185 | 1186 | 238 1187 | 00:07:26,710 --> 00:07:26,890 1188 | 啊 1189 | 1190 | 1191 | 239 1192 | 00:07:26,890 --> 00:07:28,330 1193 | 所以上头那一个啊 1194 | 1195 | 1196 | 240 1197 | 00:07:28,330 --> 00:07:29,980 1198 | 这叫做解释啊 1199 | 1200 | 1201 | 241 1202 | 00:07:29,980 --> 00:07:31,750 1203 | 这个呢叫做编译 1204 | 1205 | 1206 | 242 1207 | 00:07:31,750 --> 00:07:34,240 1208 | 这是程序执行的两种不同的方式 1209 | 1210 | 1211 | 243 1212 | 00:07:34,240 --> 00:07:37,120 1213 | 那因为有这样两种方式去执行程序 1214 | 1215 | 1216 | 244 1217 | 00:07:37,120 --> 00:07:38,320 1218 | 就会有人有一个误解说 1219 | 1220 | 1221 | 245 1222 | 00:07:39,460 --> 00:07:41,740 1223 | 这个语言是解释执行的 1224 | 1225 | 1226 | 246 1227 | 00:07:41,740 --> 00:07:44,110 1228 | 那个语言是编译执行的 1229 | 1230 | 1231 | 247 1232 | 00:07:44,110 --> 00:07:44,830 1233 | 其实呢 1234 | 1235 | 1236 | 248 1237 | 00:07:44,830 --> 00:07:47,140 1238 | 对于计算机的编程语言来说 1239 | 1240 | 1241 | 249 1242 | 00:07:47,140 --> 00:07:51,100 1243 | 语言本身是没有解释和编译的区分的啊 1244 | 1245 | 1246 | 250 1247 | 00:07:51,100 --> 00:07:53,170 1248 | 任何一种语言既可以解释执行 1249 | 1250 | 1251 | 251 1252 | 00:07:53,170 --> 00:07:54,670 1253 | 也可以便于执行 1254 | 1255 | 1256 | 252 1257 | 00:07:54,670 --> 00:07:56,920 1258 | 这只是传统和习惯的问题 1259 | 1260 | 1261 | 253 1262 | 00:07:57,540 --> 00:07:59,520 1263 | 只是这种语言常用的 1264 | 1265 | 1266 | 254 1267 | 00:07:59,520 --> 00:08:01,140 1268 | 他的程序的执行方式 1269 | 1270 | 1271 | 255 1272 | 00:08:01,140 --> 00:08:04,350 1273 | 比如说我们经常听到说c点是编译型的语言没错 1274 | 1275 | 1276 | 256 1277 | 00:08:04,350 --> 00:08:06,210 1278 | 因为一般我们写c员的ce元的 1279 | 1280 | 1281 | 257 1282 | 00:08:06,210 --> 00:08:07,350 1283 | 所以写出来以后呢 1284 | 1285 | 1286 
| 258 1287 | 00:08:07,350 --> 00:08:08,760 1288 | 你是需要编译 1289 | 1290 | 1291 | 259 1292 | 00:08:08,760 --> 00:08:09,780 1293 | 然后再来运行 1294 | 1295 | 1296 | 260 1297 | 00:08:11,060 --> 00:08:12,800 1298 | 但是也有人去做了 1299 | 1300 | 1301 | 261 1302 | 00:08:12,800 --> 00:08:14,660 1303 | c元的解释器是吧 1304 | 1305 | 1306 | 262 1307 | 00:08:14,660 --> 00:08:16,280 1308 | 然后一般我们都这样说拍上语言 1309 | 1310 | 1311 | 263 1312 | 00:08:16,280 --> 00:08:17,960 1313 | 它是一个解释性的语言 1314 | 1315 | 1316 | 264 1317 | 00:08:17,960 --> 00:08:18,950 1318 | 因为你不需要编译 1319 | 1320 | 1321 | 265 1322 | 00:08:18,950 --> 00:08:20,960 1323 | 直接就可以拿源代码去执行了 1324 | 1325 | 1326 | 266 1327 | 00:08:20,960 --> 00:08:21,410 1328 | 但是呢 1329 | 1330 | 1331 | 267 1332 | 00:08:21,410 --> 00:08:22,370 1333 | 也有人愿意的话 1334 | 1335 | 1336 | 268 1337 | 00:08:22,370 --> 00:08:24,830 1338 | 你也可以去做他的这个编译器去啊 1339 | 1340 | 1341 | 269 1342 | 00:08:24,830 --> 00:08:24,980 1343 | 所以 1344 | 1345 | 1346 | 270 1347 | 00:08:25,600 --> 00:08:27,280 1348 | 语言本身没有解释和 1349 | 1350 | 1351 | 271 1352 | 00:08:28,090 --> 00:08:28,480 1353 | 便宜的 1354 | 1355 | 1356 | 272 1357 | 00:08:28,480 --> 00:08:30,580 1358 | 这个区分只是他的具体的执行方式而已 1359 | 1360 | 1361 | 273 1362 | 00:08:31,410 --> 00:08:34,170 1363 | 那这两种方式有没有好坏呢 1364 | 1365 | 1366 | 274 1367 | 00:08:34,870 --> 00:08:35,650 1368 | 解释性的语言 1369 | 1370 | 1371 | 275 1372 | 00:08:35,650 --> 00:08:37,270 1373 | 能够有一些特殊的计算能力 1374 | 1375 | 1376 | 276 1377 | 00:08:37,270 --> 00:08:38,980 1378 | 因为源代码在 1379 | 1380 | 1381 | 277 1382 | 00:08:39,820 --> 00:08:41,140 1383 | 执行的过程当中 1384 | 1385 | 1386 | 278 1387 | 00:08:41,140 --> 00:08:43,000 1388 | 不断的去被解读 1389 | 1390 | 1391 | 279 1392 | 00:08:43,000 --> 00:08:46,180 1393 | 那么有可以有可能让我们可以做一些特殊的事情啊 1394 | 1395 | 1396 | 280 1397 | 00:08:46,180 --> 00:08:47,680 1398 | 比方说运行过程当中 1399 | 1400 | 1401 | 281 1402 | 00:08:47,680 --> 00:08:49,090 1403 | 源代码是不是可以被修改啊 1404 | 1405 | 1406 | 282 1407 | 00:08:49,090 --> 00:08:49,240 1408 | 等等 1409 | 1410 | 1411 | 283 1412 | 00:08:50,400 
--> 00:08:50,940 1413 | 呃 1414 | 1415 | 1416 | 284 1417 | 00:08:50,940 --> 00:08:51,300 1418 | 过去呢 1419 | 1420 | 1421 | 285 1422 | 00:08:51,300 --> 00:08:53,970 1423 | 我们一般会觉得时候解释性的语言会运行的比较慢 1424 | 1425 | 1426 | 286 1427 | 00:08:53,970 --> 00:08:54,540 1428 | 因为 1429 | 1430 | 1431 | 287 1432 | 00:08:55,050 --> 00:08:56,610 1433 | 你不是直接在计算机上运行的 1434 | 1435 | 1436 | 288 1437 | 00:08:56,610 --> 00:08:56,820 1438 | 对吧 1439 | 1440 | 1441 | 289 1442 | 00:08:56,820 --> 00:08:59,730 1443 | 你需要有一个程序帮助你去运行他 1444 | 1445 | 1446 | 290 1447 | 00:08:59,730 --> 00:09:00,060 1448 | 但是呢 1449 | 1450 | 1451 | 291 1452 | 00:09:00,060 --> 00:09:00,450 1453 | 现在呢 1454 | 1455 | 1456 | 292 1457 | 00:09:00,450 --> 00:09:03,150 1458 | 我们发现呢这个我们看到啊 1459 | 1460 | 1461 | 293 1462 | 00:09:03,150 --> 00:09:04,200 1463 | 解释性的语言 1464 | 1465 | 1466 | 294 1467 | 00:09:04,200 --> 00:09:07,650 1468 | 他的运行效率和边形的语言的运行效率呢 1469 | 1470 | 1471 | 295 1472 | 00:09:07,650 --> 00:09:09,030 1473 | 已经不是重要的问题 1474 | 1475 | 1476 | 296 1477 | 00:09:09,030 --> 00:09:12,270 1478 | 因为计算机的速度已经到了一个比较高的层次 1479 | 1480 | 1481 | 297 1482 | 00:09:12,270 --> 00:09:12,780 1483 | 上面 1484 | 1485 | 1486 | 298 1487 | 00:09:12,780 --> 00:09:14,400 1488 | 那么在那个速度底下解释 1489 | 1490 | 1491 | 299 1492 | 00:09:14,400 --> 00:09:14,610 1493 | 编译 1494 | 1495 | 1496 | 300 1497 | 00:09:14,910 --> 00:09:16,980 1498 | 的运行的性能不是太大的 1499 | 1500 | 1501 | 301 1502 | 00:09:16,980 --> 00:09:19,530 1503 | 这个呃这个这个差异啊 1504 | 1505 | 1506 | 302 1507 | 00:09:19,530 --> 00:09:21,360 1508 | 那边型的语言有边形语言的好 1509 | 1510 | 1511 | 303 1512 | 00:09:21,360 --> 00:09:23,880 1513 | 就比如说他能够有确定的运算性能 1514 | 1515 | 1516 | 304 1517 | 00:09:23,880 --> 00:09:25,320 1518 | 这个意思是说我 1519 | 1520 | 1521 | 305 1522 | 00:09:25,630 --> 00:09:27,880 1523 | 同样的程序在运行的过程当中是这么快的 1524 | 1525 | 1526 | 306 1527 | 00:09:27,880 --> 00:09:28,870 1528 | 他就是这么快是吧 1529 | 1530 | 1531 | 307 1532 | 00:09:28,870 --> 00:09:30,940 1533 | 他能够有一个明确的运算性能 1534 | 1535 | 1536 | 308 1537 | 00:09:30,940 --> 
00:09:32,680 1538 | 所以在今天来说 1539 | 1540 | 1541 | 309 1542 | 00:09:32,680 --> 00:09:37,300 1543 | 这两种方式执行方式已经没有非常大的本质上的区别啊 1544 | 1545 | 1546 | 310 1547 | 00:09:37,300 --> 00:09:39,100 1548 | 这只是传统和喜好的问题 1549 | 1550 | 1551 | -------------------------------------------------------------------------------- /videosrt/aliyun/cloud/cloud.go: -------------------------------------------------------------------------------- 1 | package cloud 2 | 3 | import ( 4 | "encoding/json" 5 | "github.com/aliyun/alibaba-cloud-sdk-go/sdk" 6 | "github.com/aliyun/alibaba-cloud-sdk-go/sdk/requests" 7 | "github.com/aliyun/alibaba-cloud-sdk-go/sdk/responses" 8 | "github.com/pkg/errors" 9 | "strconv" 10 | "time" 11 | ) 12 | 13 | //SDK 14 | //https://help.aliyun.com/document_detail/94072.html?spm=a2c4g.11186623.6.584.3a1153d5yDFr5B 15 | 16 | type AliyunClound struct { 17 | AccessKeyId string 18 | AccessKeySecret string 19 | AppKey string 20 | } 21 | 22 | 23 | //阿里云录音文件识别结果集 24 | type AliyunAudioRecognitionResult struct { 25 | Text string //文本结果 26 | ChannelId int64 //音轨ID 27 | BeginTime int64 //该句的起始时间偏移,单位为毫秒 28 | EndTime int64 //该句的结束时间偏移,单位为毫秒 29 | SilenceDuration int64 //本句与上一句之间的静音时长,单位为秒 30 | SpeechRate int64 //本句的平均语速,单位为每分钟字数 31 | EmotionValue int64 //情绪能量值1-10,值越高情绪越强烈 32 | } 33 | 34 | //阿里云识别词语数据集 35 | type AliyunAudioWord struct { 36 | Word string 37 | ChannelId int64 38 | BeginTime int64 39 | EndTime int64 40 | } 41 | 42 | 43 | // 地域ID,常量内容,请勿改变 44 | const REGION_ID string = "cn-shanghai" 45 | const ENDPOINT_NAME string = "cn-shanghai" 46 | const PRODUCT string = "nls-filetrans" 47 | const DOMAIN string = "filetrans.cn-shanghai.aliyuncs.com" 48 | const API_VERSION string = "2018-08-17" 49 | const POST_REQUEST_ACTION string = "SubmitTask" 50 | const GET_REQUEST_ACTION string = "GetTaskResult" 51 | // 请求参数key 52 | const KEY_APP_KEY string = "appkey" 53 | const KEY_FILE_LINK string = "file_link" 54 | const KEY_VERSION string = "version" 55 | const KEY_ENABLE_WORDS string = 
"enable_words" 56 | 57 | //是否打开ITN,中文数字将转为阿拉伯数字输出,默认值为false 58 | const KEY_ENABLE_INVERSE_TEXT_NORMAL = "enable_inverse_text_normalization" 59 | //是否启⽤语义断句,取值:true/false,默认值false 60 | const KEY_ENABLE_SEMANTIC_SENTENCE_DETECTION = "enable_semantic_sentence_detection" 61 | //是否启用时间戳校准功能,取值:true/false,默认值false 62 | const KEY_ENABLE_TIMESTAMP_ALIGNMENT = "enable_timestamp_alignment" 63 | 64 | // 响应参数key 65 | const KEY_TASK string = "Task" 66 | const KEY_TASK_ID string = "TaskId" 67 | const KEY_STATUS_TEXT string = "StatusText" 68 | const KEY_RESULT string = "Result" 69 | // 状态值 70 | const STATUS_SUCCESS string = "SUCCESS" 71 | const STATUS_RUNNING string = "RUNNING" 72 | const STATUS_QUEUEING string = "QUEUEING" 73 | 74 | 75 | //发起录音文件识别 76 | //接口文档 https://help.aliyun.com/document_detail/90727.html?spm=a2c4g.11186623.6.581.691af6ebYsUkd1 77 | func (c AliyunClound) NewAudioFile(fileLink string) (string , *sdk.Client , error) { 78 | client, err := sdk.NewClientWithAccessKey(REGION_ID, c.AccessKeyId, c.AccessKeySecret) 79 | if err != nil { 80 | return "" , client , err 81 | } 82 | 83 | postRequest := requests.NewCommonRequest() 84 | postRequest.Domain = DOMAIN 85 | postRequest.Version = API_VERSION 86 | postRequest.Product = PRODUCT 87 | postRequest.ApiName = POST_REQUEST_ACTION 88 | postRequest.Method = "POST" 89 | 90 | mapTask := make(map[string]string) 91 | mapTask[KEY_APP_KEY] = c.AppKey 92 | mapTask[KEY_FILE_LINK] = fileLink 93 | // 新接入请使用4.0版本,已接入(默认2.0)如需维持现状,请注释掉该参数设置 94 | mapTask[KEY_VERSION] = "4.0" 95 | // 设置是否输出词信息,默认为false,开启时需要设置version为4.0 96 | mapTask[KEY_ENABLE_WORDS] = "true" 97 | 98 | //统一后处理 99 | mapTask[KEY_ENABLE_INVERSE_TEXT_NORMAL] = "true" 100 | mapTask[KEY_ENABLE_SEMANTIC_SENTENCE_DETECTION] = "true" 101 | mapTask[KEY_ENABLE_TIMESTAMP_ALIGNMENT] = "true" 102 | 103 | // to json 104 | task, err := json.Marshal(mapTask) 105 | if err != nil { 106 | return "" , client , errors.New("to json error .") 107 | } 108 | postRequest.FormParams[KEY_TASK] = 
string(task) 109 | // 发起请求 110 | postResponse, err := client.ProcessCommonRequest(postRequest) 111 | if err != nil { 112 | return "" , client , err 113 | } 114 | postResponseContent := postResponse.GetHttpContentString() 115 | //校验请求 116 | if (postResponse.GetHttpStatus() != 200) { 117 | return "" , client , errors.New("录音文件识别请求失败 , Http错误码 : " + strconv.Itoa(postResponse.GetHttpStatus())) 118 | } 119 | //解析数据 120 | var postMapResult map[string]interface{} 121 | err = json.Unmarshal([]byte(postResponseContent), &postMapResult) 122 | if err != nil { 123 | return "" , client , errors.New("to map struct error .") 124 | } 125 | 126 | var taskId = "" 127 | var statusText = "" 128 | statusText = postMapResult[KEY_STATUS_TEXT].(string) 129 | 130 | //检验结果 131 | if statusText == STATUS_SUCCESS { 132 | taskId = postMapResult[KEY_TASK_ID].(string) 133 | return taskId , client , nil 134 | } 135 | 136 | return "" , client , errors.New("录音文件识别请求失败 !") 137 | } 138 | 139 | 140 | //获取录音文件识别结果 141 | //接口文档 https://help.aliyun.com/document_detail/90727.html?spm=a2c4g.11186623.6.581.691af6ebYsUkd1 142 | func (c AliyunClound) GetAudioFileResult(taskId string , client *sdk.Client , logOutput func(text string) , callback func(result []byte)) (err error) { 143 | getRequest := requests.NewCommonRequest() 144 | getRequest.Domain = DOMAIN 145 | getRequest.Version = API_VERSION 146 | getRequest.Product = PRODUCT 147 | getRequest.ApiName = GET_REQUEST_ACTION 148 | getRequest.Method = "GET" 149 | getRequest.QueryParams[KEY_TASK_ID] = taskId 150 | statusText := "" 151 | 152 | var ( 153 | trys = 0 154 | getResponse *responses.CommonResponse 155 | getResponseContent string 156 | ) 157 | 158 | //遍历获取识别结果 159 | for trys < 10 { 160 | 161 | if trys != 0 { 162 | logOutput("尝试重新查询识别结果,第" + strconv.Itoa(trys) + "次") 163 | } 164 | 165 | getResponse, err = client.ProcessCommonRequest(getRequest) 166 | if err != nil { 167 | logOutput("查询识别结果失败:" + err.Error()) 168 | trys++ 169 | time.Sleep(time.Second * 
time.Duration(trys)) 170 | continue 171 | } 172 | 173 | getResponseContent = getResponse.GetHttpContentString() 174 | if (getResponse.GetHttpStatus() != 200) { 175 | logOutput("查询识别结果失败,Http错误码:" + strconv.Itoa(getResponse.GetHttpStatus())) 176 | trys++ 177 | time.Sleep(time.Second * time.Duration(trys)) 178 | continue 179 | } 180 | 181 | var getMapResult map[string]interface{} 182 | err = json.Unmarshal([]byte(getResponseContent), &getMapResult) 183 | if err != nil { 184 | trys++ 185 | logOutput("查询识别结果失败,解析结果失败:" + err.Error()) 186 | continue 187 | } 188 | 189 | //校验遍历条件 190 | statusText = getMapResult[KEY_STATUS_TEXT].(string) 191 | if statusText == STATUS_RUNNING || statusText == STATUS_QUEUEING { 192 | time.Sleep(3 * time.Second) 193 | } else { 194 | break 195 | } 196 | } 197 | 198 | if statusText == STATUS_SUCCESS && getResponse != nil { 199 | //调用回调函数 200 | callback(getResponse.GetHttpContentBytes()) 201 | } else { 202 | err = errors.New("录音文件识别失败 , (" + c.GetErrorStatusTextMessage(statusText) + ")") 203 | return 204 | } 205 | return 206 | } 207 | 208 | //获取错误信息 209 | func (c AliyunClound) GetErrorStatusTextMessage (statusText string) string { 210 | var code map[string]string = map[string]string{ 211 | "REQUEST_APPKEY_UNREGISTERED":"阿里云智能语音项目未创建/无访问权限。请检查语音引擎Appkey是否填写错误;如果是海外地区,在软件创建语音引擎时,服务区域需要选择“海外”", 212 | "USER_BIZDURATION_QUOTA_EXCEED":"单日2小时识别免费额度超出限制", 213 | "FILE_DOWNLOAD_FAILED":"文件访问失败,请检查OSS存储空间访问权限。请将OSS存储空间设置为“公共读”", 214 | "FILE_TOO_LARGE":"音频文件超出512MB", 215 | "FILE_PARSE_FAILED":"音频文件解析失败,请检查音频文件是否有损坏", 216 | "UNSUPPORTED_SAMPLE_RATE":"采样率不匹配", 217 | "FILE_TRANS_TASK_EXPIRED":"音频文件识别任务过期,请重试", 218 | "REQUEST_INVALID_FILE_URL_VALUE":"音频文件访问失败,请检查OSS存储空间访问权限", 219 | "FILE_404_NOT_FOUND":"音频文件访问失败,请检查OSS存储空间访问权限", 220 | "FILE_403_FORBIDDEN":"音频文件访问失败,请检查OSS存储空间访问权限", 221 | "FILE_SERVER_ERROR":"音频文件访问失败,请检查请求的文件所在的服务是否可用", 222 | "INTERNAL_ERROR":"识别内部通用错误,请稍候重试", 223 | } 224 | 225 | if _, ok := code[statusText]; ok { 226 | return code[statusText] 
227 | } else { 228 | return statusText 229 | } 230 | } -------------------------------------------------------------------------------- /videosrt/aliyun/cloud/tool.go: -------------------------------------------------------------------------------- 1 | package cloud 2 | 3 | import ( 4 | "github.com/buger/jsonparser" 5 | "regexp" 6 | "strings" 7 | "unicode" 8 | "unicode/utf8" 9 | "videosrt/videosrt/tool" 10 | ) 11 | 12 | 13 | type AliyunAudioRecognitionResultBlock struct { 14 | AliyunAudioRecognitionResult 15 | Blocks []int 16 | BlockEmptyTag bool 17 | BlockEmptyHandle bool 18 | } 19 | 20 | //阿里云录音录音文件识别 - 智能分段处理 21 | func AliyunAudioResultWordHandle(result [] byte , callback func (vresult *AliyunAudioRecognitionResult)) { 22 | var audioResult = make(map[int64][] *AliyunAudioRecognitionResultBlock) 23 | var wordResult = make(map[int64][]*AliyunAudioWord) 24 | var err error 25 | 26 | //获取录音识别数据集 27 | _, err = jsonparser.ArrayEach(result, func(value []byte, dataType jsonparser.ValueType, offset int, err error) { 28 | text , _ := jsonparser.GetString(value, "Text") 29 | channelId , _ := jsonparser.GetInt(value, "ChannelId") 30 | beginTime , _ := jsonparser.GetInt(value, "BeginTime") 31 | endTime , _ := jsonparser.GetInt(value, "EndTime") 32 | silenceDuration , _ := jsonparser.GetInt(value, "SilenceDuration") 33 | speechRate , _ := jsonparser.GetInt(value, "SpeechRate") 34 | emotionValue , _ := jsonparser.GetInt(value, "EmotionValue") 35 | 36 | vresult := &AliyunAudioRecognitionResultBlock {} 37 | vresult.Text = text 38 | vresult.ChannelId = channelId 39 | vresult.BeginTime = beginTime 40 | vresult.EndTime = endTime 41 | vresult.SilenceDuration = silenceDuration 42 | vresult.SpeechRate = speechRate 43 | vresult.EmotionValue = emotionValue 44 | 45 | _ , isPresent := audioResult[channelId] 46 | if isPresent { 47 | //追加 48 | audioResult[channelId] = append(audioResult[channelId] , vresult) 49 | } else { 50 | //初始 51 | audioResult[channelId] = 
[]*AliyunAudioRecognitionResultBlock{} 52 | audioResult[channelId] = append(audioResult[channelId] , vresult) 53 | } 54 | } , "Result", "Sentences") 55 | if err != nil { 56 | panic(err) 57 | } 58 | 59 | //获取词语数据集 60 | _, err = jsonparser.ArrayEach(result , func(value []byte, dataType jsonparser.ValueType, offset int, err error) { 61 | word , _ := jsonparser.GetString(value, "Word") 62 | channelId , _ := jsonparser.GetInt(value, "ChannelId") 63 | beginTime , _ := jsonparser.GetInt(value, "BeginTime") 64 | endTime , _ := jsonparser.GetInt(value, "EndTime") 65 | vresult := &AliyunAudioWord{ 66 | Word:word, 67 | ChannelId:channelId, 68 | BeginTime:beginTime, 69 | EndTime:endTime, 70 | } 71 | _, isPresent := wordResult[channelId] 72 | if isPresent { 73 | //追加 74 | wordResult[channelId] = append(wordResult[channelId] , vresult) 75 | } else { 76 | //初始 77 | wordResult[channelId] = []*AliyunAudioWord{} 78 | wordResult[channelId] = append(wordResult[channelId] , vresult) 79 | } 80 | } , "Result" , "Words") 81 | if err != nil { 82 | panic(err) 83 | } 84 | 85 | 86 | var symbol = []string{"?","。",",","!",";","、","?",".",",","!"} 87 | //数据集处理 88 | for _ , value := range audioResult { 89 | for _ , data := range value { 90 | // filter 91 | data.Text = FilterText(data.Text) 92 | 93 | data.Blocks = GetTextBlock(data.Text) 94 | data.Text = ReplaceStrs(data.Text , symbol , "") 95 | 96 | if len(data.Blocks) == 0 { 97 | data.BlockEmptyTag = true 98 | } 99 | } 100 | } 101 | 102 | //遍历输出 103 | for _ , value := range wordResult { 104 | 105 | var block string = "" 106 | var blockRune int = 0 107 | var lastBlock int = 0 108 | 109 | var beginTime int64 = 0 110 | var blockBool = false 111 | 112 | var ischinese = IsChineseWords(value) //校验中文 113 | 114 | var chineseNumberWordIndexs []int 115 | var chineseNumberDiffLength int = 0 116 | 117 | for i , word := range value { 118 | if blockBool || i == 0 { 119 | beginTime = word.BeginTime 120 | blockBool = false 121 | } 122 | 123 | if ischinese && 
block == "" { 124 | chineseNumberWordIndexs = []int{} 125 | chineseNumberDiffLength = 0 126 | } 127 | 128 | if ischinese { 129 | block += word.Word 130 | if tool.CheckChineseNumber(word.Word) && FindSliceIntCount(chineseNumberWordIndexs , i) == 0 { 131 | cl := tool.ChineseNumberToLowercaseLength(word.Word) - utf8.RuneCountInString(word.Word) 132 | if cl > 0 { 133 | chineseNumberDiffLength += cl 134 | chineseNumberWordIndexs = append(chineseNumberWordIndexs , i) 135 | } else { 136 | //例外 137 | if i != 0 { 138 | newWord := value[i-1].Word + word.Word 139 | cl := tool.ChineseNumberToLowercaseLength(newWord) - utf8.RuneCountInString(newWord) 140 | if cl > 0 { 141 | chineseNumberDiffLength += cl 142 | chineseNumberWordIndexs = append(chineseNumberWordIndexs , i) 143 | } 144 | } 145 | } 146 | } 147 | } else { 148 | block += CompleSpace(word.Word) //补全空格 149 | } 150 | 151 | blockRune = utf8.RuneCountInString(block) 152 | 153 | //fmt.Println("chineseNumberDiffLength : " , chineseNumberWordIndexs , chineseNumberDiffLength , word.Word) 154 | 155 | for channel , p := range audioResult { 156 | if word.ChannelId != channel { 157 | continue 158 | } 159 | 160 | for windex , w := range p { 161 | 162 | if (word.BeginTime >= w.BeginTime && word.EndTime <= w.EndTime) || ((word.BeginTime < w.EndTime && word.EndTime > w.EndTime) && (FindSliceIntCount(w.Blocks , -1) != len(w.Blocks))) { 163 | flag := false 164 | early := false 165 | 166 | if !w.BlockEmptyTag { 167 | for t , B := range w.Blocks{ 168 | //fmt.Println("blockRune : " , blockRune , B , word.Word) 169 | if ((blockRune >= B) || (blockRune + chineseNumberDiffLength >= B)) && B != -1 { 170 | flag = true 171 | 172 | //fmt.Println(w.Blocks) 173 | //fmt.Println(B , lastBlock , (B - lastBlock) , word.Word) 174 | //fmt.Println(w.Text) 175 | //fmt.Println( block ) 176 | //fmt.Println("\n") 177 | 178 | var thisText = "" 179 | //容错机制 180 | if t == (len(w.Blocks) - 1) { 181 | thisText = SubString(w.Text , lastBlock , 10000) 182 | } else { 
183 | //下个词提前结束 184 | if i < len(value)-1 && value[i+1].BeginTime >= w.EndTime{ 185 | thisText = SubString(w.Text , lastBlock , 10000) 186 | early = true 187 | } else { 188 | thisText = SubString(w.Text , lastBlock , (B - lastBlock)) 189 | } 190 | } 191 | 192 | lastBlock = B 193 | if early == true { 194 | //全部设置为-1 195 | for vt,vb := range w.Blocks{ 196 | if vb != -1 { 197 | w.Blocks[vt] = -1; 198 | } 199 | } 200 | } else { 201 | w.Blocks[t] = -1 202 | } 203 | 204 | vresult := &AliyunAudioRecognitionResult{ 205 | Text:thisText, 206 | ChannelId:channel, 207 | BeginTime:beginTime, 208 | EndTime:word.EndTime, 209 | SilenceDuration:w.SilenceDuration, 210 | SpeechRate:w.SpeechRate, 211 | EmotionValue:w.EmotionValue, 212 | } 213 | callback(vresult) //回调传参 214 | 215 | blockBool = true 216 | break 217 | } 218 | } 219 | 220 | //fmt.Println("word.Word : " , word.Word) 221 | //fmt.Println(block) 222 | 223 | if FindSliceIntCount(w.Blocks , -1) == len(w.Blocks) { 224 | //全部截取完成 225 | block = "" 226 | lastBlock = 0 227 | } 228 | //容错机制 229 | if FindSliceIntCount(w.Blocks , -1) == (len(w.Blocks)-1) && flag == false { 230 | var thisText = SubString(w.Text , lastBlock , 10000) 231 | 232 | w.Blocks[len(w.Blocks) - 1] = -1 233 | //vresult 234 | vresult := &AliyunAudioRecognitionResult{ 235 | Text:thisText, 236 | ChannelId:channel, 237 | BeginTime:beginTime, 238 | EndTime:w.EndTime, 239 | SilenceDuration:w.SilenceDuration, 240 | SpeechRate:w.SpeechRate, 241 | EmotionValue:w.EmotionValue, 242 | } 243 | 244 | //fmt.Println( thisText ) 245 | //fmt.Println( block ) 246 | //fmt.Println( word.Word , beginTime, w.EndTime , flag , word.EndTime ) 247 | 248 | callback(vresult) //回调传参 249 | 250 | //覆盖下一段落的时间戳 251 | if windex < (len(p)-1) { 252 | beginTime = p[windex+1].BeginTime 253 | } else { 254 | beginTime = w.EndTime 255 | } 256 | 257 | //清除参数 258 | block = "" 259 | lastBlock = 0 260 | } 261 | } else { 262 | 263 | //清除参数 264 | block = "" 265 | lastBlock = 0 266 | blockBool = true 267 | 268 | 
// IsChineseChar reports whether str contains at least one Han character or
// one of the full-width Chinese punctuation marks listed below.
//
// An empty string yields false.
func IsChineseChar(str string) bool {
	// Same punctuation set the previous regular expression matched:
	// 。 ; , : “ ” ( ) 、 ? 《 》
	const chinesePunctuation = "\u3002\uff1b\uff0c\uff1a\u201c\u201d\uff08\uff09\u3001\uff1f\u300a\u300b"

	for _, r := range str {
		// A plain rune-set check replaces compiling a regexp for every rune.
		if unicode.Is(unicode.Han, r) || strings.ContainsRune(chinesePunctuation, r) {
			return true
		}
	}
	return false
}
// lineBreakPattern matches one or more carriage-return or line-feed
// characters. It is compiled once at package initialisation.
var lineBreakPattern = regexp.MustCompile(`[\r\n]+`)

// FilterText removes every line break (CR and LF) from text and returns the
// result. All other characters are left untouched.
//
// The previous pattern "[\n|\r|\r\n]+" placed '|' inside a character class,
// where it is a literal, so pipe characters were stripped as well.
func FilterText(text string) string {
	return lineBreakPattern.ReplaceAllString(text, "")
}
client, err := oss.New(c.Endpoint , c.AccessKeyId , c.AccessKeySecret) 25 | if err != nil { 26 | return nil,err 27 | } 28 | 29 | lsRes, err := client.ListBuckets() 30 | if err != nil { 31 | return nil,err 32 | } 33 | 34 | result := []string{} 35 | for _, bucket := range lsRes.Buckets { 36 | result = append(result , bucket.Name) 37 | } 38 | 39 | return result,nil 40 | } 41 | 42 | 43 | //上传本地文件 44 | //localFileName:本地文件 45 | //objectName:oss文件名称 46 | func (c AliyunOss) UploadFile(localFileName string , objectName string) (string , error) { 47 | // 创建OSSClient实例 48 | client, err := oss.New(c.Endpoint , c.AccessKeyId , c.AccessKeySecret) 49 | if err != nil { 50 | return "",err 51 | } 52 | // 获取存储空间 53 | bucket, err := client.Bucket(c.BucketName) 54 | if err != nil { 55 | return "",err 56 | } 57 | 58 | //分日期存储 59 | date := time.Now() 60 | year := date.Year() 61 | month := date.Month() 62 | day := date.Day() 63 | objectName = strconv.Itoa(year) + "/" + strconv.Itoa(int(month)) + "/" + strconv.Itoa(day) + "/" + objectName 64 | 65 | // 上传文件 66 | err = bucket.PutObjectFromFile(objectName , localFileName) 67 | if err != nil { 68 | return "",err 69 | } 70 | 71 | return objectName , nil 72 | } 73 | 74 | 75 | //获取文件 url link 76 | func (c AliyunOss) GetObjectFileUrl(objectFile string) string { 77 | if strings.Index(c.BucketDomain, "http://") == -1 && strings.Index(c.BucketDomain, "https://") == -1 { 78 | return "http://" + c.BucketDomain + "/" + objectFile 79 | } else { 80 | return c.BucketDomain + "/" + objectFile 81 | } 82 | } -------------------------------------------------------------------------------- /videosrt/app.go: -------------------------------------------------------------------------------- 1 | package videosrt 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "github.com/buger/jsonparser" 7 | "os" 8 | "path" 9 | "strconv" 10 | "videosrt/videosrt/aliyun/cloud" 11 | "videosrt/videosrt/aliyun/oss" 12 | "videosrt/videosrt/config/ini" 13 | "videosrt/videosrt/ffmpeg" 14 | ) 
15 | 16 | 17 | //主应用 18 | type VideoSrt struct { 19 | Ffmpeg ffmpeg.Ffmpeg 20 | AliyunOss oss.AliyunOss //oss 21 | AliyunClound cloud.AliyunClound //语音识别引擎 22 | 23 | IntelligentBlock bool //智能分段处理 24 | TempDir string //临时文件目录 25 | AppDir string //应用根目录 26 | } 27 | 28 | 29 | //获取应用 30 | func NewApp(cfg string) *VideoSrt { 31 | app := ReadConfig(cfg) 32 | 33 | return app 34 | } 35 | 36 | 37 | //读取配置 38 | func ReadConfig (cfg string) *VideoSrt { 39 | if file, e := ini.LoadConfigFile(cfg , ".");e != nil { 40 | panic(e); 41 | } else { 42 | appconfig := &VideoSrt{} 43 | 44 | //AliyunOss 45 | appconfig.AliyunOss.Endpoint = file.GetMust("aliyunOss.endpoint" , "") 46 | appconfig.AliyunOss.AccessKeyId = file.GetMust("aliyunOss.accessKeyId" , "") 47 | appconfig.AliyunOss.AccessKeySecret = file.GetMust("aliyunOss.accessKeySecret" , "") 48 | appconfig.AliyunOss.BucketName = file.GetMust("aliyunOss.bucketName" , "") 49 | appconfig.AliyunOss.BucketDomain = file.GetMust("aliyunOss.bucketDomain" , "") 50 | 51 | //AliyunClound 52 | appconfig.AliyunClound.AccessKeyId = file.GetMust("aliyunClound.accessKeyId" , "") 53 | appconfig.AliyunClound.AccessKeySecret = file.GetMust("aliyunClound.accessKeySecret" , "") 54 | appconfig.AliyunClound.AppKey = file.GetMust("aliyunClound.appKey" , "") 55 | 56 | 57 | appconfig.IntelligentBlock = file.GetBoolMust("srt.intelligent_block" , false) 58 | appconfig.TempDir = "temp/audio" 59 | 60 | return appconfig 61 | } 62 | } 63 | 64 | 65 | //应用初始化 66 | func (app *VideoSrt) Init(appDir string) { 67 | app.AppDir = appDir 68 | } 69 | 70 | //应用运行 71 | func (app *VideoSrt) Run(video string) { 72 | if video == "" { 73 | panic("enter a video file waiting to be processed .") 74 | } 75 | 76 | //校验视频 77 | if VaildVideo(video) != true { 78 | panic("the input video file does not exist .") 79 | } 80 | 81 | tmpAudioDir := app.AppDir + "/" + app.TempDir 82 | if !DirExists(tmpAudioDir) { 83 | //创建目录 84 | if err := CreateDir(tmpAudioDir , true); err != nil { 85 | 
panic(err) 86 | } 87 | } 88 | tmpAudioFile := GetRandomCodeString(15) + ".mp3" 89 | tmpAudio := tmpAudioDir + "/" + tmpAudioFile 90 | 91 | Log("提取音频文件 ...") 92 | 93 | //分离视频音频 94 | ExtractVideoAudio(video , tmpAudio) 95 | 96 | Log("上传音频文件 ...") 97 | 98 | //上传音频至OSS 99 | filelink := UploadAudioToClound(app.AliyunOss , tmpAudio) 100 | //获取完整链接 101 | filelink = app.AliyunOss.GetObjectFileUrl(filelink) 102 | fmt.Println("filelink:" , filelink) 103 | 104 | Log("上传文件成功 , 识别中 ...") 105 | 106 | //阿里云录音文件识别 107 | AudioResult := AliyunAudioRecognition(app.AliyunClound, filelink , app.IntelligentBlock) 108 | 109 | Log("文件识别成功 , 字幕处理中 ...") 110 | 111 | //校验字幕段落是否为空 112 | if CheckEmptyResult(AudioResult) { 113 | panic("检测到识别结果为空,生成字幕失败(检查:媒体文件人声是否清晰?语音引擎与媒体语言是否匹配?)") 114 | } 115 | 116 | //输出字幕文件 117 | AliyunAudioResultMakeSubtitleFile(video , AudioResult) 118 | 119 | Log("完成") 120 | 121 | //删除临时文件 122 | if remove := os.Remove(tmpAudio); remove != nil { 123 | panic(remove) 124 | } 125 | } 126 | 127 | 128 | //提取视频音频文件 129 | func ExtractVideoAudio(video string , tmpAudio string) { 130 | if err := ffmpeg.ExtractAudio(video , tmpAudio); err != nil { 131 | panic(err) 132 | } 133 | } 134 | 135 | 136 | //上传音频至oss 137 | func UploadAudioToClound(target oss.AliyunOss , audioFile string) string { 138 | name := "" 139 | //提取文件名称 140 | if fileInfo, e := os.Stat(audioFile);e != nil { 141 | panic(e) 142 | } else { 143 | name = fileInfo.Name() 144 | } 145 | 146 | //上传 147 | if file , e := target.UploadFile(audioFile , name); e != nil { 148 | panic(e) 149 | } else { 150 | return file 151 | } 152 | } 153 | 154 | 155 | //阿里云录音文件识别 156 | func AliyunAudioRecognition(engine cloud.AliyunClound , filelink string , intelligent_block bool) (AudioResult map[int64][] *cloud.AliyunAudioRecognitionResult) { 157 | //创建识别请求 158 | taskid, client, e := engine.NewAudioFile(filelink) 159 | if e != nil { 160 | panic(e) 161 | } 162 | 163 | AudioResult = make(map[int64][] *cloud.AliyunAudioRecognitionResult) 164 | 
165 | //遍历获取识别结果 166 | err := engine.GetAudioFileResult(taskid , client , func(text string) { 167 | Log(text) 168 | } , func(result []byte) { 169 | //mylog.WriteLog( string(result) ) 170 | 171 | //结果处理 172 | statusText, _ := jsonparser.GetString(result, "StatusText") //结果状态 173 | if statusText == cloud.STATUS_SUCCESS { 174 | 175 | //智能分段 176 | if intelligent_block { 177 | cloud.AliyunAudioResultWordHandle(result , func(vresult *cloud.AliyunAudioRecognitionResult) { 178 | channelId := vresult.ChannelId 179 | 180 | _ , isPresent := AudioResult[channelId] 181 | if isPresent { 182 | //追加 183 | AudioResult[channelId] = append(AudioResult[channelId] , vresult) 184 | } else { 185 | //初始 186 | AudioResult[channelId] = []*cloud.AliyunAudioRecognitionResult{} 187 | AudioResult[channelId] = append(AudioResult[channelId] , vresult) 188 | } 189 | }) 190 | return 191 | } 192 | 193 | _, err := jsonparser.ArrayEach(result, func(value []byte, dataType jsonparser.ValueType, offset int, err error) { 194 | text , _ := jsonparser.GetString(value, "Text") 195 | channelId , _ := jsonparser.GetInt(value, "ChannelId") 196 | beginTime , _ := jsonparser.GetInt(value, "BeginTime") 197 | endTime , _ := jsonparser.GetInt(value, "EndTime") 198 | silenceDuration , _ := jsonparser.GetInt(value, "SilenceDuration") 199 | speechRate , _ := jsonparser.GetInt(value, "SpeechRate") 200 | emotionValue , _ := jsonparser.GetInt(value, "EmotionValue") 201 | 202 | vresult := &cloud.AliyunAudioRecognitionResult { 203 | Text:text, 204 | ChannelId:channelId, 205 | BeginTime:beginTime, 206 | EndTime:endTime, 207 | SilenceDuration:silenceDuration, 208 | SpeechRate:speechRate, 209 | EmotionValue:emotionValue, 210 | } 211 | 212 | _ , isPresent := AudioResult[channelId] 213 | if isPresent { 214 | //追加 215 | AudioResult[channelId] = append(AudioResult[channelId] , vresult) 216 | } else { 217 | //初始 218 | AudioResult[channelId] = []*cloud.AliyunAudioRecognitionResult{} 219 | AudioResult[channelId] = 
append(AudioResult[channelId] , vresult) 220 | } 221 | } , "Result", "Sentences") 222 | if err != nil { 223 | panic(err) 224 | } 225 | } 226 | }) 227 | if err != nil { 228 | panic("查询文件识别结果失败:" + err.Error()) 229 | } 230 | return 231 | } 232 | 233 | 234 | //阿里云录音识别结果集生成字幕文件 235 | func AliyunAudioResultMakeSubtitleFile(video string , AudioResult map[int64][] *cloud.AliyunAudioRecognitionResult) { 236 | subfileDir := path.Dir(video) 237 | subfile := GetFileBaseName(video) 238 | 239 | for _ , result := range AudioResult { 240 | thisfile := subfileDir + "/" + subfile + ".srt" 241 | //输出字幕文件 242 | println("输出文件:" , thisfile) 243 | 244 | file, e := os.Create(thisfile) 245 | if e != nil { 246 | panic(e) 247 | } 248 | 249 | index := 0 250 | for _ , data := range result { 251 | linestr := MakeSubtitleText(index , data.BeginTime , data.EndTime , data.Text) 252 | file.WriteString(linestr) 253 | index++ 254 | } 255 | file.Close() //defer 256 | break 257 | } 258 | } 259 | 260 | 261 | //拼接字幕字符串 262 | func MakeSubtitleText(index int , startTime int64 , endTime int64 , text string) string { 263 | var content bytes.Buffer 264 | content.WriteString(strconv.Itoa(index)) 265 | content.WriteString("\n") 266 | content.WriteString(SubtitleTimeMillisecond(startTime)) 267 | content.WriteString(" --> ") 268 | content.WriteString(SubtitleTimeMillisecond(endTime)) 269 | content.WriteString("\n") 270 | content.WriteString(text) 271 | content.WriteString("\n") 272 | content.WriteString("\n") 273 | return content.String() 274 | } 275 | 276 | //检查是否为空输出 277 | func CheckEmptyResult(AudioResult map[int64][] *cloud.AliyunAudioRecognitionResult) bool { 278 | empty := true 279 | for _,v := range AudioResult { 280 | for range v { 281 | empty = false 282 | break 283 | } 284 | } 285 | return empty 286 | } 287 | -------------------------------------------------------------------------------- /videosrt/config/ini/config.go: -------------------------------------------------------------------------------- 1 
| package ini 2 | 3 | import ( 4 | "github.com/Unknwon/goconfig" 5 | "strings" 6 | ) 7 | 8 | //获取值类型 9 | const ( 10 | CFG_BOOL = iota 11 | CFG_FLOAT64 12 | CFG_INT 13 | CFG_INT64 14 | CFG_STRING 15 | ) 16 | 17 | type Cfg struct { 18 | filename string //.ini 配置文件 19 | sep string //key.value连接符 20 | 21 | instance goconfig.ConfigFile //配置文件实例 22 | } 23 | 24 | 25 | //加载配置文件 26 | func LoadConfigFile(file string , sep string) (Cfg , error) { 27 | cfg := Cfg{} 28 | config , err := goconfig.LoadConfigFile(file) 29 | if err != nil { 30 | return cfg , err 31 | } 32 | cfg.filename = file 33 | cfg.sep = sep 34 | cfg.instance = *config 35 | return cfg , nil 36 | } 37 | 38 | //根据键名获取值[返回string][失败返回nil] 39 | func (c Cfg) Get(key string) interface{} { 40 | return c.GetValue(key , CFG_STRING , nil) 41 | } 42 | 43 | //根据键名获取值[必须] 44 | func (c Cfg) GetMust(key string , def string) string { 45 | val := c.GetValue(key , CFG_STRING , def) 46 | 47 | if val , ok := val.(string); !ok { 48 | return def 49 | } else { 50 | return val 51 | } 52 | } 53 | 54 | //根据键名获取值[返回bool][失败返回nil] 55 | func (c Cfg) GetBool(key string) interface{} { 56 | return c.GetValue(key , CFG_BOOL , nil) 57 | } 58 | 59 | //根据键名获取值[必须] 60 | func (c Cfg) GetBoolMust(key string , def bool) bool { 61 | val := c.GetValue(key , CFG_BOOL , def) 62 | 63 | if val , ok := val.(bool); !ok { 64 | return def 65 | } else { 66 | return val 67 | } 68 | } 69 | 70 | //根据键名获取值[返回float64][失败返回nil] 71 | func (c Cfg) GetFloat64(key string) interface{} { 72 | return c.GetValue(key , CFG_FLOAT64 , nil) 73 | } 74 | 75 | //根据键名获取值[必须] 76 | func (c Cfg) GetFloat64Must(key string , def float64) float64 { 77 | val := c.GetValue(key , CFG_FLOAT64 , def) 78 | 79 | if val , ok := val.(float64); !ok { 80 | return def 81 | } else { 82 | return val 83 | } 84 | } 85 | 86 | //根据键名获取值[返回int][失败返回nil] 87 | func (c Cfg) GetInt(key string) interface{} { 88 | return c.GetValue(key , CFG_INT , nil) 89 | } 90 | 91 | //根据键名获取值[必须] 92 | func (c Cfg) 
GetIntMust(key string , def int) int { 93 | val := c.GetValue(key , CFG_INT , def) 94 | 95 | if val , ok := val.(int); !ok { 96 | return def 97 | } else { 98 | return val 99 | } 100 | } 101 | 102 | //根据键名获取值[返回int64][失败返回nil] 103 | func (c Cfg) GetInt64(key string) interface{} { 104 | return c.GetValue(key , CFG_INT64 , nil) 105 | } 106 | 107 | //根据键名获取值[必须] 108 | func (c Cfg) GetInt64Must(key string , def int64) int64 { 109 | val := c.GetValue(key , CFG_INT64 , def) 110 | 111 | if val , ok := val.(int64); !ok { 112 | return def 113 | } else { 114 | return val 115 | } 116 | } 117 | 118 | //获取配置值 119 | func (c Cfg) GetValue(key string , flag int , def interface{}) interface{} { 120 | keysplit := strings.Split(key , c.sep) 121 | if len(keysplit) < 2 { 122 | return def 123 | } 124 | switch flag { 125 | case CFG_BOOL: 126 | if value , err := c.instance.Bool(keysplit[0] , keysplit[1]);err != nil { 127 | return def 128 | } else { 129 | return value 130 | } 131 | case CFG_FLOAT64: 132 | if value , err := c.instance.Float64(keysplit[0] , keysplit[1]);err != nil { 133 | return def 134 | } else { 135 | return value 136 | } 137 | case CFG_INT: 138 | if value , err := c.instance.Int(keysplit[0] , keysplit[1]);err != nil { 139 | return def 140 | } else { 141 | return value 142 | } 143 | case CFG_INT64: 144 | if value , err := c.instance.Int64(keysplit[0] , keysplit[1]);err != nil { 145 | return def 146 | } else { 147 | return value 148 | } 149 | case CFG_STRING: 150 | if value , err := c.instance.GetValue(keysplit[0] , keysplit[1]);err != nil { 151 | return def 152 | } else { 153 | return value 154 | } 155 | } 156 | return def 157 | } -------------------------------------------------------------------------------- /videosrt/ffmpeg/ffmpeg.go: -------------------------------------------------------------------------------- 1 | package ffmpeg 2 | 3 | import ( 4 | "github.com/pkg/errors" 5 | "os/exec" 6 | ) 7 | 8 | type Ffmpeg struct { 9 | Os string //ffmpeg 文件目录 10 | } 11 | 12 | 
13 | //提取视频音频 14 | func ExtractAudio (video string , tmpAudio string) (error) { 15 | ts := exec.Command("ffmpeg" , "-version") 16 | if _ , err := ts.CombinedOutput() ; err != nil { 17 | return errors.New("请先安装 ffmpeg 依赖 ,并设置环境变量") 18 | } 19 | 20 | cmd := exec.Command("ffmpeg" , "-i" , video , "-ar" , "16000" , tmpAudio) 21 | _, err := cmd.CombinedOutput() 22 | if err != nil { 23 | return err 24 | } 25 | return nil 26 | } 27 | -------------------------------------------------------------------------------- /videosrt/mylog/log.go: -------------------------------------------------------------------------------- 1 | package mylog 2 | 3 | import ( 4 | "log" 5 | "os" 6 | ) 7 | 8 | //日志存储文件 9 | const LOGFILE = "log.txt" 10 | 11 | //写入日志 12 | func WriteLog(text ...interface{}) { 13 | if err := checkLogFile(LOGFILE); err != nil { 14 | panic(err.Error()) 15 | return 16 | } 17 | logfile , err := os.OpenFile(LOGFILE , os.O_APPEND , os.ModePerm) 18 | if err != nil { 19 | panic(err.Error()) 20 | return 21 | } 22 | 23 | defer logfile.Close() //关闭 24 | 25 | debugLog := log.New(logfile , "[info]" , log.Llongfile) 26 | debugLog.Println(text) 27 | } 28 | 29 | 30 | //检测日志文件 31 | func checkLogFile (path string) (error) { 32 | file , err := os.Open(path) 33 | if err != nil { 34 | if os.IsNotExist(err) { 35 | newfile , err := os.Create(path) 36 | if err != nil { 37 | return err 38 | } 39 | newfile.Close() 40 | } 41 | } 42 | file.Close() 43 | return nil 44 | } 45 | 46 | -------------------------------------------------------------------------------- /videosrt/tool.go: -------------------------------------------------------------------------------- 1 | package videosrt 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "math/rand" 7 | "os" 8 | "path" 9 | "strconv" 10 | "strings" 11 | "time" 12 | ) 13 | 14 | //日志输出 15 | func Log(agrs ...interface{}) { 16 | fmt.Println(agrs ...) 
// GetRandomCodeString returns a random string of the requested length made of
// ASCII letters and digits. A length of zero or less yields "".
//
// The result is meant for temporary file names; it is not cryptographically
// secure.
func GetRandomCodeString(length int) string {
	const charset = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"

	if length <= 0 {
		return ""
	}

	// Nanosecond seed: seeding with whole seconds produced the same string
	// for every call made within one second.
	rng := rand.New(rand.NewSource(time.Now().UnixNano()))

	var b strings.Builder
	b.Grow(length)
	for i := 0; i < length; i++ {
		// Intn's bound is exclusive, so len(charset) covers every character;
		// the previous bound of 61 could never select the final '9'.
		b.WriteByte(charset[rng.Intn(len(charset))])
	}
	return b.String()
}
10) , "0" , 2 , true) 107 | var hoursText = RepeatStr(strconv.FormatInt(hours , 10) , "0" , 2 , true) 108 | var millisecondText = RepeatStr(strconv.FormatInt(millisecond , 10) , "0" , 3 , true) 109 | 110 | return hoursText + ":" + minText + ":" + miaoText + "," + millisecondText 111 | } 112 | 113 | 114 | func RepeatStr(str string , s string , length int , before bool) string { 115 | ln := len(str) 116 | 117 | if ln >= length { 118 | return str 119 | } 120 | 121 | if before { 122 | return strings.Repeat(s , (length - ln)) + str 123 | } else { 124 | return str + strings.Repeat(s , (length - ln)) 125 | } 126 | } 127 | 128 | 129 | //打印对象转JSON数据 130 | func DumpObjectToJson(data interface{} , title string) { 131 | if data != nil { 132 | jsonData , _ := json.Marshal(data) 133 | fmt.Println(title , string(jsonData)) 134 | } 135 | } -------------------------------------------------------------------------------- /videosrt/tool/chinese_simple.go: -------------------------------------------------------------------------------- 1 | package tool 2 | 3 | import ( 4 | "regexp" 5 | "strings" 6 | "unicode/utf8" 7 | ) 8 | 9 | 10 | func CheckChineseNumber(s string) bool { 11 | regx := regexp.MustCompile("(.*)([一|二|两|三|四|五|六|七|八|九|十|百|千|万|亿]+)(.*)") 12 | return regx.MatchString(s) 13 | } 14 | 15 | func ChineseNumberToLowercaseLength(s string) int { 16 | st := GetStringUtf8Length(s) 17 | regx := regexp.MustCompile("([一|二|两|三|四|五|六|七|八|九|十|百|千|万|亿]+)(.*)") 18 | s = regx.ReplaceAllString(s , "$1") 19 | if s == "" || !IsChineseNumber(s) { 20 | return st 21 | } 22 | rst := GetStringUtf8Length(s) 23 | cha_t := 0 24 | if st > rst { 25 | cha_t = st - rst 26 | } 27 | s = strings.TrimSpace(s) 28 | zhTexts := strings.Split(s , "") 29 | zhTextsLens := len(zhTexts) 30 | 31 | numberPosi := true 32 | maxBaseNumber := 1 33 | 34 | if s == "十" { 35 | maxBaseNumber = 2 36 | } else { 37 | for i:=0; i 6 { 114 | return false 115 | } 116 | regx := 
// VaildVideo reports whether video names an existing, non-directory file.
//
// It returns false when os.Stat fails for any reason (missing file,
// permission problem, empty path, ...) and when the path refers to a
// directory, since a directory cannot be a video file.
func VaildVideo(video string) bool {
	info, err := os.Stat(video)
	if err != nil {
		return false
	}
	return !info.IsDir()
}